From d4f911798eb9dea7b0e223308f1094daa92fa422 Mon Sep 17 00:00:00 2001 From: Greg Dubicki Date: Sun, 9 Apr 2023 16:30:13 +0100 Subject: [PATCH] Automate updating the data every day a minute after midnight + after each push to master. Also the data.json is now pretty-printed. Implements #34 --- .github/workflows/main.yml | 76 +- .gitignore | 1 + automation.py | 0 data.json | 43620 ++++++++++++++++++++++++++++++++++- index.html | 4 +- requirements.txt | 1 + scraper.py | 2 + 7 files changed, 43673 insertions(+), 31 deletions(-) create mode 100644 .gitignore delete mode 100644 automation.py create mode 100644 requirements.txt mode change 100644 => 100755 scraper.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f1ec312d5..e25a64ba0 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,49 +1,67 @@ -# This is a basic workflow to help you get started with Actions +name: Update data -name: CI - -# Controls when the action will run. Triggers the workflow on push or pull request -# events but only for the master branch on: push: branches: [ master ] - pull_request: - branches: [ master ] + schedule: + # every day at 00:01 UTC (a minute after midnight) + - cron: '1 0 * * *' -# A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: - # This workflow contains a single job called "build" - build: - # The type of runner that the job will run on + scrape-and-publish: runs-on: ubuntu-latest - # Steps represent a sequence of tasks that will be executed as part of the job steps: - # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + - name: Set up Python 3.11 + uses: actions/setup-python@v3 with: - python-version: "3.7" + python-version: "3.11" + - name: Install dependencies run: | - python -m pip install --upgrade pip + pip install -r 
requirements.txt + sudo apt-get install -y jq + - name: Remember current time + id: time + run: echo "DATE=$(date --utc)" >> $GITHUB_OUTPUT + - name: Remember checksum of the data before update + id: data-before + run: echo "MD5=$(md5sum data.json)" >> $GITHUB_OUTPUT - # Runs a single command using the runners shell - - name: Check repo data.json md5 hash - run: echo "::set-env name=datamd5::$(python $GITHUB_WORKSPACE/data.json | md5sum)" + - name: Scrape data + run: ./scraper.py > data.raw - # Runs a single command using the runners shell - - name: Check gcpinstances.info data.json md5 hash - run: echo "::set-env name=sitemd5::$(curl -s https://gcpinstances.info/data.json | md5sum)" + - name: Prettify JSON + run: jq --sort-keys . data.raw > data.json + - name: Get checksum of the data after update + id: data-after + run: echo "MD5=$(md5sum data.json)" >> $GITHUB_OUTPUT + - name: Update checking timestamp + run: sed -i 's/id="last_check">.*${{ steps.time.outputs.DATE }}.*${{ steps.time.outputs.DATE }}GCPinstances.info Easy GCP Compute Engine Instance Comparison (by DoiT International) -

Last Update: 2023-04-09 15:00:00 UTC

+

Last Prices Check: Sun Apr 9 16:02:35 UTC 2023

+

Last Change: Sun Apr 9 16:02:35 UTC 2023

+