Skip to content

Expand README - notes on prod mode #30

Expand README - notes on prod mode

Expand README - notes on prod mode #30

Workflow file for this run

# Downloads the Street_Tree_List CSV from data.sfgov.org, stores a sorted copy
# in data/Street_Tree_List.csv, and commits it when the contents changed.
# The commit message is a csv-diff summary of the added/removed/changed trees.
name: Scrape latest data

on:
  push:
  workflow_dispatch:
  schedule:
    # GitHub evaluates cron expressions in UTC: every day at 11:21.
    - cron: "21 11 * * *"

# The last step pushes a commit, so the workflow token needs write access to
# the repository contents.
permissions:
  contents: write

jobs:
  scheduled:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        # NOTE(review): the original version pin was garbled by e-mail
        # obfuscation during extraction; the major tag is a stand-in. Confirm.
        uses: actions/checkout@v4
      - name: Setup Python
        # NOTE(review): same garbling as above; confirm the intended version.
        uses: actions/setup-python@v5
        with:
          python-version: "3.11.x"
      - name: Install Python dependencies
        run: |
          pip install uv
          uv pip install -r requirements.txt --system
      - name: Fetch latest data
        run: |
          # The default shell only sets -e; also fail when the left-hand side
          # of the tail | sort pipeline below fails.
          set -o pipefail
          # Keep the previous snapshot so csv-diff has something to compare to
          cp data/Street_Tree_List.csv Street_Tree_List-old.csv
          # --fail makes curl exit non-zero on an HTTP error instead of saving
          # the error page as if it were the CSV; --location follows redirects.
          curl --fail --location -o Street_Tree_List-unsorted.csv "https://data.sfgov.org/api/views/tkzw-k3nq/rows.csv?accessType=DOWNLOAD"
          # Remove heading line and use it to start a new file
          head -n 1 Street_Tree_List-unsorted.csv > data/Street_Tree_List.csv
          # Sort all but the first line and append to that file
          tail -n +2 "Street_Tree_List-unsorted.csv" | sort >> data/Street_Tree_List.csv
          # Generate commit message using csv-diff
          csv-diff Street_Tree_List-old.csv data/Street_Tree_List.csv --key=TreeID --singular=tree --plural=trees > message.txt
      - name: Commit and push if it changed
        run: |-
          git config user.name "Automated"
          # NOTE(review): the original address was garbled during extraction;
          # this noreply address is a stand-in. Confirm.
          git config user.email "actions@users.noreply.github.com"
          git add data/Street_Tree_List.csv
          # Nothing staged means the data is unchanged: finish successfully
          # without hiding real commit/push failures behind "|| exit 0".
          if git diff --cached --quiet; then
            echo "No changes to commit"
            exit 0
          fi
          if grep -q '[^[:space:]]' message.txt; then
            git commit -F message.txt
          else
            # csv-diff reported nothing although the file changed; git rejects
            # an empty message, so fall back to a timestamped one.
            timestamp=$(date -u)
            git commit -m "Latest data: ${timestamp}"
          fi
          git push
# TODO: add dbt build to MD on fresh data scrape