# Scrape Hacker News #23722
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# GitHub Actions workflow: uses shot-scraper to run scrape.js against two
# Hacker News "from?site=" listing pages, records what changed with csv-diff,
# submits the fresh data via submit-to-datasette-cloud.sh, and commits the
# updated JSON files back to the repository.
name: Scrape Hacker News

on:
  # Run on every push to the repository.
  push:
  # Allow the workflow to be started manually.
  workflow_dispatch:
  # Run once an hour, at minute 17.
  schedule:
    - cron: '17 * * * *'

jobs:
  shot-scraper:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          # Quoted so the version stays a string rather than a float.
          python-version: "3.12"
          cache: "pip"

      # The cache key only varies by runner OS, so the same cache entry is
      # reused across runs.
      - name: Cache Playwright browsers
        uses: actions/cache@v4
        with:
          path: ~/.cache/ms-playwright/
          key: ${{ runner.os }}-browsers

      - name: Install dependencies
        run: |
          pip install -r requirements.txt

      - name: Install Playwright dependencies
        run: |
          shot-scraper install

      # Each page is scraped into a "-new.json" file so the previous snapshot
      # is still on disk for the diff in the next step.
      # NOTE(review): the 5 second sleep presumably spaces out the two
      # requests to news.ycombinator.com - confirm intent.
      - name: Scrape
        run: |
          shot-scraper javascript \
            "https://news.ycombinator.com/from?site=simonwillison.net" \
            -i scrape.js -o simonwillison-net-new.json
          sleep 5
          shot-scraper javascript \
            "https://news.ycombinator.com/from?site=datasette.io" \
            -i scrape.js -o datasette-io-new.json

      # Builds /tmp/commit.txt: the UTC date, a blank line, then the csv-diff
      # output for each site (old vs. new snapshot, rows keyed by "id").
      - name: Generate commit message
        run: |
          echo "$(date -u)" > /tmp/commit.txt
          echo "" >> /tmp/commit.txt
          csv-diff simonwillison-net.json simonwillison-net-new.json --key id --format json \
            --extra title '{title}' \
            --extra latest 'https://news.ycombinator.com/latest?id={id}' \
            --extra item 'https://news.ycombinator.com/item?id={id}' >> /tmp/commit.txt
          echo "" >> /tmp/commit.txt
          csv-diff datasette-io.json datasette-io-new.json --key id --format json \
            --extra title '{title}' \
            --extra latest 'https://news.ycombinator.com/latest?id={id}' \
            --extra item 'https://news.ycombinator.com/item?id={id}' >> /tmp/commit.txt

      # Promote the new snapshots over the previous ones.
      - name: Update data
        run: |
          mv simonwillison-net-new.json simonwillison-net.json
          mv datasette-io-new.json datasette-io.json

      # The write token is exposed to the script only through the DS_TOKEN
      # environment variable of this step.
      - name: Submit fresh data to my Datasette Cloud space
        env:
          DS_TOKEN: ${{ secrets.SIMON_DS_WRITE_TOKEN }}
        run: |
          ./submit-to-datasette-cloud.sh

      # "|| exit 0" ends the step successfully when git commit fails (for
      # example when nothing changed), skipping the pull and push.
      # The rebase pulls in any commits that landed on the remote while this
      # job was running, before pushing.
      # NOTE(review): the user.email value below looks like an e-mail
      # obfuscation placeholder rather than a real address - confirm and
      # restore the intended value.
      - name: Commit and push
        run: |-
          git config user.name "Automated"
          git config user.email "[email protected]"
          git add -A
          git commit -F /tmp/commit.txt || exit 0
          git pull --rebase
          git push