Skip to content

Download and clean transcripts #38

Download and clean transcripts

Download and clean transcripts #38

# Workflow: update the repository's git submodules to their remote heads,
# run the transcript-cleaning flow, and commit/push whatever changed.
name: Download and clean transcripts

on:
  # Allow the workflow to be started manually.
  workflow_dispatch:
  schedule:
    # At 23:41 UTC every Saturday (GitHub evaluates cron schedules in UTC).
    - cron: "41 23 * * 6"

jobs:
  scheduled:
    runs-on: ubuntu-latest
    # The last step pushes a commit using the default GITHUB_TOKEN, which
    # needs write access to the repository contents to do so.
    permissions:
      contents: write
    steps:
      - name: Check out this repo
        uses: actions/checkout@v3
        with:
          submodules: recursive

      # Move each submodule to the latest commit of its tracked remote branch.
      - name: Update submodule
        run: git submodule update --remote

      # No build matrix is defined for this job, so the version is spelled
      # out here instead of interpolating an (empty) matrix value.
      - name: Set up Python 3.10
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: "3.10"
          cache: 'pip' # caching pip dependencies

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements/requirements.txt

      - name: Add spaCy model language
        run: python -m spacy download en_core_web_sm

      - name: Run flow
        run: python flows/clean_transcripts.py

      # Stages everything, then commits with a UTC timestamp. When the commit
      # command fails (e.g. nothing to commit) the step exits successfully
      # without pushing.
      # NOTE(review): the user.email value below looks like an e-mail
      # obfuscation placeholder rather than a real address - confirm the
      # intended committer e-mail.
      - name: Commit and push if it changed
        run: |-
          git config user.name "Automated"
          git config user.email "[email protected]"
          git add -A
          timestamp=$(date -u)
          git commit -m "Update data: ${timestamp}" || exit 0
          git push