# Embed the transcripts and add to chroma #33
# NOTE: the file viewer flagged this file as containing bidirectional Unicode
# text that may be interpreted or compiled differently than it appears.
# Review it in an editor that reveals hidden Unicode characters.
# Workflow: installs the embedding requirements and the local ./src package,
# runs flows/embed.py, then commits and pushes whatever changed in the
# working tree back to the repository.
name: Embed the transcripts and add to chroma

on:
  # Manual trigger.
  workflow_dispatch:
  schedule:
    # Run 5 minutes after download_transcripts.yml
    - cron: "46 23 * * 6"  # “At 23:46 on Saturday.”

# Secrets exposed as environment variables to every step of every job.
env:
  HF_ACCESS_TOKEN: ${{ secrets.HF_ACCESS_TOKEN }}
  TALKING_PYTHON_ACCESS_TOKEN: ${{ secrets.TALKING_PYTHON_ACCESS_TOKEN }}

jobs:
  scheduled:
    runs-on: ubuntu-latest
    steps:
      - name: Check out this repo
        uses: actions/checkout@v3

      # No build matrix is defined in this workflow, so the step name states
      # the version literally instead of interpolating an empty matrix value.
      - name: Set up Python 3.10
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML does not read it as the float 3.1.
          python-version: "3.10"
          cache: 'pip'  # caching pip dependencies
          # Key the pip cache on the requirements file this job installs.
          cache-dependency-path: requirements/requirements_embed.txt

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements/requirements_embed.txt
          pip install ./src

      - name: Run flow
        run: python flows/embed.py hugging_face release download

      # `git commit ... || exit 0` ends the step successfully when there is
      # nothing to commit, so `git push` only runs after a new commit.
      # NOTE(review): the committer e-mail was an obfuscated placeholder in
      # the reviewed copy; restored to the usual Actions noreply address.
      # Confirm against the repository's original value.
      - name: Commit and push if it changed
        run: |-
          git config user.name "Automated"
          git config user.email "actions@users.noreply.github.com"
          git add -A
          timestamp=$(date -u)
          git commit -m "embed transcript: ${timestamp}" || exit 0
          git push