# Daily Crawler #12
# Workflow file for this run
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow that runs the crawler on a schedule or on demand.
# NOTE(review): the name says "Daily" but the cron below fires once a week;
# confirm which one is intended.
name: Daily Crawler

on:
  schedule:
    # Every Sunday at 15:00 UTC (midnight Monday, Korea time).
    - cron: '0 15 * * 0'
  # Allows the workflow to be started manually.
  workflow_dispatch:
jobs:
  # Single job: install dependencies, run crawler.py, commit the resulting
  # dataset files back to the repository, then print a summary of the results.
  crawl:
    runs-on: ubuntu-latest
    # The "Commit and push changes" step runs `git push`, so the job token
    # needs write access to the repository contents.
    permissions:
      contents: write
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12.4'

      # Reuse pip's download cache between runs; the key changes whenever any
      # requirements.txt in the repository changes.
      - name: Cache pip packages
        uses: actions/cache@v3
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      # NOTE(review): `@latest` is a mutable ref; consider pinning to a
      # released version tag or commit SHA.
      - name: Setup Chrome
        uses: browser-actions/setup-chrome@latest

      - name: Run crawler
        run: python crawler.py --verbose --workflow
        env:
          # Quoted so the value stays a string rather than a YAML integer.
          PYTHONUNBUFFERED: '1'

      # NOTE(review): the user.email value below looks like a redaction
      # placeholder left by a web scrape; confirm the intended address.
      - name: Commit and push changes
        run: |
          git config --global user.email "[email protected]"
          git config --global user.name "xeros"

          needs_push=false

          # Check for changes and commit them one category at a time.
          for file in dataset/*.json; do
            # An unmatched glob stays literal; skip it so git add does not fail.
            [ -e "$file" ] || continue
            category=$(basename "$file" .json)
            git add "$file"
            if git diff --staged --quiet; then
              echo "No changes in $category"
            else
              git commit -m "Update data for $category"
              needs_push=true
            fi
          done

          # Commit today's history file, if the crawler produced one.
          # NOTE(review): `date` uses the runner's clock and timezone; confirm
          # the crawler names the history file with the same date.
          today=$(date +%Y-%m-%d)
          history_file="dataset/history/${today}.json"
          if [ -f "$history_file" ]; then
            git add "$history_file"
            # git commit exits non-zero when nothing is staged (e.g. a re-run
            # on the same day), which would fail the step, so check first.
            if git diff --staged --quiet; then
              echo "No changes in history for $today"
            else
              git commit -m "Add history data for $today"
              needs_push=true
            fi
          fi

          # Push once, and only when at least one commit was created.
          if [ "$needs_push" = true ]; then
            git push
          fi

      - name: Check crawling results
        run: |
          echo "Crawling results:"
          for file in dataset/*.json; do
            # Skip the literal pattern when no dataset file exists.
            [ -e "$file" ] || continue
            category=$(basename "$file" .json)
            count=$(jq length "$file")
            echo "$category: $count products"
          done

          # Report categories listed in target-list.json that have no dataset
          # file. Read line by line so keys containing spaces are not split.
          expected_categories=$(jq -r 'keys[]' target-list.json)
          while IFS= read -r category; do
            [ -n "$category" ] || continue
            if [ ! -f "dataset/${category}.json" ]; then
              echo "Missing category: ${category}"
            fi
          done <<< "$expected_categories"

          # Check the history file. Shell variables do not survive from one
          # step to the next, so the path has to be recomputed here.
          today=$(date +%Y-%m-%d)
          history_file="dataset/history/${today}.json"
          if [ -f "$history_file" ]; then
            echo "Today's history file created: ${today}.json"
          else
            echo "Error: Today's history file not found"
          fi