-
Notifications
You must be signed in to change notification settings - Fork 1
88 lines (75 loc) · 2.49 KB
/
crawler.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# Workflow header: display name and the events that start a run.
# NOTE(review): the name says "Daily" but the schedule below fires weekly —
# confirm which one is intended before renaming anything.
name: Daily Crawler
on:
  schedule:
    # Every Sunday at 15:00 UTC (midnight on Monday, Korea Standard Time).
    - cron: '0 15 * * 0'
  # Allows the workflow to be started manually.
  workflow_dispatch:
jobs:
  # Runs the crawler, commits the refreshed dataset files back to the
  # repository, and prints a summary of what was produced.
  crawl:
    runs-on: ubuntu-latest
    # The "Commit and push changes" step runs `git push`, so the job token
    # needs write access to repository contents.
    permissions:
      contents: write
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12.4'

      # Reuse pip's download cache between runs; the key changes whenever any
      # requirements.txt in the repository changes.
      - name: Cache pip packages
        uses: actions/cache@v3
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      # NOTE(review): `@latest` is a moving ref; consider pinning to a
      # released version tag so runs are reproducible.
      - name: Setup Chrome
        uses: browser-actions/setup-chrome@latest

      - name: Run crawler
        run: python crawler.py --verbose --workflow
        env:
          # Quoted so the value is a string rather than a YAML integer.
          PYTHONUNBUFFERED: '1'

      - name: Commit and push changes
        run: |
          # NOTE(review): this address looks like a redaction placeholder —
          # confirm the intended committer email.
          git config --global user.email "[email protected]"
          git config --global user.name "xeros"

          # Without nullglob an unmatched pattern would be handed to git
          # literally and fail the step.
          shopt -s nullglob
          needs_push=false

          # Check and commit the changes of each category separately, so
          # every category gets its own commit.
          for file in dataset/*.json; do
            category=$(basename "$file" .json)
            git add "$file"
            if git diff --staged --quiet; then
              echo "No changes in $category"
            else
              git commit -m "Update data for $category"
              needs_push=true
            fi
          done

          # Commit the history file.
          today=$(date +%Y-%m-%d)
          history_file="dataset/history/${today}.json"

          # Every `run` step starts a fresh shell, so shell variables do not
          # survive into later steps. Export the values through GITHUB_ENV
          # so the result check below sees the same date and path.
          {
            echo "CRAWL_DATE=${today}"
            echo "HISTORY_FILE=${history_file}"
          } >> "$GITHUB_ENV"

          if [ -f "$history_file" ]; then
            git add "$history_file"
            # `git commit` exits non-zero when nothing is staged (e.g. a
            # manual re-run on the same day), which would fail the step.
            if git diff --staged --quiet; then
              echo "No changes in history for $today"
            else
              git commit -m "Add history data for $today"
              needs_push=true
            fi
          fi

          # Push once, after all commits have been created.
          if [ "$needs_push" = true ]; then
            git push
          fi

      - name: Check crawling results
        run: |
          shopt -s nullglob

          echo "Crawling results:"
          for file in dataset/*.json; do
            category=$(basename "$file" .json)
            count=$(jq length "$file")
            echo "$category: $count products"
          done

          # Check for missing categories. Names are read line by line so a
          # category containing whitespace is not split into several words.
          expected_categories=$(jq -r 'keys[]' target-list.json)
          while IFS= read -r category; do
            [ -n "$category" ] || continue
            if [ ! -f "dataset/${category}.json" ]; then
              echo "Missing category: ${category}"
            fi
          done <<< "$expected_categories"

          # Check the history file. CRAWL_DATE and HISTORY_FILE are exported
          # by the "Commit and push changes" step.
          if [ -f "$HISTORY_FILE" ]; then
            echo "Today's history file created: ${CRAWL_DATE}.json"
          else
            echo "Error: Today's history file not found"
          fi