-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpywb-collections-import
executable file
·32 lines (28 loc) · 1.22 KB
/
pywb-collections-import
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#!/usr/bin/env bash
#######################################
# Import matching WARC files into a pywb collection, then delete them.
#
# Recursively searches <archive_root>/import (pruning its lost+found
# directory) for entries whose name matches a case-insensitive pattern,
# adds each one to the collection with `wb-manager add`, and removes only
# the files whose import succeeded, so a failed import never loses data.
#
# Arguments:
#   $1 - collection name passed to `wb-manager add`
#   $2 - file name pattern handed to `find -iname`
#   $3 - optional archive root; defaults to /mnt/omega/web-archives
# Side effects:
#   When at least one file matches, changes the working directory to the
#   archive root; exits the script if that directory cannot be entered.
# Returns:
#   0 if nothing matched or every match was imported and removed,
#   1 if any import or removal failed.
#######################################
import_collection_and_delete_files() {
  local collection_name="$1"
  local file_pattern="$2"
  local archive_root="${3:-/mnt/omega/web-archives}"
  local import_dir="${archive_root}/import"
  local -a files=()
  local file
  local rc=0

  # Collect matches NUL-delimited so paths containing spaces, newlines or
  # glob characters reach wb-manager and rm exactly as find reported them.
  while IFS= read -r -d '' file; do
    files+=("$file")
  done < <(find "$import_dir" -path "${import_dir}/lost+found" -prune \
    -o -iname "$file_pattern" -print0)

  # Nothing to import: leave the working directory untouched.
  (( ${#files[@]} )) || return 0

  cd "$archive_root" || exit

  for file in "${files[@]}"; do
    # Delete a source file only after its own import has succeeded.
    if wb-manager add "$collection_name" "$file"; then
      rm -- "$file" || rc=1
    else
      printf 'Import of %s into collection %s failed; keeping file.\n' \
        "$file" "$collection_name" >&2
      rc=1
    fi
  done

  return "$rc"
}
# Enter /opt, create the pywbenv virtual environment there with
# `python3 -m venv`, and activate it in this shell.
cd /opt || exit
python3 -m venv pywbenv
source pywbenv/bin/activate

# Import jobs, run in the order listed. Each entry is
# "<collection>:<file pattern>"; the matching files are deleted by
# import_collection_and_delete_files after being imported.
import_jobs=(
  "academic-calendars:YORK-ACADEMIC-CALENDARS*warc.gz"
  "yul:YORK-LIBRARIES*warc.gz"
  "yu-crs:YU-CRS*warc.gz"
  "yu-magazine:YU-MAGAZINE*warc.gz"
  "yfile:YU-YFILE*warc.gz"
  "YU-SOCIAL-MEDIA:YU-SOCIAL-MEDIA*warc.gz"
  "exhibits:redmine-3934-c-cave*warc.gz"
  "exhibits:redmine-3964-exhibits*warc.gz"
)

for job in "${import_jobs[@]}"; do
  # Split on the first ':' — collection name before it, pattern after it.
  import_collection_and_delete_files "${job%%:*}" "${job#*:}"
done