-
Notifications
You must be signed in to change notification settings - Fork 0
/
db_migrate_SQLite.py
100 lines (95 loc) · 4.78 KB
/
db_migrate_SQLite.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import traceback
import apsw
import os
import xxhash
def tagDB(db, picture_id: str):
    """Return the tag rows (name, translated_name) attached to *picture_id*.

    Args:
        db: apsw Connection to the (old-schema) database.
        picture_id: primary key of the picture whose tags are fetched.

    Returns:
        Whatever cursor_to_dict yields for the query — a list of dicts with
        "name" and "translated_name" keys.
    """
    cursor = db.cursor()
    # The original interpolated picture_id into the SQL verbatim — an
    # SQL-injection vector that also breaks on ids containing a quote.
    # NOTE(review): cursor_to_dict (as used throughout this file) only takes
    # (cursor, sql), so real bind parameters are not available here; escape
    # embedded single quotes per SQL rules to keep the literal well-formed.
    safe_id = str(picture_id).replace("'", "''")
    results = cursor_to_dict(
        cursor,
        "SELECT name, translated_name FROM tags WHERE tag_id IN "
        "(SELECT tag_id FROM picture_tags WHERE picture_id == '" + safe_id + "')")
    return results
def insertDB(db, pk, data, force_update=False):
    """Upsert one picture record (and its tags) into the new-schema database.

    Args:
        db: apsw Connection to the destination database.
        pk: original picture_id; hashed with xxh32 to form the integer key.
        data: dict holding the picture columns plus a "tags" list of
            {"name", "translated_name"} dicts.
        force_update: when True, overwrite any existing picture row via
            INSERT OR REPLACE; tags are written in either case.

    Returns:
        True on success, False when a constraint or any other error aborts
        the insertion.
    """
    # Derive the integer primary key from the original picture_id.
    pk = xxhash.xxh32_intdigest(pk)
    try:
        cursor = db.cursor()
        if force_update:
            # INSERT OR REPLACE performs the 'UPSERT'; apsw runs in
            # autocommit mode, so no explicit commit is needed.
            cursor.execute(
                "INSERT OR REPLACE INTO pictures (picture_id, id, author_id, "
                "author_name, title, page_no, page_count, orientation, r18, "
                "ai_type, url, local_filename, local_filename_compressed) "
                "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                (pk, data["id"], data["author_id"], data["author_name"],
                 data["title"], data["page_no"], data["page_count"],
                 data["orientation"], data["r18"], data["ai_type"],
                 data["url"], data["local_filename"],
                 data["local_filename_compressed"]))
        for tag in data["tags"]:
            # tag_id is a deterministic hash of the tag name, so the same
            # name always maps to the same id.
            tag_id = xxhash.xxh32_intdigest(str(tag["name"]))
            cursor.execute(
                "INSERT OR REPLACE INTO tags (tag_id, name, translated_name) "
                "VALUES (?, ?, ?)",
                (tag_id, tag["name"], tag["translated_name"]))
            # Bind the just-computed tag_id directly instead of the original
            # "(SELECT tag_id FROM tags WHERE name = ?)" subquery — it is
            # equivalent (tag_id is a pure function of the name, inserted one
            # statement above) and saves a lookup per tag.
            cursor.execute(
                "INSERT OR REPLACE INTO picture_tags (picture_id, tag_id) "
                "VALUES (?, ?)", (pk, tag_id))
        return True
    except apsw.ConstraintError as e:
        # Without force_update a constraint hit is an expected duplicate and
        # is skipped silently; with force_update it is a real failure worth
        # reporting.
        if force_update:
            print(
                "Aborting database insertion for picture_id: " + str(pk) + " due to error: " + str(e))
        return False
    except Exception as e:
        print("Aborting database insertion for picture_id: " + str(pk) + " due to error: " +
              str(e) + "\n" + traceback.format_exc())
        return False
def migrateDB(db, db_old, reverse_proxy):
    """Copy every picture row from the old-schema DB into the new one.

    For each record the tag list is pulled from the old database and the
    image orientation is recomputed — first from the local file, then from
    the remote URL routed through *reverse_proxy*, defaulting to 0
    (Portrait) when both attempts fail. Per-record progress and a final
    summary are printed.
    """
    old_cursor = db_old.cursor()
    rows = cursor_to_dict(old_cursor, "SELECT * FROM pictures")
    migrated = 0
    for row in rows:
        record = {
            "id": row["id"],
            "author_id": row["author_id"],
            "author_name": row["author_name"],
            "title": row["title"],
            "page_no": row["page_no"],
            "page_count": row["page_count"],
            "r18": row["r18"],
            "ai_type": row["ai_type"],
            "url": row["url"],
            "local_filename": row["local_filename"],
            # Older rows may predate this column; fall back to "".
            "local_filename_compressed": row["local_filename_compressed"] if "local_filename_compressed" in row else "",
        }
        pk = row["picture_id"]
        record["tags"] = tagDB(db_old, pk)
        try:
            record["orientation"] = get_image_orientation_from_source(
                record["local_filename"])
        except Exception:
            # Local file unavailable — retry against the proxied remote URL.
            try:
                record["orientation"] = get_image_orientation_from_source(
                    record["url"].replace("i.pximg.net", reverse_proxy))
            except Exception:
                print("Failed to get orientation for picture_id: " + str(pk) + ", assigning 0 (Portrait) as default.")
                record["orientation"] = 0
        ok = insertDB(db, pk, record, True)
        if ok:
            migrated += 1
        print("Migrating {}... {}".format(
            pk, "Success" if ok else "Failed"))
    print("Migration finished. {}/{} records migrated.".format(migrated, len(rows)))
if __name__ == "__main__":
    # Migrate the database at the user-supplied path from the old schema to
    # the new one, keeping the original file as a ".bak" backup.
    db_path = input(
        "Please enter the path of the SQLite database file (default: db.sqlite3): ") or "db.sqlite3"
    reverse_proxy = input(
        "Please enter the reverse proxy for the image URL (default: i.pixiv.re): ") or "i.pixiv.re"
    # Fail early with a clear message instead of an os.rename traceback when
    # the given path does not exist.
    if not os.path.exists(db_path):
        raise SystemExit("Database file not found: " + db_path)
    # Keep the original database as a backup; the migrated copy is written
    # back to the original path by initDB below.
    os.rename(db_path, db_path + ".bak")
    # Imported here rather than at the top of the file.
    # NOTE(review): presumably pixiv_crawler has import-time side effects
    # (e.g. opening/creating its default DB) — confirm before hoisting.
    from pixiv_crawler import initDB, cursor_to_dict, get_image_orientation_from_source
    db_old = apsw.Connection(db_path + ".bak")
    db = initDB(db_path)
    migrateDB(db, db_old, reverse_proxy)