-
Notifications
You must be signed in to change notification settings - Fork 1
/
pdfjoin.py
147 lines (123 loc) · 5.63 KB
/
pdfjoin.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import os
import glob
import json
from collections import OrderedDict
from pypdf import PdfWriter, PdfReader
import csv
def get_page_count(file_name):
    """Return the number of pages in the PDF stored at ``file_name``.

    The file is opened in binary mode and handed to ``PdfReader``; any error
    raised while opening or parsing the file propagates to the caller.
    """
    with open(file_name, 'rb') as stream:
        return PdfReader(stream).get_num_pages()
def pdf_ok_writing(pdf, temp_path="./temp"):
    """Check whether ``pdf`` can be appended to a writer and written out.

    The PDF is appended to a fresh ``PdfWriter`` and the result is written to
    ``<temp_path>/test_can_be_deleted.pdf`` (the directory is created when it
    does not exist yet). The probe file is left on disk.

    Args:
        pdf: Whatever ``PdfWriter.append`` accepts; callers in this file pass
            a path string.
        temp_path: Directory that receives the throw-away probe file.

    Returns:
        True when both the append and the write succeeded, False when either
        of them raised an ``Exception``.
    """
    if not os.path.exists(temp_path):
        os.makedirs(temp_path)
    probe_path = os.path.join(temp_path, "test_can_be_deleted.pdf")
    try:
        merger = PdfWriter()
        try:
            merger.append(pdf)
            merger.write(probe_path)
        finally:
            # Release the writer even when the probe failed half-way.
            merger.close()
        return True
    except Exception:
        # Only ordinary errors mean "this PDF is broken"; KeyboardInterrupt
        # and SystemExit must still be able to stop the script.
        return False
def is_foreign(file_name, foreign_submitids):
    """Tell whether the submit id embedded in ``file_name`` is a foreign one.

    The submit id is taken to be the part of the file's base name that
    follows the first ``"__"``, with the file extension removed. Only the
    base name is inspected, so a ``"__"`` inside a directory name does not
    interfere.

    Args:
        file_name: Path of the submitted file, e.g. ``".../name__12345.pdf"``.
        foreign_submitids: Container of submit id strings to test against.

    Returns:
        True when the extracted id is contained in ``foreign_submitids``.
        False when it is not, when the base name has no ``"__"`` separator,
        or when ``foreign_submitids`` does not support membership tests
        (for example ``None``).
    """
    stem = os.path.splitext(os.path.basename(file_name))[0]
    _, separator, file_submitid = stem.partition("__")
    if not separator:
        # No id in the name: never classify such a file as foreign by accident.
        return False
    try:
        return file_submitid in foreign_submitids
    except TypeError:
        return False
def join_it(download_path, split_exceptions, problems):
    """Join the submitted PDFs of every problem into Czech and foreign files.

    For each label in ``problems`` the files in
    ``<download_path>/uloha-<label>/`` whose last four characters contain
    ``pdf`` are processed in sorted path order. Every PDF that passes
    ``pdf_ok_writing`` is appended either to
    ``joined_uloha-<label>_foreign.pdf`` (when ``is_foreign`` says so) or to
    ``joined_uloha-<label>_czech.pdf``; PDFs with an odd page count get one
    blank page appended so each submission occupies an even number of pages.
    The padded page count of every source file is dumped as JSON to
    ``stranyprorozdeleni_uloha-<label>_{czech,foreign}.txt``.

    PDFs that fail ``pdf_ok_writing`` are moved out of the problem folder:
    into ``exceptions/uloha-<label>/`` when exceptions are split, otherwise
    into one shared ``exceptions/`` folder after all problems are processed.

    Args:
        download_path: Directory holding ``submitids.csv`` and the
            ``uloha-<label>`` folders; all outputs are written here.
        split_exceptions: ``"d"`` or ``False`` selects the shared exceptions
            folder; any other value selects per-problem folders.
        problems: Iterable of problem labels.

    Raises:
        FileNotFoundError: ``submitids.csv`` is still missing after the user
            was prompted to add it.
        ValueError: the CSV header has no ``submit`` column.
    """
    split_exceptions = split_exceptions not in ("d", False)

    submitids_path = os.path.join(download_path, 'submitids.csv')
    if not os.path.exists(submitids_path):
        print("\nWarning: submitids.csv not found!")
        print("This file is needed to separate foreign and Czech solutions.")
        print("Please download the list of foreign submits from fksdb (export 'Submity zahraničních řešitelů')")
        print("The file should be in:", submitids_path)
        input("\nPress Enter once you've added the file...")
        if not os.path.exists(submitids_path):
            raise FileNotFoundError("submitids.csv still not found. Cannot continue without it.")

    foreign_submitids = _read_foreign_submitids(submitids_path)

    # Broken PDFs collected across all problems (used only in shared mode).
    all_exceptions = []
    for problem in problems:
        print(f'uloha: {problem}')
        writer_czech = PdfWriter()
        writer_foreign = PdfWriter()
        path_list = glob.glob(download_path + f'/uloha-{problem}/*')
        pdf_list = sorted(path for path in path_list if 'pdf' in path[-4:].lower())
        pages_czech = OrderedDict()
        pages_foreign = OrderedDict()
        problem_exceptions = []

        for pdf in pdf_list:
            if not pdf_ok_writing(pdf):
                # Report which stage is broken, then set the file aside.
                try:
                    get_page_count(pdf)
                    print("chyba writing: ", pdf)
                except Exception:
                    print("chyba get_page_count: ", pdf)
                problem_exceptions.append(pdf)
                continue
            reader = PdfReader(pdf)
            if is_foreign(pdf, foreign_submitids):
                pages_foreign[pdf] = _append_padded(writer_foreign, reader)
            else:
                pages_czech[pdf] = _append_padded(writer_czech, reader)

        joined_path_czech = download_path + f'/joined_uloha-{problem}_czech.pdf'
        joined_path_foreign = download_path + f'/joined_uloha-{problem}_foreign.pdf'
        writer_czech.write(joined_path_czech)
        writer_foreign.write(joined_path_foreign)
        writer_czech.close()
        writer_foreign.close()

        with open(download_path + f"/stranyprorozdeleni_uloha-{problem}_czech.txt", "w") as f:
            json.dump(pages_czech, f)
        with open(download_path + f"/stranyprorozdeleni_uloha-{problem}_foreign.txt", "w") as f:
            json.dump(pages_foreign, f)

        jp_czech = get_page_count(joined_path_czech)
        jp_foreign = get_page_count(joined_path_foreign)
        print(f'Joined get_page_count (Czech): {jp_czech}')
        print(f'Joined get_page_count (Foreign): {jp_foreign}')
        print()

        if split_exceptions:
            exc_path = os.path.join(download_path, 'exceptions', f'uloha-{problem}')
            os.makedirs(exc_path, exist_ok=True)
            _move_into(problem_exceptions, exc_path)
        else:
            all_exceptions.extend(problem_exceptions)

    if not split_exceptions:
        print('Exceptions:')
        exc_path = os.path.join(download_path, 'exceptions')
        os.makedirs(exc_path, exist_ok=True)
        for exception in all_exceptions:
            print(exception)
        _move_into(all_exceptions, exc_path)


def _read_foreign_submitids(csv_path):
    """Return the set of values in the ``submit`` column of a ';'-separated CSV."""
    with open(csv_path, 'r', newline='') as file:
        csv_reader = csv.reader(file, delimiter=';')
        headers = next(csv_reader)
        submit_index = headers.index('submit')
        # Blank or truncated rows have no submit cell; skip them.
        return {row[submit_index] for row in csv_reader if len(row) > submit_index}


def _append_padded(writer, reader):
    """Append ``reader`` to ``writer``, pad to an even page count, return that count."""
    page_count = reader.get_num_pages()
    writer.append(reader)
    if page_count % 2 != 0:
        last_page = reader.pages[page_count - 1]
        # Pass the size explicitly: with the defaults (no params) you get
        # AttributeError: 'NoneType' object has no attribute 'get_object'.
        writer.add_blank_page(last_page.mediabox.width, last_page.mediabox.height)
        page_count += 1
    return page_count


def _move_into(paths, target_dir):
    """Move every file in ``paths`` into ``target_dir``, keeping its base name."""
    for path in paths:
        os.rename(path, os.path.join(target_dir, os.path.basename(path)))
if __name__ == "__main__":
    # Interactive entry point: ask for the year ("rocnik") and series
    # ("serie") numbers and for the exceptions mode, then join the PDFs found
    # in download/rocnik<N>/serie<M> next to this script.
    problems = ["1", "2", "3", "4", "5", "P", "E", "S"]
    rocnik = int(input('Zadejte číslo ročníku: '))
    serie = int(input('Zadejte číslo série: '))
    split_exceptions = input('Vyjimky oddelene nebo dohromady? o/d (oddelene pro elektronicke opravovani, dohromady po tisk, default = o) ')
    # Normalise the answer so that "D" or " d " still selects the shared
    # folder instead of silently falling through to the default.
    split_exceptions = split_exceptions.strip().lower()
    print()
    download_path = os.path.join(os.path.dirname(__file__), "download", f"rocnik{rocnik}", f"serie{serie}")
    join_it(download_path, split_exceptions, problems)