-
Notifications
You must be signed in to change notification settings - Fork 0
/
pop_notes
executable file
·175 lines (131 loc) · 6.58 KB
/
pop_notes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#!/usr/bin/python3.5
# -*- coding: utf-8 -*-
## requires /usr/bin/pip3 install python-poppler-qt5
## For mac: sudo port install poppler-qt5
import popplerqt5
import os, sys, urllib, re
import PyQt5
# Category labels attached to each annotation; the trailing comment names the
# highlighter colour conventionally used for that category.
SUMM = "Summary"             # Purple
DNU = "Did not understand"   # Red
NOTE = "Note"                # Yellow
QUOTE = "Quote"              # Green
CITE = "New Source"          # Blue

# Lookup from the RED channel value of an annotation's colour to its category
# label (main() indexes this table with `annotation.style().color().red()`).
# Several red values may map to the same category.
color_dict = {
    red: label
    for label, reds in (
        (SUMM, (226, 230)),    # Purple
        (DNU, (253,)),         # Red/Pink
        (NOTE, (248, 255)),    # Yellow
        (QUOTE, (178, 188)),   # Green
        (CITE, (164, 165)),    # Blue
    )
    for red in reds
}
def _clean_quote(txt):
    """Tidy a non-empty chunk of highlighted text for the notes file.

    Drops a trailing hyphen, re-joins words hyphenated across line breaks,
    flattens remaining newlines to spaces and backslash-escapes `$` and `#`.
    """
    if txt[-1] == "-":
        txt = txt[:-1].strip()
    txt = txt.replace("-\n", "").replace("\n", " ")
    return txt.replace("$", "\\$").replace("#", "\\#")


def _collect_annotations(doc, basepage, quotes):
    """Return one dict per recognised annotation in *doc*, in page order.

    Each dict has the keys "page" (basepage + zero-based page index),
    "note" (the annotation's contents), "text" (highlighted text, only
    filled in when *quotes* is true) and "context" (a label from color_dict).
    """
    annotations = []
    for i in range(doc.numPages()):
        # Use the popplerqt5 interface to get the highlights, following
        # http://stackoverflow.com/questions/21050551/ and
        # https://people.freedesktop.org/~aacid/docs/qt5/classPoppler_1_1Annotation.html
        page = doc.page(i)
        size = page.pageSize()
        pwidth, pheight = size.width(), size.height()

        for annotation in page.annotations():
            # Sub-types 1 and 4: presumably popplerqt5.Poppler.TextAnnotation
            # and popplerqt5.Poppler.HighlightAnnotation -- TODO confirm
            # against the Poppler SubType enum.
            if annotation.subType() not in (1, 4):
                continue

            note = {
                "page": basepage + i,
                "note": annotation.contents(),
                "text": "",
            }

            # The category is keyed on the red channel only.
            color = annotation.style().color()
            try:
                note["context"] = color_dict[color.red()]
            except KeyError:
                rgb = (color.red(), color.green(), color.blue())
                if rgb == (0, 0, 0):
                    # Pure black annotations are skipped without a message.
                    continue
                print("Do not have color (r, g, b) = ({}, {}, {})".format(*rgb))
                # Show the annotation's contents so the offending annotation
                # can be located; note["text"] is always empty at this point.
                print("page", basepage + i, " >> ", note["note"])
                continue

            # Retrieving the highlighted text is a bit more finicky, since
            # PDFs aren't really designed for this: each quad is scaled from
            # the fractional coordinates to page units and the text inside
            # that rectangle is requested from the page.
            if quotes and isinstance(annotation, popplerqt5.Poppler.HighlightAnnotation):
                for quad in annotation.highlightQuads():
                    bdy = PyQt5.QtCore.QRectF()
                    bdy.setCoords(
                        quad.points[0].x() * pwidth, quad.points[0].y() * pheight,
                        quad.points[2].x() * pwidth, quad.points[2].y() * pheight,
                    )
                    txt = page.text(bdy).strip()
                    if not txt:
                        continue
                    note["text"] += _clean_quote(txt) + " "

            annotations.append(note)
    return annotations


def _write_notes(out, title, annotations):
    """Write the notes document for *annotations* to the open file *out*."""
    out.write(">* Notes for " + title + "\n\n")

    # (category, section header, terminator written after a note)
    sections = (
        (SUMM, ">>>* Summary Impressions :: \\\\ \n", "\n"),
        (CITE, ">>>* Additional sources to check out :: \\\\ \n", "\n\n"),
        (DNU, ">>>* Did not understand :: \\\\ \n", "\n"),
    )
    for context, header, note_end in sections:
        selected = [v for v in annotations if v["context"] == context]
        if not selected:
            continue
        out.write(header)
        for v in selected:
            if v["note"]:
                out.write("{}{}".format(v["note"], note_end))
            if v["text"]:
                out.write("- {text}\n\n".format(**v))
        # NOTE(review): assumed to be written once per section, after all of
        # its entries -- confirm against the original layout.
        out.write("\n\n")

    selected = [v for v in annotations if v["context"] in (NOTE, QUOTE)]
    if selected:
        out.write(">>>* Notes and Quotes :: \n")
        for v in selected:
            s = "-[{page:>4d}]".format(**v)
            if v["note"]:
                s += "{note}".format(**v)
            if v["text"]:
                s += "\n\\begin{{lquote}}\n{text}\n\\end{{lquote}}\n".format(**v)
            s += "\n\n"
            out.write(s)


def main(filename, text, jtex, basepage=1, quotes=False, silent=False):
    """Extract the annotations of a PDF into a notes file.

    Parameters
    ----------
    filename : path of the PDF to read.  Output names are derived from it by
        replacing/removing the ".pdf" substring and splitting on "/".
    text : string prepended to the notes file name (e.g. "notes/").
    jtex : string prepended to the path of a second copy of the notes in
        which the title line is replaced by an ``\\lrsection{..}{..}`` line;
        the copy is skipped when this is empty.
    basepage : number reported for the first page of the document.
    quotes : when true, also extract the text under each highlight.
    silent : when false, echo the finished notes file to standard output.

    Raises
    ------
    IOError
        If the document cannot be loaded.
    """
    print(filename)

    doc = popplerqt5.Poppler.Document.load(filename)
    if doc is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise IOError("Could not load PDF document: {}".format(filename))

    annotations = _collect_annotations(doc, basepage, quotes)

    stem_parts = filename.replace(".pdf", "").split("/")
    notes_path = text + filename.replace(".pdf", ".txt").split("/")[-1]

    with open(notes_path, "w") as out:
        _write_notes(out, stem_parts[-1], annotations)

    if jtex:
        # Copy the notes file, swapping the title line for a LaTeX section.
        with open(jtex + "/".join(stem_parts[-2:]), "w") as out, \
                open(notes_path, "r") as notes:
            for line in notes:
                if ">* Notes for" in line:
                    source = stem_parts[-1]
                    section = " :: ".join(stem_parts[-2:]).replace("_", "\\_")
                    line = "\\lrsection{{{}}}{{{}}}\n".format(source, section)
                out.write(line)

    if not silent:
        with open(notes_path, "r") as notes:
            for line in notes:
                # Every line written above ends in "\n"; drop it for print().
                print(line[:-1])
if __name__ == "__main__":
    # Script entry point: read the command-line options and pass them to main().
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("filename")                                  # PDF to process
    cli.add_argument("-p", "--pages", default=1, type=int)        # forwarded as basepage
    cli.add_argument("-s", "--silent", action="store_true")       # forwarded as silent
    cli.add_argument("-q", "--quotes", action="store_true")       # forwarded as quotes
    cli.add_argument("-t", "--text", default="notes/", type=str)  # forwarded as text
    cli.add_argument("-j", "--jtex", default="jtex/", type=str)   # forwarded as jtex
    opts = cli.parse_args()

    main(
        opts.filename,
        opts.text,
        opts.jtex,
        basepage=opts.pages,
        quotes=opts.quotes,
        silent=opts.silent,
    )