-
Notifications
You must be signed in to change notification settings - Fork 2
/
convertWin.py
112 lines (90 loc) · 26 KB
/
convertWin.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/usr/bin/python
import os
import base64
import sys


def show_exception_and_exit(exc_type, exc_value, tb):
    """Report an uncaught exception, wait for the user, then exit.

    The three parameters follow the ``sys.excepthook`` calling convention:

    exc_type  -- class of the exception that was raised
    exc_value -- the exception instance
    tb        -- traceback object (may be None)

    The traceback is printed, the user is asked to press a key, and the
    process terminates with exit status -1 (presumably so the message can
    be read before the console window closes).
    """
    import traceback

    traceback.print_exception(exc_type, exc_value, tb)
    # raw_input() only exists on Python 2.  The rest of this script depends
    # on Python 3 semantics (input() returning text, bytes repr slicing), so
    # raw_input() would raise NameError here and hide the real error.
    input("Press key to exit.")
    sys.exit(-1)


# Route every uncaught exception through the handler above.
sys.excepthook = show_exception_and_exit
# Scratch folder, created inside the current working directory, that holds
# the intermediate page images and the generated SVG/HTML files.
# os.path.join picks the right separator instead of a hard-coded backslash.
TMP_IMAGE_PATH = os.path.join(os.getcwd(), 'tmpimage')

# Disabled alternative page header (includes rule lines and a left margin);
# kept for reference only.
#SVG_START = '''<svg class="write-page" color-interpolation="linearRGB" width="1240px" height="1755px" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
# <g class="write-content write-v3" width="1240" height="1755" xruling="0" yruling="35" marginLeft="100" papercolor="#FFFFFF" rulecolor="#9F0000FF">
# <g class="ruleline write-std-ruling write-scale-down" fill="none" stroke="#0000FF" stroke-opacity="0.624" stroke-width="1" shape-rendering="crispEdges" vector-effect="non-scaling-stroke">
# <rect class="pagerect" fill="#FFFFFF" stroke="none" x="0" y="0" width="1240" height="1755" />
# <path class="yrule_1" d="M0 35 l1240 0" />
# <path class="leftmargin" stroke="#FF0000" d="M100 0 l0 1755" />'''

# Opening part of every generated page: a 1240x1755 px SVG document with one
# <g> page group left open so the page image can be appended after it.
SVG_START = '''<svg width='1240px' height='1755px' xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink'>
<defs> <style type='text/css'><![CDATA[
path { stroke-linecap: round; stroke-linejoin: round; }
.ruleline { shape-rendering: crispEdges; }
]]></style> </defs>
<g id='page_1' width='1240' height='1755' xruling='0.000' yruling='35' marginLeft='100.000' papercolor='#FFFFFF' rulecolor='#FF0000FF'>
'''

# Closes the page group and the SVG document opened by SVG_START.
SVG_END = '''
</g>
</svg>
'''
# HTML index written next to the page SVGs.  The three placeholders are, in
# order: document title, number of pages, and the <object> tags embedding
# each page.  Defined once here because it never changes between documents.
HTML_TEMPLATE = '''<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Strict//EN'
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>{}</title>
<script type="text/writeconfig">
<int name="pageNum" value="{}" />
<float name="xOffset" value="-1948.8418" />
<float name="yOffset" value="-436.347839" />
</script>
</head>
<body>
<img id='thumbnail' style='display:none;' src=''/>
{}
</body>
</html>'''

# Interactive conversion loop.  Each pass asks for a PDF and its page count,
# renders every page to a JPEG with the external `convert` command, wraps
# each JPEG in an SVG file, writes an HTML index embedding those SVGs and
# finally renames the scratch folder after the PDF.
while True:
    # Tolerate surrounding spaces or double quotes around the typed path.
    pdf_path = input('Give me a PDF Path: ').strip().strip('"')
    # Last path component (either separator) minus only its final extension,
    # so "C:/docs/my.report.pdf" yields "my.report" rather than a name that
    # still contains directories or is cut at the first dot.
    file_name = pdf_path.replace('\\', '/').rsplit('/', 1)[-1]
    pdf_name = os.path.splitext(file_name)[0]

    try:
        page_count = int(input("How many pages? "))
    except ValueError:
        print("Page count must be a whole number")
        continue
    if page_count < 1:
        print("Page count must be at least 1")
        continue

    # Try the path as given, then relative to the home directory, then
    # relative to the current working directory.
    candidates = (
        pdf_path,
        os.path.join(os.path.expanduser("~"), pdf_path),
        os.path.join(os.getcwd(), pdf_path),
    )
    abs_path = next((c for c in candidates if os.path.isfile(c)), None)
    if abs_path is None:
        print("Unable to find file in home directory, script directory or absolute")
        # Ask again instead of converting with a missing or stale path.
        continue

    # Final folder: the scratch path with only its last 'tmpimage' replaced,
    # so a parent directory that happens to contain that text is untouched.
    output_dir = pdf_name.join(TMP_IMAGE_PATH.rsplit('tmpimage', 1))
    if os.path.exists(output_dir):
        # Checked up front: os.rename() at the end would otherwise fail
        # after all the conversion work had already been done.
        print("Output folder already exists, remove it first: " + output_dir)
        continue

    os.makedirs(TMP_IMAGE_PATH, exist_ok=True)

    print('Converting PDF to images - may take some time')
    print('If this fails, try installing imagemagick')
    for page in range(page_count):
        print('Converting page', page)
        target = os.path.join(TMP_IMAGE_PATH, 'tmp{:03}.jpg'.format(page))
        # "file.pdf[N]" selects a single page; both paths are quoted so
        # that spaces in them survive the shell.
        command = 'convert -density 600 "{}[{}]" "{}"'.format(abs_path, page, target)
        print(command)
        status = os.system(command)
        if status != 0:
            print('Conversion of page {} failed (exit status {})'.format(page, status))

    # os.listdir() order is unspecified; sort so the zero-padded file names
    # give pages in document order, and ignore anything that is not one of
    # the page images produced above.
    image_names = sorted(
        name for name in os.listdir(TMP_IMAGE_PATH)
        if name.startswith('tmp') and name.endswith('.jpg')
    )
    if not image_names:
        print("No page images were produced - nothing to save")
        continue

    object_tags = []
    for i, img in enumerate(image_names, start=1):
        print("Encoding Image: "+img)
        img_full = os.path.join(TMP_IMAGE_PATH, img)
        with open(img_full, 'rb') as jpg_file:
            base64data = base64.b64encode(jpg_file.read()).decode('ascii')
        # The JPEG is now embedded in the SVG, so the file is not needed.
        os.remove(img_full)

        image_tag = '<image xlink:href="data:image/jpg;base64,{0}" width="1240" height="1755" x="0" y="0" />'.format(base64data)
        page_name = pdf_name + '_page{:03}.svg'.format(i)
        with open(os.path.join(TMP_IMAGE_PATH, page_name), 'w', encoding='utf-8') as sv:
            sv.write(SVG_START + image_tag + SVG_END)
        object_tags.append('''<object data="{0}" type="image/svg+xml" width="1240" height="1755"></object>\n'''.format(page_name))

    html_string = HTML_TEMPLATE.format(pdf_name, len(image_names), ''.join(object_tags))
    print("Saving HTML")
    with open(os.path.join(TMP_IMAGE_PATH, pdf_name+'.html'), 'w', encoding='utf-8') as ht:
        ht.write(html_string)

    os.rename(TMP_IMAGE_PATH, output_dir)
    print("Converted document is located at "+output_dir)
    # Pause until the user presses Enter before prompting for the next PDF.
    input()