-
Notifications
You must be signed in to change notification settings - Fork 58
/
update_windows_mappings.py
executable file
·115 lines (92 loc) · 3.63 KB
/
update_windows_mappings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/usr/bin/env python3
# This script generates the mapping between MS Windows timezone names and
# tzdata/Olson timezone names, by retrieving the CLDR file windowsZones.xml
# (see WIN_ZONES_URL below; this data was previously referenced as
# http://unicode.org/cldr/data/common/supplemental/supplementalData.xml)
# and parsing it, and from this generating the file tzlocal/windows_tz.py.
#
# It must be run with Python 3.
import ftplib
import logging
import tarfile
from io import BytesIO
from pprint import pprint
from urllib.parse import urlparse
from urllib.request import urlopen
from xml.dom import minidom
# CLDR XML file holding the Windows zone name <-> tzdata zone name mapping.
WIN_ZONES_URL = "https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml"
# tzdata tarball; only its "backward" member is read (see update_old_names).
ZONEINFO_URL = "ftp://ftp.iana.org/tz/tzdata-latest.tar.gz"
# Show INFO-level progress messages while the script runs.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger("tzlocal")
def _parse_backward_links(lines):
    """Build a ``{link_name: target_name}`` dict from tzdata ``Link`` lines.

    Each element of *lines* is a ``bytes`` line.  Only lines whose first
    whitespace-separated field is ``Link`` and that carry at least three
    fields are used: the third field becomes the key and the second field
    the value, both decoded as ASCII.  Comment lines, blank lines and any
    other rule types are ignored.
    """
    links = {}
    for raw_line in lines:
        # The data is bytes, so the comment marker must be compared as bytes
        # (indexing bytes yields an int, which never equals the str "#").
        if raw_line.startswith(b"#"):
            continue
        fields = raw_line.split()
        # An empty split result means a blank line; a short "Link" line is
        # malformed and is skipped rather than raising IndexError.
        if len(fields) < 3 or fields[0] != b"Link":
            continue
        links[fields[2].decode("ascii")] = fields[1].decode("ascii")
    return links


def update_old_names():
    """Fetches the list of old tz names and returns a mapping.

    Downloads the tarball named by ``ZONEINFO_URL`` over FTP into memory,
    reads its ``backward`` member and returns a dict mapping each link
    (old) name to the name it points at.

    Raises whatever ``ftplib`` / ``tarfile`` raise on network or archive
    errors (including ``KeyError`` if the archive has no ``backward`` member).
    """
    url = urlparse(ZONEINFO_URL)
    gzfile = BytesIO()

    log.info("Connecting to %s", url.netloc)
    # The context manager closes the connection even if the transfer fails.
    with ftplib.FTP(url.netloc) as ftp:
        ftp.login()
        log.info("Fetching zoneinfo database")
        ftp.retrbinary("RETR " + url.path, gzfile.write)
    gzfile.seek(0)

    log.info("Extracting backwards data")
    # Close the archive once the single member we need has been parsed.
    with tarfile.open(mode="r:gz", fileobj=gzfile) as archive:
        return _parse_backward_links(archive.extractfile("backward").readlines())
def update_windows_zones():
    """Regenerate ``tzlocal/windows_tz.py`` from the CLDR and tzdata sources.

    Steps:

    1. Fetch the old-name links via ``update_old_names()``.
    2. Download and parse the XML at ``WIN_ZONES_URL``.
    3. Build ``win_tz`` (Windows name -> first tz name of the territory
       ``"001"`` entry) and ``tz_win`` (every listed tz name -> Windows name).
    4. Extend ``tz_win`` with names from the link table in both directions.
    5. Write both dicts, pretty-printed, to ``tzlocal/windows_tz.py``
       (path relative to the current working directory).

    Raises ``ValueError`` if the XML contains no ``<mapTimezones>`` element;
    network, XML and file errors propagate unchanged.
    """
    backward = update_old_names()

    log.info("Fetching Windows mapping info from unicode.org")
    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(WIN_ZONES_URL) as response:
        source = response.read()
    dom = minidom.parseString(source)

    map_elements = dom.getElementsByTagName("mapTimezones")
    if not map_elements:
        raise ValueError("No <mapTimezones> element found in %s" % WIN_ZONES_URL)
    # Prefer the element tagged type="windows".  When none is tagged, use the
    # last element: that is what the original loop did implicitly by letting
    # its loop variable fall through, so the selection is unchanged.
    element = map_elements[-1]
    for candidate in map_elements:
        if candidate.getAttribute("type") == "windows":
            element = candidate
            break

    log.info("Making windows mapping")
    win_tz = {}
    tz_win = {}
    for mapping in element.getElementsByTagName("mapZone"):
        windows_name = mapping.getAttribute("other")
        zone_names = mapping.getAttribute("type").split(" ")

        # Only the territory "001" entry decides the Windows -> tz direction.
        if mapping.getAttribute("territory") == "001":
            primary_zone = zone_names[0]
            win_tz[windows_name] = primary_zone
            if primary_zone.startswith("Etc"):
                # Diagnostic output kept as-is: the original printed the
                # same chosen zone twice.
                print(primary_zone, primary_zone)

        # Every listed tz name, for every territory, maps back to Windows.
        for tz_name in zone_names:
            tz_win[tz_name] = windows_name

    log.info("Adding backwards and forwards data")
    # Map in the backwards (or forwards) compatible zone names
    for backward_compat_name, standard_name in backward.items():
        if backward_compat_name not in tz_win:
            win_zone = tz_win.get(standard_name, None)
            if win_zone:
                tz_win[backward_compat_name] = win_zone

        if standard_name not in tz_win:
            win_zone = tz_win.get(backward_compat_name, None)
            if win_zone:
                tz_win[standard_name] = win_zone

    # Etc/UTC is a common but non-standard alias for Etc/GMT:
    tz_win["Etc/UTC"] = "UTC"

    log.info("Writing mapping")
    # Explicit encoding so the generated module does not depend on the
    # platform's default locale encoding.
    with open("tzlocal/windows_tz.py", "w", encoding="utf-8") as out:
        out.write(
            "# This file is autogenerated by the update_windows_mapping.py script\n"
            "# Do not edit.\nwin_tz = "
        )
        pprint(win_tz, out)

        out.write(
            "\n# Old name for the win_tz variable:\ntz_names = win_tz\n\ntz_win = "
        )
        pprint(tz_win, out)

    log.info("Done")
# Run the full regeneration only when executed as a script, not on import.
if __name__ == "__main__":
    update_windows_zones()