This repository has been archived by the owner on Dec 25, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 4
/
dailyimagel.py
177 lines (141 loc) · 4.51 KB
/
dailyimagel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Initially written by https://commons.wikimedia.org/wiki/User:Pfctdayelise under Creative Commons Attribution-ShareAlike 3.0 license
# <https://creativecommons.org/licenses/by-sa/3.0/legalcode>
# Mostly re-written by https://www.mediawiki.org/wiki/User:Legoktm to
# use modern APIs and not wget.
import datetime
import os
import subprocess
import sys
import traceback

import mwparserfromhell
import requests
def api(**kwargs):
    """Send a GET request to the Wikimedia Commons action API.

    The keyword arguments are sent as query-string parameters.  ``format``
    is always forced to ``json`` and ``formatversion`` to ``2``, overriding
    any value the caller passed for those two keys.

    Returns the decoded JSON body of the response.

    Raises ``requests.HTTPError`` for a 4xx/5xx response and
    ``requests.Timeout`` when the server does not answer in time.
    """
    params = dict(kwargs, formatversion=2, format='json')
    # Without an explicit timeout requests waits indefinitely, which would
    # leave an unattended run hanging on a stalled connection.
    r = requests.get(
        'https://commons.wikimedia.org/w/api.php',
        params=params,
        timeout=30,
    )
    # raise_for_status() is a no-op for successful responses, so no
    # separate ``r.ok`` check is needed.
    r.raise_for_status()
    return r.json()
def page_content(title):
    """Return the revision content of the page named *title*.

    Queries the API for the page's ``revisions`` property (content only) and
    returns the ``content`` field of the first revision of the first page in
    the response.  A response without those entries raises ``KeyError`` or
    ``IndexError``.
    """
    response = api(
        action='query',
        prop='revisions',
        rvprop='content',
        titles=title,
    )
    first_page = response['query']['pages'][0]
    first_revision = first_page['revisions'][0]
    return first_revision['content']
def get_today_potd_title():
    """Return the Picture of the Day template title for today's UTC date.

    The result has the form ``Template:Potd/YYYY-MM-DD``.
    """
    # datetime.utcnow() is deprecated since Python 3.12; an aware "now" in
    # UTC formats to exactly the same date string.
    today = datetime.datetime.now(datetime.timezone.utc)
    return 'Template:Potd/' + today.strftime('%Y-%m-%d')
def get_today_potd():
    """Return the ``File:`` title of today's Picture of the Day.

    Fetches today's POTD template page, parses its wikitext and takes the
    value of parameter ``1`` of the first template found on it as the file
    name.
    """
    wikitext = page_content(get_today_potd_title())
    templates = mwparserfromhell.parse(wikitext).filter_templates()
    first_param = templates[0].get(1)
    return 'File:' + str(first_param.value)
def file_url(title):
    """Return the Commons wiki URL for *title*, with spaces as underscores."""
    base = 'https://commons.wikimedia.org/wiki/'
    page_name = title.replace(' ', '_')
    return base + page_name
def get_metadata(title):
    """Return the ``extmetadata`` mapping for the file page *title*.

    Requests the ``imageinfo`` property (``extmetadata`` only, up to 10
    entries) and returns the ``extmetadata`` of the first ``imageinfo``
    entry of the first page in the response.
    """
    response = api(
        action='query',
        prop='imageinfo',
        iiprop='extmetadata',
        iilimit='10',
        titles=title,
    )
    first_page = response['query']['pages'][0]
    return first_page['imageinfo'][0]['extmetadata']
def expand_templates(text):
    """Return *text* as expanded by the API's ``expandtemplates`` action.

    The expanded wikitext is read from ``expandtemplates.wikitext`` in the
    API response.
    """
    response = api(action='expandtemplates', text=text)
    return response['expandtemplates']['wikitext']
def get_language_name(lang):
    """Return the expansion of ``{{#language:<lang>}}`` for the code *lang*."""
    parser_function = '{{#language:%s}}' % lang
    return expand_templates(parser_function)
def get_captions(title):
    """Return the captions for the POTD template *title*, one per line.

    *title* is a namespaced page title such as the one produced by
    ``get_today_potd_title()``.  Up to 100 pages in namespace 10 are listed
    starting from the un-namespaced title.  Only pages whose title starts
    with ``title + ' '`` are used, and the language code is read from the
    first parenthesised part of each page title.

    For every language, in sorted order of the language code, the value of
    parameter ``1`` of the first template on the caption page is expanded
    through the API and stripped of markup.  Redirect pages and pages
    without any template are skipped.

    Returns a string of ``"<language name>: <caption>\n"`` lines, or an
    empty string when no caption was found.
    """
    params = {
        'action': 'query',
        'list': 'allpages',
        'apfrom': title.split(':', 1)[1],
        'aplimit': '100',
        'apnamespace': '10'
    }
    data = api(**params)

    prefix = title + ' '
    langs = {}
    for item in data['query']['allpages']:
        page_title = item['title']
        if page_title.startswith(prefix):
            lang = page_title.split('(')[1].split(')')[0]
            langs[lang] = page_title

    lines = []
    for lang in sorted(langs):
        content = page_content(langs[lang])
        # MediaWiki treats the redirect keyword case-insensitively, so
        # "#redirect" has to be skipped as well.
        if content.strip().upper().startswith('#REDIRECT'):
            continue
        templates = mwparserfromhell.parse(content).filter_templates()
        if not templates:
            continue
        caption_code = templates[0].get(1).value
        # We want templates like {{w|FooBar}} to render, so expand them
        expanded = expand_templates(str(caption_code))
        caption = str(mwparserfromhell.parse(expanded).strip_code())
        # Resolve the language name only for pages that actually yield a
        # caption; skipped pages no longer cost an extra API request.
        lines.append('%s: %s\n' % (get_language_name(lang), caption))
    return ''.join(lines)
# Path of the sendmail binary used to deliver the message.
SENDMAIL = "/usr/sbin/sendmail"

# Value of the "From:" header.
mailfrom = 'Wikimedia Commons Picture of the Day <[email protected]>'

# Value of the "To:" header.  The first command-line argument, when given,
# replaces the default recipient.
# Alternative recipients kept from earlier revisions:
# mailto = "[email protected]"
# mailto = "[email protected]"
# mailto = '[email protected]'
mailto = sys.argv[1] if len(sys.argv) > 1 else "[email protected]"
def createmail():
    """Assemble the full text of today's Picture of the Day e-mail.

    The message consists of the From, To, Content-Type and Subject headers
    (the subject is today's local date), a blank line, and a body with the
    file URL, the licence line when the metadata has a ``UsageTerms`` entry,
    and the captions returned by ``get_captions``.

    Returns the message as a single string.
    """
    title = get_today_potd()
    imageurl = file_url(title)
    metadata = get_metadata(title)
    captions = get_captions(get_today_potd_title())

    # ????  (no licence line is emitted when UsageTerms is absent)
    lic = None
    if 'UsageTerms' in metadata:
        lic = 'Licensed under the %s.' % metadata['UsageTerms']['value']

    parts = [
        'From: ' + mailfrom + '\r\n',
        "To: " + mailto + '\r\n',
        'Content-Type: text/plain; charset=utf-8\r\n',
        "Subject: " + str(datetime.date.today()) + '\r\n\r\n',
        "Picture of the day:\r\n",
        imageurl + '\n',
    ]
    if lic:
        parts.append('Copyright status: ' + lic + '\n')
    parts.append('Descriptions:\n')
    parts.append(captions)
    return ''.join(parts)
def main():
    """Build today's Picture of the Day e-mail and pass it to sendmail.

    An exception raised while assembling the message is printed with its
    traceback and re-raised.  A failure to start sendmail, or a non-zero
    exit status from it, is reported on stdout.
    """
    try:
        mail = createmail()
    except Exception:
        # TODO: We should email this to someone
        traceback.print_exc()
        raise

    # "sendmail -t" takes the recipients from the message headers.  The
    # message is fed to it as UTF-8 bytes on stdin: the text-mode pipe from
    # os.popen() rejects bytes on Python 3, and an argument list avoids
    # going through a shell.
    try:
        result = subprocess.run([SENDMAIL, '-t'], input=mail.encode('utf-8'))
    except OSError as exc:
        print("sendmail error: %s" % exc)
        return
    if result.returncode:
        print("sendmail error: Exit code: %s" % result.returncode)


if __name__ == '__main__':
    main()