-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
155 lines (133 loc) · 5.22 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import os
import sys
import shutil
import urllib.parse
import logging
import yaml
import tqdm
import dateparser
from feedgen.feed import FeedGenerator
from FeedFox.browser import Browser
from FeedFox.parser import Parser
from jinja2 import Environment, FileSystemLoader
# Root directory that every generated feed file and page is written under.
public_base_path = "public"
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(public_base_path, exist_ok=True)
# Log at INFO when the CI environment variable is set to a non-empty value,
# otherwise at DEBUG.
level = logging.INFO if os.environ.get("CI", False) else logging.DEBUG

# All log records go to stdout with a timestamped, level-prefixed format.
logging.basicConfig(
    stream=sys.stdout,
    level=level,
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s %(levelname)s %(message)s",
)

logger = logging.getLogger(__name__)
logger.debug("Starting FeedFox")
def _bundle_names(feed, default):
    """Return the bundle names ``feed`` is published under, always as a list.

    Falls back to ``default["bundle"]`` when the feed has no ``bundle`` key or
    its value is ``None``. A single string is wrapped in a one-element list.
    """
    names = feed.get("bundle")
    if names is None:
        names = default["bundle"]
    if isinstance(names, str):
        names = [names]
    return names


def _feed_links(feed):
    """Return the source URLs of ``feed`` as a non-empty list.

    Raises:
        ValueError: if ``feed["link"]`` is neither a string nor a non-empty
            list.
    """
    link = feed["link"]
    if isinstance(link, str):
        return [link]
    if isinstance(link, list) and link:
        return link
    raise ValueError("Invalid feed link")


def _resolve_parser(feed, templates):
    """Return the parser for ``feed``.

    A string ``template`` value names an entry in ``templates``; any other
    value is treated as keyword arguments for a new ``Parser``.
    """
    spec = feed["template"]
    if isinstance(spec, str):
        return templates[spec]
    return Parser(**spec)


def _build_feed(feed_id, feed, default, templates, browser, bundle_feeds):
    """Fetch one configured feed, write its files and feed its bundles.

    Args:
        feed_id: Key of the feed in the configuration; used as the feed id and
            as the output file stem.
        feed: Configuration mapping of this feed.
        default: Configuration mapping with fallback values.
        templates: Mapping of template name to a shared ``Parser``.
        browser: Started ``Browser`` used to fetch the pages.
        bundle_feeds: Mapping of bundle name to its ``FeedGenerator``; missing
            bundles are created here and every parsed entry is added to the
            bundles of this feed.

    Raises:
        ValueError: if the feed's ``link`` setting is invalid.
    """
    # Title is optional everywhere: fall back to the feed id consistently.
    title = feed.get("title", feed_id)
    logger.info("Processing feed: %s - %s", feed_id, title)

    names = _bundle_names(feed, default)
    logger.debug("Feed %s will be bundled in %s", feed_id, names)
    for name in names:
        if name not in bundle_feeds:
            bundle_feeds[name] = FeedGenerator()
            logger.debug("Initialize feed to bundle %s", name)
        os.makedirs(os.path.join(public_base_path, name), exist_ok=True)

    fg = FeedGenerator()
    fg.id(feed_id)
    fg.title(title)
    fg.description(feed.get("description", default.get("description", feed_id)))
    fg.updated(dateparser.parse("now").astimezone())
    fg.image(url=feed.get("image", default.get("image", None)))

    links = _feed_links(feed)
    # The first configured URL is advertised as the feed's alternate link.
    fg.link(href=links[0], rel="alternate")

    parser = _resolve_parser(feed, templates)
    timeout = feed.get("timeout", default.get("timeout", 30))

    for link in links:
        logger.info("Fetching %s", link)
        try:
            html = browser.fetch(link, parser.wait_for, timeout=timeout)
        except Exception as e:
            # A failed fetch is logged and skipped so that the remaining
            # links and feeds are still processed.
            logger.error("Error while fetching %s: %s", link, e)
            logger.info("Skipping...")
            continue
        entries = parser.parse(html)
        logger.info("Found %s entries", len(entries))
        for entry in entries:
            # Links without a host are resolved against the page they came from.
            if not urllib.parse.urlparse(entry.link).netloc:
                entry.link = urllib.parse.urljoin(link, entry.link)
            fg.add_entry(entry.entry)
            for name in names:
                bundle_feeds[name].add_entry(entry.entry)

    # NOTE(review): written once per feed after all links were processed;
    # confirm this matches the original nesting.
    for name in names:
        out_dir = os.path.join(public_base_path, name)
        fg.atom_file(os.path.join(out_dir, f"{fg.id()}.atom"))
        fg.rss_file(os.path.join(out_dir, f"{fg.id()}.rss"))


def _write_bundles(bundle_feeds):
    """Finalize every bundle feed and write its ``all.atom`` / ``all.rss``."""
    for name in tqdm.tqdm(bundle_feeds, desc="Generating bundles"):
        logger.info("Generating bundle %s", name)
        bundle = bundle_feeds[name]
        bundle.id(f"{name}-all")
        bundle.title(f"{name}")
        bundle.description(f"{name}")
        bundle.updated(dateparser.parse("now").astimezone())
        bundle.link(href=f"{name}/all.atom", rel="self")
        out_dir = os.path.join(public_base_path, name)
        bundle.atom_file(os.path.join(out_dir, "all.atom"))
        bundle.rss_file(os.path.join(out_dir, "all.rss"))


def _write_index(feeds, default):
    """Render ``index.html`` listing, per bundle, the feeds it contains.

    Each bundle lists an "All" entry first, followed by its feeds sorted by
    title.
    """
    index = {}
    for feed_id in tqdm.tqdm(feeds, desc="Generating index.html"):
        feed = feeds[feed_id]
        for name in _bundle_names(feed, default):
            index.setdefault(name, []).append(
                {
                    "href": os.path.join(name, f"{feed_id}.atom"),
                    "title": feed.get("title", feed_id),
                }
            )
    for name, items in index.items():
        items.sort(key=lambda item: item["title"])
        items.insert(0, {"href": os.path.join(name, "all.atom"), "title": "All"})

    env = Environment(loader=FileSystemLoader("web/templates"))
    page = env.get_template("index.html")
    # Explicit encoding so the output does not depend on the platform locale.
    index_path = os.path.join(public_base_path, "index.html")
    with open(index_path, "w", encoding="utf-8") as f:
        f.write(page.render(title="FeedFox RSS", bundles=index))


def _copy_static():
    """Replace ``<public>/static`` with a fresh copy of ``web/static``."""
    static_path = os.path.join(public_base_path, "static")
    if os.path.exists(static_path):
        shutil.rmtree(static_path)
    shutil.copytree("web/static", static_path)


def main():
    """Build all feeds, bundles and the index page described in config.yaml."""
    with open("config.yaml", encoding="utf-8") as f:
        config = yaml.safe_load(f)
    templates = {
        name: Parser(**spec) for name, spec in config["templates"].items()
    }
    default = config["default"]
    feeds = config["feeds"]

    browser = Browser()
    browser.start()
    bundle_feeds = {}
    try:
        for feed_id in tqdm.tqdm(feeds, desc="Fetching feeds"):
            _build_feed(
                feed_id, feeds[feed_id], default, templates, browser, bundle_feeds
            )
        _write_bundles(bundle_feeds)
    finally:
        # Always stop the browser, even when a feed fails to build.
        browser.stop()

    _write_index(feeds, default)
    _copy_static()


if __name__ == "__main__":
    main()