diff --git a/gazouilleur/bot.py b/gazouilleur/bot.py index d801afd..387d9a0 100644 --- a/gazouilleur/bot.py +++ b/gazouilleur/bot.py @@ -166,7 +166,7 @@ def joined(self, channel): # Follow RSS Feeds matching url queries set for this channel with !follow self.feeders[lowchan]['news'] = FeederFactory(self, channel, 'news', 299, pagetimeout=35) # Monitor webpages set for this channel with !monitor - self.feeders[lowchan]['pages'] = FeederFactory(self, channel, 'pages', 299, pagetimeout=35) + self.feeders[lowchan]['pages'] = FeederFactory(self, channel, 'pages', 299, timeout=900, pagetimeout=95) twuser = get_chan_twitter_user(channel, conf) if twuser: # Get OAuth2 tokens for twitter search extra limitrate diff --git a/gazouilleur/lib/feeds.py b/gazouilleur/lib/feeds.py index c143edb..4b95d2a 100644 --- a/gazouilleur/lib/feeds.py +++ b/gazouilleur/lib/feeds.py @@ -30,6 +30,7 @@ from gazouilleur.lib.utils import * from gazouilleur.lib.microblog import Microblog, check_twitter_results, grab_extra_meta, reformat_extended_tweets from gazouilleur.lib.stats import Stats +from gazouilleur.lib.webmonitor import WebMonitor class FeederProtocol(object): @@ -146,7 +147,10 @@ def process_elements(self, data, url, name=None): if not data: returnD(False) if self.fact.name == "pages": - self.log(name+ ": \n"+"\n".join(data.split('\n')[:3]), hint=True) + differ = WebMonitor(name) + info = differ.check_diff(url, data) + if info: + self.fact.ircclient._send_message(info, self.fact.channel) returnD(True) if not data.entries: returnD(False) diff --git a/gazouilleur/lib/stats.py b/gazouilleur/lib/stats.py index 110d781..274debc 100644 --- a/gazouilleur/lib/stats.py +++ b/gazouilleur/lib/stats.py @@ -8,17 +8,14 @@ from gazouilleur import config from gazouilleur.lib.mongo import SingleMongo, find_stats, count_followers, find_last_followers, sortasc, sortdesc from gazouilleur.lib.log import loggerr +from gazouilleur.lib.templater import Templater from gazouilleur.lib.utils import * -class 
Stats(object): +class Stats(Templater): def __init__(self, user): self.user = user - try: - self.url = '%s/' % config.URL_STATS.rstrip('/') - except: - self.url = None - self.templates = os.path.join("web", "templates") + Templater.__init__(self) @inlineCallbacks def print_last(self): @@ -131,21 +128,6 @@ def dump_data(self): self.render_template("static_stats.html", self.user, data) returnValue(True) - def render_template(self, template, name, data): - outfile = template.replace('.html', '_%s.html' % name) - try: - import codecs - import pystache - from contextlib import nested - ofile = os.path.join("web", outfile) - with nested(open(os.path.join(self.templates, template), "r"), codecs.open(ofile, "w", encoding="utf-8")) as (temp, generated): - generated.write(pystache.Renderer(string_encoding='utf8').render(temp.read(), data)) - os.chmod(ofile, 0o644) - return True - except IOError as e: - loggerr("Could not write web/%s from %s/%s : %s" % (outfile, self.templates, template, e), action="stats") - return False - @inlineCallbacks def digest(self, hours, channel): now = datetime.today() diff --git a/gazouilleur/lib/templater.py b/gazouilleur/lib/templater.py new file mode 100644 index 0000000..1ab700d --- /dev/null +++ b/gazouilleur/lib/templater.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import os, codecs +from pystache import Renderer +from contextlib import nested +from gazouilleur.lib.log import loggerr +try: + from gazouilleur.config import URL_STATS +except: + URL_STATS = None + +class Templater(object): + + def __init__(self): + self.url = '%s/' % URL_STATS.rstrip('/') if URL_STATS else None + self.templates = os.path.join("web", "templates") + + def render_template(self, template, name, data): + outfile = template.replace('.html', '_%s.html' % name) + try: + ofile = os.path.join("web", outfile) + with nested(open(os.path.join(self.templates, template), "r"), codecs.open(ofile, "w", encoding="utf-8")) as (temp, generated): + 
generated.write(Renderer(string_encoding='utf8').render(temp.read(), data)) + os.chmod(ofile, 0o644) + return True + except IOError as e: + loggerr("Could not write web/%s from %s/%s : %s" % (outfile, self.templates, template, e), action="stats") + return False + diff --git a/gazouilleur/lib/tests.py b/gazouilleur/lib/tests.py index 556b117..dc76f9e 100644 --- a/gazouilleur/lib/tests.py +++ b/gazouilleur/lib/tests.py @@ -65,7 +65,7 @@ exit(1) try: - from gazouilleur.lib import ircclient_with_names, irccolors, feeds, filelogger, httpget, log, microblog, mongo, stats, utils + from gazouilleur.lib import ircclient_with_names, irccolors, feeds, filelogger, httpget, log, microblog, mongo, stats, utils, templater except Exception as e: logerr("Oups, looks like something is wrong somewhere in the code, shouldn't be committed...") logerr("%s\n%s" % (e, "\n".join(format_exc().splitlines()[-3:-1]))) diff --git a/gazouilleur/lib/webmonitor.py b/gazouilleur/lib/webmonitor.py new file mode 100644 index 0000000..24a9d25 --- /dev/null +++ b/gazouilleur/lib/webmonitor.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import os, time +from gazouilleur.lib.templater import Templater + +class WebMonitor(Templater): + + def __init__(self, name): + Templater.__init__(self) + self.name = name + self.path = os.path.join('web', 'monitor', name) + if not os.path.exists(self.path): + os.makedirs(self.path) + + def check_diff(self, url, data): + # TODO: + # apply url_rewrite for '<[^>]*=['"]/([^/]|$)' et '<[^>]*=['"](!:http)' + # - check if file -last exists + # - if so diff md5 current/last + # - check if exist and not diff + if False: + return None + for name in ["last", time.strftime("%y%m%d-%H%M")]: + fil = os.path.join(self.path, "%s.html" % name) + with open(fil, "w") as f: + f.write(data) + os.chmod(fil, 0o644) + msg = "Looks like the webpage %s at %s just changed!" 
% (self.name, url) + if self.url: + self.build_diff_page(url) + msg += "\nYou can check the different versions and diffs at %smonitor_%s.html" % (self.url, self.name) + return msg + + def build_diff_page(self, url): + data = { + "name": self.name, + "url": url, + } + data["versions"] = sorted(os.listdir(os.path.join('web', 'monitor', self.name)), reverse=True) + self.render_template("monitor.html", self.name, data) diff --git a/web/templates/digest.html b/web/templates/digest.html index 428353f..ba331e0 100644 --- a/web/templates/digest.html +++ b/web/templates/digest.html @@ -2,9 +2,9 @@ - - Digest gazouilleur {{channel}} - + + +

WebMonitor by gazouilleur for {{name}} at {{url}}

+

Available versions

+ +

Current version

+ +

Diff view

+ Vs. +
+ + +