X-Git-Url: http://git.veekun.com/zzz-spline-frontpage.git/blobdiff_plain/9be4bb2b3da075385c748f33488d316ebcbdd9ff..4bb3ad19bae18a454509588df79c8b1470727a18:/splinext/frontpage/sources.py

diff --git a/splinext/frontpage/sources.py b/splinext/frontpage/sources.py
index 1afc484..43d8db8 100644
--- a/splinext/frontpage/sources.py
+++ b/splinext/frontpage/sources.py
@@ -6,12 +6,27 @@ from collections import namedtuple
 import datetime
 import subprocess
 from subprocess import PIPE
+from urllib2 import URLError
 
 import feedparser
 import lxml.html
 
+from pylons import cache
+
 from spline.lib import helpers
 
+def max_age_to_datetime(max_age):
+    """``max_age`` is specified in config as a number of seconds old. This
+    function takes that number and returns a corresponding datetime object.
+    """
+    if max_age == None:
+        return None
+
+    dt = datetime.datetime.now()
+    dt -= datetime.timedelta(seconds=int(max_age))
+
+    return dt
+
 
 class Source(object):
     """Represents a source to be polled for updates. Sources are populated
@@ -44,18 +59,88 @@ class Source(object):
         self.title = title
         self.icon = icon
         self.link = link
-        self.limit = limit
-        self.max_age = max_age
+        self.limit = int(limit)
+        self.max_age = max_age_to_datetime(max_age)
+
+    def do_cron(self, *args, **kwargs):
+        return
+
+    def poll(self, global_limit, global_max_age):
+        """Public wrapper that takes care of reconciling global and source item
+        limit and max age.
 
-    def poll(self):
-        """Poll for updates. Must return an iterable. Each element should be
-        an Update object.
+        Subclasses should implement ``_poll``, below.
         """
+        # Smallest limit wins
+        limit = min(self.limit, global_limit)
+
+        # Latest max age wins. Note that either could be None, but that's
+        # fine, because None is less than everything else
+        max_age = max(self.max_age, global_max_age)
+
+        return self._poll(limit, max_age)
+
+    def _poll(self, limit, max_age):
+        """Implementation of polling for updates. Must return an iterable.
+        Each element should be an object with ``source`` and ``time``
+        properties. A namedtuple works well.
+        """
+        raise NotImplementedError
+
+class CachedSource(Source):
+    """Supports caching a source's updates in memcache.
+
+    On the surface, this functions just like any other ``Source``. Calling
+    ``poll`` still returns a list of updates. However, ``poll`` does not call
+    your ``_poll``; instead, your implementation is called by the spline cron,
+    and the results are cached. ``poll`` then returns the contents of the
+    cache.
+
+    You must define a ``_cache_key`` method that returns a key uniquely
+    identifying this object. Your key will be combined with the class name, so
+    it only needs to be unique for that source, not globally.
+
+    You may also override ``poll_frequency``, the number of minutes between
+    pollings. By default, this is a rather conservative 60.
+
+    Note that it may take up to a minute after server startup for updates
+    from a cached source to appear.
+    """
+
+    poll_frequency = 60
+
+    def cache_key(self):
+        return repr(type(self)) + ':' + self._cache_key()
+
+    def _cache_key(self):
         raise NotImplementedError
 
+    def do_cron(self, tic, *args, **kwargs):
+        if tic % self.poll_frequency != 0:
+            # Too early!
+            return
+
+        try:
+            updates = self._poll(self.limit, self.max_age)
+            cache.get_cache('spline-frontpage')[self.cache_key()] = updates
+        except Exception:
+            # Hmm, polling broke. Be conservative and don't do anything; old
+            # data is probably still OK for now
+            pass
+
+        return
+
+    def poll(self, global_limit, global_max_age):
+        """Fetches cached updates."""
+        try:
+            return cache.get_cache('spline-frontpage')[self.cache_key()]
+        except KeyError:
+            # Haven't cached anything yet, apparently
+            return []
+
 FrontPageRSS = namedtuple('FrontPageRSS',
     ['source', 'time', 'entry', 'content'])
-class FeedSource(Source):
+class FeedSource(CachedSource):
     """Represents an RSS or Atom feed.
 
     Extra properties:
@@ -68,15 +153,25 @@ class FeedSource(Source):
 
     SUMMARY_LENGTH = 1000
 
+    poll_frequency = 15
+
     def __init__(self, feed_url, **kwargs):
         kwargs.setdefault('title', None)
         super(FeedSource, self).__init__(**kwargs)
 
         self.feed_url = feed_url
 
-    def poll(self, limit, max_age):
+    def _cache_key(self):
+        return self.feed_url
+
+    def _poll(self, limit, max_age):
         feed = feedparser.parse(self.feed_url)
 
+        if feed.bozo and isinstance(feed.bozo_exception, URLError):
+            # Feed is DOWN. Bail here; otherwise, old entries might be lost
+            # just because, say, Bulbanews is down yet again
+            raise feed.bozo_exception
+
         if not self.title:
             self.title = feed.feed.title
 
@@ -145,9 +240,9 @@ class FeedSource(Source):
 
 FrontPageGit = namedtuple('FrontPageGit', ['source', 'time', 'log', 'tag'])
 FrontPageGitCommit = namedtuple('FrontPageGitCommit',
-    ['hash', 'author', 'time', 'subject', 'repo'])
+    ['hash', 'author', 'email', 'time', 'subject', 'repo'])
 
-class GitSource(Source):
+class GitSource(CachedSource):
     """Represents a git repository.
 
     The main repository is checked for annotated tags, and an update is
@@ -187,8 +282,10 @@ class GitSource(Source):
         self.gitweb = gitweb
         self.tag_pattern = tag_pattern
 
-    def poll(self, limit, max_age):
+    def _cache_key(self):
+        return self.repo_paths[0]
 
+    def _poll(self, limit, max_age):
         # Fetch the main repo's git tags
         git_dir = '--git-dir=' + self.repo_paths[0]
         args = [
@@ -236,16 +333,17 @@ class GitSource(Source):
                 'git',
                 '--git-dir=' + repo_path,
                 'log',
-                '--pretty=%h%x00%an%x00%at%x00%s',
+                '--pretty=%h%x00%an%x00%aE%x00%at%x00%s',
                 "{0}..{1}".format(since_tag, tag),
             ]
             proc = subprocess.Popen(git_log_args, stdout=PIPE)
 
             for line in proc.stdout:
-                hash, author, time, subject = line.strip().split('\x00')
+                hash, author, email, time, subject = line.strip().split('\x00')
                 commits.append(
                     FrontPageGitCommit(
                         hash = hash, author = author,
+                        email = email,
                         time = datetime.datetime.fromtimestamp(int(time)),
                         subject = subject,
                         repo = repo_name,
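
For readers of this change, here is a minimal sketch (not part of the commit) of how the ``CachedSource`` contract added above might be used by a frontpage plugin: subclass it, implement ``_cache_key`` and ``_poll``, and optionally override ``poll_frequency``; the spline cron then fills the cache through ``do_cron``, and ``poll`` only reads it back. The ``NoteSource`` class, ``FrontPageNote`` tuple, and ``notes_path`` argument below are hypothetical illustrations; only the ``CachedSource`` base class and its method names come from the diff.

    # Hypothetical CachedSource subclass, sketched for illustration only.
    from collections import namedtuple
    import datetime

    from splinext.frontpage.sources import CachedSource

    FrontPageNote = namedtuple('FrontPageNote', ['source', 'time', 'note'])

    class NoteSource(CachedSource):
        # Assumed example source: short notes read from a local text file.
        poll_frequency = 30  # minutes between cron pollings

        def __init__(self, notes_path, **kwargs):
            super(NoteSource, self).__init__(**kwargs)
            self.notes_path = notes_path

        def _cache_key(self):
            # Only needs to be unique per source; cache_key() adds the class name
            return self.notes_path

        def _poll(self, limit, max_age):
            # Each update must expose ``source`` and ``time`` properties
            updates = []
            for line in open(self.notes_path):
                updates.append(FrontPageNote(
                    source = self,
                    time = datetime.datetime.now(),
                    note = line.strip(),
                ))
            return updates[:limit]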