X-Git-Url: http://git.veekun.com/zzz-spline-frontpage.git/blobdiff_plain/2ff43fe9bb4cb9425c192f336ac30804a11520a2..e1e19c27da0c22e6b65a6e245f53f8a26739778e:/splinext/frontpage/sources.py?ds=sidebyside diff --git a/splinext/frontpage/sources.py b/splinext/frontpage/sources.py index d082ada..e28dc38 100644 --- a/splinext/frontpage/sources.py +++ b/splinext/frontpage/sources.py @@ -6,10 +6,13 @@ from collections import namedtuple import datetime import subprocess from subprocess import PIPE +from urllib2 import URLError import feedparser import lxml.html +from pylons import cache + from spline.lib import helpers def max_age_to_datetime(max_age): @@ -19,8 +22,10 @@ def max_age_to_datetime(max_age): if max_age == None: return None - seconds = int(max_age) + dt = datetime.datetime.now() + dt -= datetime.timedelta(seconds=int(max_age)) + return dt class Source(object): @@ -57,6 +62,9 @@ class Source(object): self.limit = int(limit) self.max_age = max_age_to_datetime(max_age) + def do_cron(self, *args, **kwargs): + return + def poll(self, global_limit, global_max_age): """Public wrapper that takes care of reconciling global and source item limit and max age. @@ -79,9 +87,60 @@ class Source(object): """ raise NotImplementedError +class CachedSource(Source): + """Supports caching a source's updates in memcache. + + On the surface, this functions just like any other ``Source``. Calling + ``poll`` still returns a list of updates. However, ``poll`` does not call + your ``_poll``; instead, your implementation is called by the spline cron, + and the results are cached. ``poll`` then returns the contents of the + cache. + + You must define a ``_cache_key`` method that returns a key uniquely + identifying this object. Your key will be combined with the class name, so + it only needs to be unique for that source, not globally. + + You may also override ``poll_frequency``, the number of minutes between + pollings. By default, this is a rather conservative 60. + + Note that it may take up to a minute after server startup for updates + from a cached source to appear. + """ + + poll_frequency = 60 + + def cache_key(self): + return repr(type(self)) + ':' + self._cache_key() + + def _cache_key(self): + raise NotImplementedError + + def do_cron(self, tic, *args, **kwargs): + if tic % self.poll_frequency != 0: + # Too early! + return + + try: + updates = self._poll(self.limit, self.max_age) + cache.get_cache('spline-frontpage')[self.cache_key()] = updates + except Exception: + # Hmm, polling broke. Be conservative and don't do anything; old + # data is probably still OK for now + pass + + return + + def poll(self, global_limit, global_max_age): + """Fetches cached updates.""" + try: + return cache.get_cache('spline-frontpage')[self.cache_key()] + except KeyError: + # Haven't cached anything yet, apparently + return [] + FrontPageRSS = namedtuple('FrontPageRSS', ['source', 'time', 'entry', 'content']) -class FeedSource(Source): +class FeedSource(CachedSource): """Represents an RSS or Atom feed. Extra properties: @@ -94,15 +153,25 @@ class FeedSource(Source): SUMMARY_LENGTH = 1000 + poll_frequency = 15 + def __init__(self, feed_url, **kwargs): kwargs.setdefault('title', None) super(FeedSource, self).__init__(**kwargs) self.feed_url = feed_url + def _cache_key(self): + return self.feed_url + def _poll(self, limit, max_age): feed = feedparser.parse(self.feed_url) + if feed.bozo and isinstance(feed.bozo_exception, URLError): + # Feed is DOWN. Bail here; otherwise, old entries might be lost + # just because, say, Bulbanews is down yet again + raise feed.bozo_exception + if not self.title: self.title = feed.feed.title @@ -173,7 +242,7 @@ FrontPageGit = namedtuple('FrontPageGit', ['source', 'time', 'log', 'tag']) FrontPageGitCommit = namedtuple('FrontPageGitCommit', ['hash', 'author', 'time', 'subject', 'repo']) -class GitSource(Source): +class GitSource(CachedSource): """Represents a git repository. The main repository is checked for annotated tags, and an update is @@ -213,6 +282,9 @@ class GitSource(Source): self.gitweb = gitweb self.tag_pattern = tag_pattern + def _cache_key(self): + return self.repo_paths[0] + def _poll(self, limit, max_age): # Fetch the main repo's git tags git_dir = '--git-dir=' + self.repo_paths[0]