class CachedSource(Source):
    """A ``Source`` whose updates are served out of a cache.

    To callers this looks like any other ``Source``: ``poll`` hands back a
    list of updates.  The difference is that ``poll`` never runs ``_poll``
    itself.  ``_poll`` is run from ``do_cron`` instead, its return value is
    stored in the ``spline-frontpage`` cache, and ``poll`` simply returns
    whatever is stored there.

    When ``_poll`` returns None, the existing cache entry is left untouched.

    Subclasses must implement ``_cache_key``, returning a key that identifies
    this particular object.  The key is prefixed with the repr of the class,
    so it only has to be unique among sources of the same class.

    Subclasses may also override ``poll_frequency``; ``do_cron`` only polls
    when the ``tic`` it is given is a multiple of this value.  The default is
    a conservative 60.

    Until the first successful cron-driven poll has run, ``poll`` returns an
    empty list.
    """

    poll_frequency = 60

    def cache_key(self):
        """Return the full cache key: class repr, a colon, then
        ``_cache_key()``.
        """
        class_part = repr(type(self))
        return class_part + ':' + self._cache_key()

    def _cache_key(self):
        """Return a per-class-unique key for this source.  Must be
        overridden.
        """
        raise NotImplementedError

    def do_cron(self, tic, *args, **kwargs):
        """Refresh the cached updates if ``tic`` falls on a polling
        boundary.
        """
        is_due = tic % self.poll_frequency == 0
        if is_due:
            fresh = self._poll(self.limit, self.max_age)
            # A None result means "nothing usable"; keep the old entry.
            if fresh is not None:
                store = cache.get_cache('spline-frontpage')
                store[self.cache_key()] = fresh

    def poll(self, global_limit, global_max_age):
        """Return the cached updates, or an empty list if none are stored."""
        try:
            store = cache.get_cache('spline-frontpage')
            cached = store[self.cache_key()]
        except KeyError:
            # Nothing has been cached for this source yet
            cached = []
        return cached
Bail here; otherwise, old entries might be lost + # just because, say, Bulbanews is down yet again + return None + if not self.title: self.title = feed.feed.title @@ -128,6 +196,11 @@ class FeedSource(Source): # If there be a summary, cheerfully trust that it's actually a # summary content = entry.summary + elif 'content' in entry and \ + len(entry.content[0].value) <= self.SUMMARY_LENGTH: + + # Full content is short; use as-is! + content = entry.content[0].value elif 'content' in entry: # Full content is way too much, especially for my giant blog posts. # Cut this down to some arbitrary number of characters, then feed @@ -171,9 +244,9 @@ class FeedSource(Source): FrontPageGit = namedtuple('FrontPageGit', ['source', 'time', 'log', 'tag']) FrontPageGitCommit = namedtuple('FrontPageGitCommit', - ['hash', 'author', 'time', 'subject', 'repo']) + ['hash', 'author', 'email', 'time', 'subject', 'repo']) -class GitSource(Source): +class GitSource(CachedSource): """Represents a git repository. The main repository is checked for annotated tags, and an update is @@ -196,13 +269,20 @@ class GitSource(Source): Base URL to a gitweb installation, so commit ids can be linked to the commit proper. + ``bug_tracker`` + URL to a bug tracker; anything matching "#xxx" will be converted into a + link to this. Should contain a "{0}", which will be replaced by the + bug number. + ``tag_pattern`` Optional. A shell glob pattern used to filter the tags. 
""" template = '/front_page/git.mako' - def __init__(self, repo_paths, repo_names, gitweb, tag_pattern=None, **kwargs): + def __init__(self, repo_paths, repo_names, gitweb, bug_tracker=None, + tag_pattern=None, **kwargs): + kwargs.setdefault('title', None) super(GitSource, self).__init__(**kwargs) @@ -211,8 +291,12 @@ class GitSource(Source): self.repo_names = repo_names.split() self.gitweb = gitweb + self.bug_tracker = bug_tracker self.tag_pattern = tag_pattern + def _cache_key(self): + return self.repo_paths[0] + def _poll(self, limit, max_age): # Fetch the main repo's git tags git_dir = '--git-dir=' + self.repo_paths[0] @@ -261,16 +345,25 @@ class GitSource(Source): 'git', '--git-dir=' + repo_path, 'log', - '--pretty=%h%x00%an%x00%at%x00%s', + '--pretty=%h%x00%an%x00%aE%x00%at%x00%s', "{0}..{1}".format(since_tag, tag), ] proc = subprocess.Popen(git_log_args, stdout=PIPE) for line in proc.stdout: - hash, author, time, subject = line.strip().split('\x00') + hash, author, email, time, subject \ + = line.strip().decode('utf8').split('\x00') + + # Convert bug numbers in subject to URLs + if self.bug_tracker: + subject = helpers.literal( + re.sub(u'#(\d+)', self._linkify_bug_number, subject) + ) + commits.append( FrontPageGitCommit( hash = hash, author = author, + email = email, time = datetime.datetime.fromtimestamp(int(time)), subject = subject, repo = repo_name, @@ -286,3 +379,10 @@ class GitSource(Source): updates.append(update) return updates + + def _linkify_bug_number(self, match): + """Regex replace function for changing bug numbers into links.""" + n = match.group(1) + bug_url = self.bug_tracker.format(match.group(1)) + return helpers.literal( + u"""{1}""".format(bug_url, match.group(0)))