from collections import namedtuple
import datetime
+import re
import subprocess
from subprocess import PIPE
+from urllib2 import URLError
import feedparser
import lxml.html
+from pylons import cache
+
from spline.lib import helpers
+def max_age_to_datetime(max_age):
+ """``max_age`` is specified in config as a number of seconds old. This
+ function takes that number and returns a corresponding datetime object.
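+
+ For example (hypothetical config value)::
+
+     max_age_to_datetime('3600')   # a datetime one hour in the past
+     max_age_to_datetime(None)     # None; no age cutoff at all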
+ """
+ if max_age is None:
+ return None
+
+ dt = datetime.datetime.now()
+ dt -= datetime.timedelta(seconds=int(max_age))
+
+ return dt
+
class Source(object):
"""Represents a source to be polled for updates. Sources are populated
The template will be passed one parameter: the update object, ``update``.
"""
- def __init__(self, title, icon, link, limit=None, max_age=None):
+ def __init__(self, config, title, icon, link, limit=None, max_age=None):
self.title = title
self.icon = icon
self.link = link
- self.limit = limit
- self.max_age = max_age
+ self.limit = int(limit) if limit is not None else None
+ self.max_age = max_age_to_datetime(max_age)
+
+ def do_cron(self, *args, **kwargs):
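+ """Hook called periodically by the spline cron. The base source has
+ nothing to do.
+ """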
+ return
+
+ def poll(self, global_limit, global_max_age):
+ """Public wrapper that takes care of reconciling global and source item
+ limit and max age.
+
+ Subclasses should implement ``_poll``, below.
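+
+ For example, a source ``limit`` of 5 and a ``global_limit`` of 10 mean
+ ``_poll`` is called with ``limit=5``.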
+ """
+ # Smallest limit wins, but either may be None, meaning no limit
+ limits = [lim for lim in (self.limit, global_limit) if lim is not None]
+ limit = min(limits) if limits else None
- def poll(self):
- """Poll for updates. Must return an iterable. Each element should be
- an Update object.
+ # Latest max age wins. Note that either could be None, but that's
+ # fine, because None is less than everything else
+ max_age = max(self.max_age, global_max_age)
+
+ return self._poll(limit, max_age)
+
+ def _poll(self, limit, max_age):
+ """Implementation of polling for updates. Must return an iterable.
+ Each element should be an object with ``source`` and ``time``
+ properties. A namedtuple works well.
"""
raise NotImplementedError
+class CachedSource(Source):
+ """Supports caching a source's updates in memcache.
+
+ On the surface, this functions just like any other ``Source``. Calling
+ ``poll`` still returns a list of updates. However, ``poll`` does not call
+ your ``_poll``; instead, your implementation is called by the spline cron,
+ and the results are cached. ``poll`` then returns the contents of the
+ cache.
+
+ You must define a ``_cache_key`` method that returns a key uniquely
+ identifying this object. Your key will be combined with the class name,
+ so it only needs to be unique among sources of the same class, not
+ globally.
+
+ You may also override ``poll_frequency``, the number of minutes between
+ pollings. By default, this is a rather conservative 60.
+
+ Note that it may take up to a minute after server startup for updates
+ from a cached source to appear.
+ """
+
+ poll_frequency = 60
+
+ def cache_key(self):
+ return repr(type(self)) + ':' + self._cache_key()
+
+ def _cache_key(self):
+ raise NotImplementedError
+
+ def do_cron(self, tic, *args, **kwargs):
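+ """Called by the spline cron. Assuming the cron's ``tic`` advances once
+ a minute, this re-polls every ``poll_frequency`` minutes and caches the
+ results.
+ """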
+ if tic % self.poll_frequency != 0:
+ # Too early!
+ return
+
+ updates = self._poll(self.limit, self.max_age)
+ cache.get_cache('spline-frontpage')[self.cache_key()] = updates
+
+ return
+
+ def poll(self, global_limit, global_max_age):
+ """Fetches cached updates."""
+ try:
+ return cache.get_cache('spline-frontpage')[self.cache_key()]
+ except KeyError:
+ # Haven't cached anything yet, apparently
+ return []
+
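+ # A minimal sketch of a CachedSource subclass (hypothetical; the names
+ # and URL handling are illustrative only):
+ #
+ #     class NewsSource(CachedSource):
+ #         poll_frequency = 30
+ #
+ #         def __init__(self, news_url, **kwargs):
+ #             super(NewsSource, self).__init__(**kwargs)
+ #             self.news_url = news_url
+ #
+ #         def _cache_key(self):
+ #             return self.news_url
+ #
+ #         def _poll(self, limit, max_age):
+ #             # Fetch at most `limit` updates no older than `max_age`
+ #             return []
+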
FrontPageRSS = namedtuple('FrontPageRSS', ['source', 'time', 'entry', 'content'])
-class FeedSource(Source):
+class FeedSource(CachedSource):
"""Represents an RSS or Atom feed.
Extra properties:
SUMMARY_LENGTH = 1000
+ poll_frequency = 15
+
def __init__(self, feed_url, **kwargs):
kwargs.setdefault('title', None)
super(FeedSource, self).__init__(**kwargs)
self.feed_url = feed_url
- def poll(self, limit, max_age):
+ def _cache_key(self):
+ return self.feed_url
+
+ def _poll(self, limit, max_age):
feed = feedparser.parse(self.feed_url)
+ if feed.bozo and isinstance(feed.bozo_exception, URLError):
+ # Feed is DOWN. Bail here; otherwise, old entries might be lost
+ # just because, say, Bulbanews is down yet again
+ raise feed.bozo_exception
+
if not self.title:
self.title = feed.feed.title
# If there be a summary, cheerfully trust that it's actually a
# summary
content = entry.summary
+ elif 'content' in entry and \
+ len(entry.content[0].value) <= self.SUMMARY_LENGTH:
+
+ # Full content is short; use as-is!
+ content = entry.content[0].value
elif 'content' in entry:
# Full content is way too much, especially for my giant blog posts.
# Cut this down to some arbitrary number of characters, then feed
FrontPageGit = namedtuple('FrontPageGit', ['source', 'time', 'log', 'tag'])
FrontPageGitCommit = namedtuple('FrontPageGitCommit',
- ['hash', 'author', 'time', 'subject', 'repo'])
+ ['hash', 'author', 'email', 'time', 'subject', 'repo'])
-class GitSource(Source):
+class GitSource(CachedSource):
"""Represents a git repository.
The main repository is checked for annotated tags, and an update is
Base URL to a gitweb installation, so commit ids can be linked to the
commit proper.
+ ``bug_tracker``
+ URL to a bug tracker; anything matching "#xxx" will be converted into a
+ link to this. Should contain a "{0}", which will be replaced by the
+ bug number.
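+ For example (hypothetical tracker): ``http://bugs.example.com/{0}``.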
+
``tag_pattern``
Optional. A shell glob pattern used to filter the tags.
"""
template = '/front_page/git.mako'
- def __init__(self, repo_paths, repo_names, gitweb, tag_pattern=None, **kwargs):
+ def __init__(self, repo_paths, repo_names, gitweb, bug_tracker=None,
+ tag_pattern=None, **kwargs):
+
kwargs.setdefault('title', None)
super(GitSource, self).__init__(**kwargs)
self.repo_names = repo_names.split()
self.gitweb = gitweb
+ self.bug_tracker = bug_tracker
self.tag_pattern = tag_pattern
- def poll(self, limit, max_age):
+ def _cache_key(self):
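+ """The main repository's path identifies this source."""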
+ return self.repo_paths[0]
+ def _poll(self, limit, max_age):
# Fetch the main repo's git tags
git_dir = '--git-dir=' + self.repo_paths[0]
args = [
'git',
'--git-dir=' + repo_path,
'log',
- '--pretty=%h%x00%an%x00%at%x00%s',
+ '--pretty=%h%x00%an%x00%aE%x00%at%x00%s',
"{0}..{1}".format(since_tag, tag),
]
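+ # The --pretty format above is: abbreviated hash, author name, author
+ # email, author date as a unix timestamp, and subject, separated by NUL
+ # bytes so a subject containing odd characters can't break the parse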
proc = subprocess.Popen(git_log_args, stdout=PIPE)
for line in proc.stdout:
- hash, author, time, subject = line.strip().split('\x00')
+ hash, author, email, time, subject \
+ = line.strip().decode('utf8').split('\x00')
+
+ # Convert bug numbers in subject to URLs
+ if self.bug_tracker:
+ subject = helpers.literal(
+ re.sub(ur'#(\d+)', self._linkify_bug_number, subject)
+ )
+
commits.append(
FrontPageGitCommit(
hash = hash,
author = author,
+ email = email,
time = datetime.datetime.fromtimestamp(int(time)),
subject = subject,
repo = repo_name,
updates.append(update)
return updates
+
+ def _linkify_bug_number(self, match):
+ """Regex replace function for changing bug numbers into links."""
+ bug_url = self.bug_tracker.format(match.group(1))
+ return helpers.literal(
+ u"""<a href="{0}">{1}</a>""".format(bug_url, match.group(0)))