from collections import namedtuple
import datetime
+import re
import subprocess
from subprocess import PIPE
+from urllib2 import URLError
import feedparser
import lxml.html
+from pylons import cache
+
from spline.lib import helpers
+def max_age_to_datetime(max_age):
+ """``max_age`` is specified in config as a number of seconds old. This
+ function takes that number and returns a corresponding datetime object.
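+
+ For example (hypothetical config value)::
+
+     max_age_to_datetime('3600')   # a datetime one hour in the past
+     max_age_to_datetime(None)     # None; no age cutoff at all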
+ """
+ if max_age is None:
+ return None
+
+ dt = datetime.datetime.now()
+ dt -= datetime.timedelta(seconds=int(max_age))
+
+ return dt
+
class Source(object):
"""Represents a source to be polled for updates. Sources are populated
The template will be passed one parameter: the update object, ``update``.
"""
- def __init__(self, title, icon, link, limit=None, max_age=None):
+ def __init__(self, config, title, icon, link, limit=None, max_age=None):
self.title = title
self.icon = icon
self.link = link
- self.limit = limit
- self.max_age = max_age
+ self.limit = int(limit) if limit is not None else None
+ self.max_age = max_age_to_datetime(max_age)
+
+ def do_cron(self, *args, **kwargs):
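+ """Hook called periodically by the spline cron. The base source has
+ nothing to do.
+ """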
+ return
+
+ def poll(self, global_limit, global_max_age):
+ """Public wrapper that takes care of reconciling global and source item
+ limit and max age.
+
+ Subclasses should implement ``_poll``, below.
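+
+ For example, a source ``limit`` of 5 and a ``global_limit`` of 10 mean
+ ``_poll`` is called with ``limit=5``.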
+ """
+ # Smallest limit wins, but either may be None, meaning no limit
+ limits = [lim for lim in (self.limit, global_limit) if lim is not None]
+ limit = min(limits) if limits else None
- def poll(self):
- """Poll for updates. Must return an iterable. Each element should be
- an Update object.
+ # Latest max age wins. Note that either could be None, but that's
+ # fine, because None is less than everything else
+ max_age = max(self.max_age, global_max_age)
+
+ return self._poll(limit, max_age)
+
+ def _poll(self, limit, max_age):
+ """Implementation of polling for updates. Must return an iterable.
+ Each element should be an object with ``source`` and ``time``
+ properties. A namedtuple works well.
"""
raise NotImplementedError
+class CachedSource(Source):
+ """Supports caching a source's updates in memcache.
+
+ On the surface, this functions just like any other ``Source``. Calling
+ ``poll`` still returns a list of updates. However, ``poll`` does not call
+ your ``_poll``; instead, your implementation is called by the spline cron,
+ and the results are cached. ``poll`` then returns the contents of the
+ cache.
+
+ You must define a ``_cache_key`` method that returns a key uniquely
+ identifying this object. Your key will be combined with the class name,
+ so it only needs to be unique among sources of the same class, not
+ globally.
+
+ You may also override ``poll_frequency``, the number of minutes between
+ pollings. By default, this is a rather conservative 60.
+
+ Note that it may take up to a minute after server startup for updates
+ from a cached source to appear.
+ """
+
+ poll_frequency = 60
+
+ def cache_key(self):
+ return repr(type(self)) + ':' + self._cache_key()
+
+ def _cache_key(self):
+ raise NotImplementedError
+
+ def do_cron(self, tic, *args, **kwargs):
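+ """Called by the spline cron. Assuming the cron's ``tic`` advances once
+ a minute, this re-polls every ``poll_frequency`` minutes and caches the
+ results.
+ """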
+ if tic % self.poll_frequency != 0:
+ # Too early!
+ return
+
+ updates = self._poll(self.limit, self.max_age)
+ cache.get_cache('spline-frontpage')[self.cache_key()] = updates
+
+ return
+
+ def poll(self, global_limit, global_max_age):
+ """Fetches cached updates."""
+ try:
+ return cache.get_cache('spline-frontpage')[self.cache_key()]
+ except KeyError:
+ # Haven't cached anything yet, apparently
+ return []
+
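+ # A minimal sketch of a CachedSource subclass (hypothetical; the names
+ # and URL handling are illustrative only):
+ #
+ #     class NewsSource(CachedSource):
+ #         poll_frequency = 30
+ #
+ #         def __init__(self, news_url, **kwargs):
+ #             super(NewsSource, self).__init__(**kwargs)
+ #             self.news_url = news_url
+ #
+ #         def _cache_key(self):
+ #             return self.news_url
+ #
+ #         def _poll(self, limit, max_age):
+ #             # Fetch at most `limit` updates no older than `max_age`
+ #             return []
+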
FrontPageRSS = namedtuple('FrontPageRSS', ['source', 'time', 'entry', 'content'])
-class FeedSource(Source):
+class FeedSource(CachedSource):
"""Represents an RSS or Atom feed.
Extra properties:
SUMMARY_LENGTH = 1000
+ poll_frequency = 15
+
def __init__(self, feed_url, **kwargs):
kwargs.setdefault('title', None)
super(FeedSource, self).__init__(**kwargs)
self.feed_url = feed_url
- def poll(self, limit, max_age):
+ def _cache_key(self):
+ return self.feed_url
+
+ def _poll(self, limit, max_age):
feed = feedparser.parse(self.feed_url)
+ if feed.bozo and isinstance(feed.bozo_exception, URLError):
+ # Feed is DOWN. Bail here; otherwise, old entries might be lost
+ # just because, say, Bulbanews is down yet again
+ raise feed.bozo_exception
+
if not self.title:
self.title = feed.feed.title
# If there be a summary, cheerfully trust that it's actually a
# summary
content = entry.summary
+ elif 'content' in entry and \
+ len(entry.content[0].value) <= self.SUMMARY_LENGTH:
+
+ # Full content is short; use as-is!
+ content = entry.content[0].value
elif 'content' in entry:
# Full content is way too much, especially for my giant blog posts.
# Cut this down to some arbitrary number of characters, then feed
FrontPageGit = namedtuple('FrontPageGit', ['source', 'time', 'log', 'tag'])
FrontPageGitCommit = namedtuple('FrontPageGitCommit',
- ['hash', 'author', 'time', 'subject', 'repo'])
+ ['hash', 'author', 'email', 'time', 'subject', 'repo'])
-class GitSource(Source):
+class GitSource(CachedSource):
"""Represents a git repository.
The main repository is checked for annotated tags, and an update is
Base URL to a gitweb installation, so commit ids can be linked to the
commit proper.
+ ``bug_tracker``
+ URL to a bug tracker; anything matching "#xxx" will be converted into a
+ link to this. Should contain a "{0}", which will be replaced by the
+ bug number.
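+ For example (hypothetical tracker): ``http://bugs.example.com/{0}``.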
+
``tag_pattern``
Optional. A shell glob pattern used to filter the tags.
"""
template = '/front_page/git.mako'
- def __init__(self, repo_paths, repo_names, gitweb, tag_pattern=None, **kwargs):
+ def __init__(self, repo_paths, repo_names, gitweb, bug_tracker=None,
+ tag_pattern=None, **kwargs):
+
kwargs.setdefault('title', None)
super(GitSource, self).__init__(**kwargs)
self.repo_names = repo_names.split()
self.gitweb = gitweb
+ self.bug_tracker = bug_tracker
self.tag_pattern = tag_pattern
- def poll(self, limit, max_age):
+ def _cache_key(self):
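+ """The main repository's path identifies this source."""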
+ return self.repo_paths[0]
+ def _poll(self, limit, max_age):
# Fetch the main repo's git tags
git_dir = '--git-dir=' + self.repo_paths[0]
args = [
'git',
'--git-dir=' + repo_path,
'log',
- '--pretty=%h%x00%an%x00%at%x00%s',
+ '--pretty=%h%x00%an%x00%aE%x00%at%x00%s',
"{0}..{1}".format(since_tag, tag),
]
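+ # The --pretty format above is: abbreviated hash, author name, author
+ # email, author date as a unix timestamp, and subject, separated by NUL
+ # bytes so a subject containing odd characters can't break the parse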
proc = subprocess.Popen(git_log_args, stdout=PIPE)
for line in proc.stdout:
- hash, author, time, subject = line.strip().split('\x00')
+ hash, author, email, time, subject \
+ = line.strip().decode('utf8').split('\x00')
+
+ # Convert bug numbers in subject to URLs
+ if self.bug_tracker:
+ subject = helpers.literal(
+ re.sub(ur'#(\d+)', self._linkify_bug_number, subject)
+ )
+
commits.append(
FrontPageGitCommit(
hash = hash,
author = author,
+ email = email,
time = datetime.datetime.fromtimestamp(int(time)),
subject = subject,
repo = repo_name,
updates.append(update)
return updates
+
+ def _linkify_bug_number(self, match):
+ """Regex replace function for changing bug numbers into links."""
+ bug_url = self.bug_tracker.format(match.group(1))
+ return helpers.literal(
+ u"""<a href="{0}">{1}</a>""".format(bug_url, match.group(0)))