Crash fix: Stupid typo in RSS code.
[zzz-spline-frontpage.git] / splinext / frontpage / sources.py
index d082ada..069b4b6 100644 (file)
@@ -4,12 +4,16 @@ implementations.
 
 from collections import namedtuple
 import datetime
+import re
 import subprocess
 from subprocess import PIPE
+from urllib2 import URLError
 
 import feedparser
 import lxml.html
 
+from pylons import cache
+
 from spline.lib import helpers
 
 def max_age_to_datetime(max_age):
@@ -19,8 +23,10 @@ def max_age_to_datetime(max_age):
     if max_age == None:
         return None
 
-    seconds = int(max_age)
+    dt = datetime.datetime.now()
+    dt -= datetime.timedelta(seconds=int(max_age))
 
+    return dt
 
 
 class Source(object):
@@ -50,13 +56,16 @@ class Source(object):
     The template will be passed one parameter: the update object, ``update``.
     """
 
-    def __init__(self, title, icon, link, limit=None, max_age=None):
+    def __init__(self, config, title, icon, link, limit=None, max_age=None):
         self.title = title
         self.icon = icon
         self.link = link
         self.limit = int(limit)
         self.max_age = max_age_to_datetime(max_age)
 
+    def do_cron(self, *args, **kwargs):
+        """Cron hook that does nothing here; ``CachedSource`` overrides it."""
+
     def poll(self, global_limit, global_max_age):
         """Public wrapper that takes care of reconciling global and source item
         limit and max age.
@@ -79,9 +88,55 @@ class Source(object):
         """
         raise NotImplementedError
 
+class CachedSource(Source):
+    """A ``Source`` whose updates are served out of memcache.
+
+    From the outside this behaves like any other ``Source``: ``poll`` still
+    returns a list of updates.  The difference is that ``poll`` never calls
+    your ``_poll``; the spline cron does that instead (through ``do_cron``),
+    and the results are cached.  ``poll`` just returns the cache's contents.
+
+    Subclasses must define a ``_cache_key`` method that returns a key uniquely
+    identifying this object.  The key is combined with the class, so it only
+    needs to be unique for that kind of source, not globally.
+
+    Subclasses may also override ``poll_frequency``, the number of minutes
+    between pollings.  The default is a rather conservative 60.
+
+    Note that it may take up to a minute after server startup for updates
+    from a cached source to appear.
+    """
+
+    poll_frequency = 60
+
+    def cache_key(self):
+        """Returns the full cache key: class repr, a colon, ``_cache_key()``."""
+        return repr(type(self)) + ':' + self._cache_key()
+
+    def _cache_key(self):
+        """Subclass hook: returns the source-specific part of the cache key."""
+        raise NotImplementedError
+
+    def do_cron(self, tic, *args, **kwargs):
+        """Re-polls and caches updates on every ``poll_frequency``-th tic."""
+        if tic % self.poll_frequency == 0:
+            fresh = self._poll(self.limit, self.max_age)
+            cache.get_cache('spline-frontpage')[self.cache_key()] = fresh
+
+    def poll(self, global_limit, global_max_age):
+        """Returns the cached updates, or an empty list if none are cached yet.
+
+        Both arguments are accepted but not used by this implementation.
+        """
+        try:
+            frontpage_cache = cache.get_cache('spline-frontpage')
+            return frontpage_cache[self.cache_key()]
+        except KeyError:
+            return []
+
 
 FrontPageRSS = namedtuple('FrontPageRSS', ['source', 'time', 'entry', 'content'])
-class FeedSource(Source):
+class FeedSource(CachedSource):
     """Represents an RSS or Atom feed.
 
     Extra properties:
@@ -94,15 +149,25 @@ class FeedSource(Source):
 
     SUMMARY_LENGTH = 1000
 
+    poll_frequency = 15
+
     def __init__(self, feed_url, **kwargs):
         kwargs.setdefault('title', None)
         super(FeedSource, self).__init__(**kwargs)
 
         self.feed_url = feed_url
 
+    def _cache_key(self):
+        return self.feed_url
+
     def _poll(self, limit, max_age):
         feed = feedparser.parse(self.feed_url)
 
+        if feed.bozo and isinstance(feed.bozo_exception, URLError):
+            # Feed is DOWN.  Bail here; otherwise, old entries might be lost
+            # just because, say, Bulbanews is down yet again
+            raise feed.bozo_exception
+
         if not self.title:
             self.title = feed.feed.title
 
@@ -128,6 +193,11 @@ class FeedSource(Source):
                 # If there be a summary, cheerfully trust that it's actually a
                 # summary
                 content = entry.summary
+            elif 'content' in entry and \
+                len(entry.content[0].value) <= self.SUMMARY_LENGTH:
+
+                # Full content is short; use as-is!
+                content = entry.content[0].value
             elif 'content' in entry:
                 # Full content is way too much, especially for my giant blog posts.
                 # Cut this down to some arbitrary number of characters, then feed
@@ -171,9 +241,9 @@ class FeedSource(Source):
 
 FrontPageGit = namedtuple('FrontPageGit', ['source', 'time', 'log', 'tag'])
 FrontPageGitCommit = namedtuple('FrontPageGitCommit',
-    ['hash', 'author', 'time', 'subject', 'repo'])
+    ['hash', 'author', 'email', 'time', 'subject', 'repo'])
 
-class GitSource(Source):
+class GitSource(CachedSource):
     """Represents a git repository.
 
     The main repository is checked for annotated tags, and an update is
@@ -196,13 +266,20 @@ class GitSource(Source):
         Base URL to a gitweb installation, so commit ids can be linked to the
         commit proper.
 
+    ``bug_tracker``
+        URL to a bug tracker; anything matching "#xxx" will be converted into a
+        link to this.  Should contain a "{0}", which will be replaced by the
+        bug number.
+
     ``tag_pattern``
         Optional.  A shell glob pattern used to filter the tags.
     """
 
     template = '/front_page/git.mako'
 
-    def __init__(self, repo_paths, repo_names, gitweb, tag_pattern=None, **kwargs):
+    def __init__(self, repo_paths, repo_names, gitweb, bug_tracker=None,
+        tag_pattern=None, **kwargs):
+
         kwargs.setdefault('title', None)
         super(GitSource, self).__init__(**kwargs)
 
@@ -211,8 +288,12 @@ class GitSource(Source):
         self.repo_names = repo_names.split()
 
         self.gitweb = gitweb
+        self.bug_tracker = bug_tracker
         self.tag_pattern = tag_pattern
 
+    def _cache_key(self):
+        return self.repo_paths[0]
+
     def _poll(self, limit, max_age):
         # Fetch the main repo's git tags
         git_dir = '--git-dir=' + self.repo_paths[0]
@@ -261,16 +342,25 @@ class GitSource(Source):
                     'git',
                     '--git-dir=' + repo_path,
                     'log',
-                    '--pretty=%h%x00%an%x00%at%x00%s',
+                    '--pretty=%h%x00%an%x00%aE%x00%at%x00%s',
                     "{0}..{1}".format(since_tag, tag),
                 ]
                 proc = subprocess.Popen(git_log_args, stdout=PIPE)
                 for line in proc.stdout:
-                    hash, author, time, subject = line.strip().split('\x00')
+                    hash, author, email, time, subject \
+                        = line.strip().decode('utf8').split('\x00')
+
+                    # Convert bug numbers in subject to URLs
+                    if self.bug_tracker:
+                        subject = helpers.literal(
+                            re.sub(u'#(\d+)', self._linkify_bug_number, subject)
+                        )
+
                     commits.append(
                         FrontPageGitCommit(
                             hash = hash,
                             author = author,
+                            email = email,
                             time = datetime.datetime.fromtimestamp(int(time)),
                             subject = subject,
                             repo = repo_name,
@@ -286,3 +376,10 @@ class GitSource(Source):
             updates.append(update)
 
         return updates
+
+    def _linkify_bug_number(self, match):
+        """Regex replace function: links a matched "#123" to the bug tracker."""
+        bug_number = match.group(1)
+        bug_url = self.bug_tracker.format(bug_number)
+        return helpers.literal(
+            u"""<a href="{0}">{1}</a>""".format(bug_url, match.group(0)))