Added support for max_age.
[zzz-spline-frontpage.git] / splinext / frontpage / __init__.py
index 45b1587..e4caec2 100644 (file)
@@ -4,6 +4,7 @@ from pkg_resources import resource_filename
 import subprocess
 
 import feedparser
 import subprocess
 
 import feedparser
+import lxml.html
 
 from spline.lib import helpers
 from spline.lib.plugin import PluginBase, PluginLink, Priority
 
 from spline.lib import helpers
 from spline.lib.plugin import PluginBase, PluginLink, Priority
@@ -19,27 +20,72 @@ class FrontPageUpdate(object):
     pass
 
 
     pass
 
 
+RSS_SUMMARY_LENGTH = 1000
+
 FrontPageRSS = namedtuple('FrontPageRSS',
     ['time', 'entry', 'template', 'category', 'content', 'icon'])
 
 FrontPageRSS = namedtuple('FrontPageRSS',
     ['time', 'entry', 'template', 'category', 'content', 'icon'])
 
-def rss_hook(limit, url, title, icon=None):
+def rss_hook(limit, max_age, url, title=None, icon=None):
     """Front page handler for news feeds."""
     feed = feedparser.parse(url)
 
     """Front page handler for news feeds."""
     feed = feedparser.parse(url)
 
+    if not title:
+        title = feed.feed.title
+
     updates = []
     updates = []
-    for entry in feed.entries:
+    for entry in feed.entries[:limit]:
+        # Grab a date -- Atom has published, RSS usually just has updated.
+        # Both come out as time tuples, which datetime.datetime() can read
+        try:
+            timestamp_tuple = entry.published_parsed
+        except AttributeError:
+            timestamp_tuple = entry.updated_parsed
+        timestamp = datetime.datetime(*timestamp_tuple[:6])
+
+        if max_age and timestamp < max_age:
+            # Entries should be newest-first, so we can bail after the first
+            # expired entry (everything after it is older still)
+            break
+
         # Try to find something to show!  Default to the summary, if there is
         # one, or try to generate one otherwise
         content = u''
         if 'summary' in entry:
         # Try to find something to show!  Default to the summary, if there is
         # one, or try to generate one otherwise
         content = u''
         if 'summary' in entry:
+            # If there be a summary, cheerfully trust that it's actually a
+            # summary
             content = entry.summary
         elif 'content' in entry:
             content = entry.summary
         elif 'content' in entry:
-            content = entry.content[0].value
+            # Full content is way too much, especially for my giant blog posts.
+            # Cut this down to some arbitrary number of characters, then feed
+            # it to lxml.html to fix tag nesting
+            broken_html = entry.content[0].value[:RSS_SUMMARY_LENGTH]
+            fragment = lxml.html.fromstring(broken_html)
+
+            # Insert an ellipsis at the end of the last node with text
+            last_text_node = None
+            last_tail_node = None
+            # Need to find the last node with a tail, OR the last node with
+            # text if it's later
+            for node in fragment.iter():
+                if node.tail:
+                    last_tail_node = node
+                    last_text_node = None
+                elif node.text:
+                    last_text_node = node
+                    last_tail_node = None
+
+            if last_text_node is not None:
+                last_text_node.text += '...'
+            if last_tail_node is not None:
+                last_tail_node.tail += '...'
+
+            # Serialize
+            content = lxml.html.tostring(fragment)
 
         content = helpers.literal(content)
 
         update = FrontPageRSS(
 
         content = helpers.literal(content)
 
         update = FrontPageRSS(
-            time = datetime.datetime(*entry.published_parsed[:6]),
+            time = timestamp,
             entry = entry,
             template = '/front_page/rss.mako',
             category = title,
             entry = entry,
             template = '/front_page/rss.mako',
             category = title,
@@ -56,7 +102,7 @@ FrontPageGit = namedtuple('FrontPageGit',
 FrontPageGitCommit = namedtuple('FrontPageGitCommit',
     ['hash', 'author', 'time', 'subject', 'repo'])
 
 FrontPageGitCommit = namedtuple('FrontPageGitCommit',
     ['hash', 'author', 'time', 'subject', 'repo'])
 
-def git_hook(limit, title, gitweb, repo_paths, repo_names,
+def git_hook(limit, max_age, title, gitweb, repo_paths, repo_names,
     tag_pattern=None, icon=None):
 
     """Front page handler for repository history."""
     tag_pattern=None, icon=None):
 
     """Front page handler for repository history."""
@@ -86,6 +132,22 @@ def git_hook(limit, title, gitweb, repo_paths, repo_names,
 
     updates = []
     for tag, since_tag in zip(interesting_tags, tags[1:]):
 
     updates = []
     for tag, since_tag in zip(interesting_tags, tags[1:]):
+        # Get the date when this tag was actually created
+        args = [
+            'git',
+            '--git-dir=' + repo_paths[0],
+            'for-each-ref',
+            '--format=%(taggerdate:raw)',
+            'refs/tags/' + tag,
+        ]
+        tag_timestamp, _ = subprocess.Popen(args, stdout=subprocess.PIPE) \
+            .communicate()
+        tag_unixtime, tag_timezone = tag_timestamp.split(None, 1)
+        tagged_timestamp = datetime.datetime.fromtimestamp(int(tag_unixtime))
+
+        if max_age and tagged_timestamp < max_age:
+            break
+
         commits = []
 
         for repo_path, repo_name in zip(repo_paths, repo_names):
         commits = []
 
         for repo_path, repo_name in zip(repo_paths, repo_names):
@@ -111,19 +173,8 @@ def git_hook(limit, title, gitweb, repo_paths, repo_names,
                     )
                 )
 
                     )
                 )
 
-        # LASTLY, get the date when this tag was actually created
-        args = [
-            'git',
-            'for-each-ref',
-            '--format=%(taggerdate:raw)',
-            'refs/tags/' + tag,
-        ]
-        tag_timestamp, _ = subprocess.Popen(args, stdout=subprocess.PIPE) \
-            .communicate()
-        tag_unixtime, tag_timezone = tag_timestamp.split(None, 1)
-
         update = FrontPageGit(
         update = FrontPageGit(
-            time = datetime.datetime.fromtimestamp(int(tag_unixtime)),
+            time = tagged_timestamp,
             gitweb = gitweb,
             log = commits,
             template = '/front_page/git.mako',
             gitweb = gitweb,
             log = commits,
             template = '/front_page/git.mako',