Feeds without summaries now have them auto-generated.

author Eevee <git@veekun.com>

Sun, 18 Jul 2010 21:10:24 +0000 (14:10 -0700)

committer Eevee <git@veekun.com>

Sun, 18 Jul 2010 21:10:24 +0000 (14:10 -0700)
author Eevee <git@veekun.com>
Sun, 18 Jul 2010 21:10:24 +0000 (14:10 -0700)
committer Eevee <git@veekun.com>
Sun, 18 Jul 2010 21:10:24 +0000 (14:10 -0700)
diff --git a/setup.py b/setup.py

index f1ef216..57b4f77 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -7,6 +7,7 @@ setup(
      install_requires = [
          'spline',
          'feedparser',
+        'lxml',
      ],
  
      include_package_data = True,
diff --git a/splinext/frontpage/__init__.py b/splinext/frontpage/__init__.py

index 45b1587..2154f6e 100644 (file)
--- a/splinext/frontpage/__init__.py
+++ b/splinext/frontpage/__init__.py
@@ -4,6 +4,7 @@ from pkg_resources import resource_filename
  import subprocess
  
  import feedparser
+import lxml.html
  
  from spline.lib import helpers
  from spline.lib.plugin import PluginBase, PluginLink, Priority
@@ -19,27 +20,67 @@ class FrontPageUpdate(object):
      pass
  
  
+RSS_SUMMARY_LENGTH = 1000
+
  FrontPageRSS = namedtuple('FrontPageRSS',
      ['time', 'entry', 'template', 'category', 'content', 'icon'])
  
-def rss_hook(limit, url, title, icon=None):
+def rss_hook(limit, url, title=None, icon=None):
      """Front page handler for news feeds."""
      feed = feedparser.parse(url)
  
+    if not title:
+        title = feed.feed.title
+
      updates = []
-    for entry in feed.entries:
+    for entry in feed.entries[:limit]:
+        # Grab a date -- Atom has published, RSS usually just has updated.
+        # Both come out as time tuples, which datetime.datetime() can read
+        try:
+            timestamp_tuple = entry.published_parsed
+        except AttributeError:
+            timestamp_tuple = entry.updated_parsed
+        timestamp = datetime.datetime(*timestamp_tuple[:6])
+
          # Try to find something to show!  Default to the summary, if there is
          # one, or try to generate one otherwise
          content = u''
          if 'summary' in entry:
+            # If there be a summary, cheerfully trust that it's actually a
+            # summary
              content = entry.summary
          elif 'content' in entry:
-            content = entry.content[0].value
+            # Full content is way too much, especially for my giant blog posts.
+            # Cut this down to some arbitrary number of characters, then feed
+            # it to lxml.html to fix tag nesting
+            broken_html = entry.content[0].value[:RSS_SUMMARY_LENGTH]
+            fragment = lxml.html.fromstring(broken_html)
+
+            # Insert an ellipsis at the end of the last node with text
+            last_text_node = None
+            last_tail_node = None
+            # Need to find the last node with a tail, OR the last node with
+            # text if it's later
+            for node in fragment.iter():
+                if node.tail:
+                    last_tail_node = node
+                    last_text_node = None
+                elif node.text:
+                    last_text_node = node
+                    last_tail_node = None
+
+            if last_text_node is not None:
+                last_text_node.text += '...'
+            if last_tail_node is not None:
+                last_tail_node.tail += '...'
+
+            # Serialize
+            content = lxml.html.tostring(fragment)
  
          content = helpers.literal(content)
  
          update = FrontPageRSS(
-            time = datetime.datetime(*entry.published_parsed[:6]),
+            time = timestamp,
              entry = entry,
              template = '/front_page/rss.mako',
              category = title,
author	Eevee <git@veekun.com>
	Sun, 18 Jul 2010 21:10:24 +0000 (14:10 -0700)
committer	Eevee <git@veekun.com>
	Sun, 18 Jul 2010 21:10:24 +0000 (14:10 -0700)
setup.py		patch | blob | history
splinext/frontpage/__init__.py		patch | blob | history