-from collections import namedtuple
-import datetime
+from collections import defaultdict, namedtuple
from pkg_resources import resource_filename
+import re
import subprocess
-import feedparser
-import lxml.html
+from pylons import config
from spline.lib import helpers
from spline.lib.plugin import PluginBase, PluginLink, Priority
+from spline.lib.plugin.load import run_hooks
import splinext.frontpage.controllers.frontpage
-
-class FrontPageUpdate(object):
- """Base class ('interface') for an updated thing that may appear on the
- front page.
-
- Subclasses should implement the `time` and `template` properties.
- """
- pass
-
-
-RSS_SUMMARY_LENGTH = 1000
-
-FrontPageRSS = namedtuple('FrontPageRSS',
- ['time', 'entry', 'template', 'category', 'content', 'icon'])
-
-def rss_hook(limit, max_age, url, title=None, icon=None):
- """Front page handler for news feeds."""
- feed = feedparser.parse(url)
-
- if not title:
- title = feed.feed.title
-
- updates = []
- for entry in feed.entries[:limit]:
- # Grab a date -- Atom has published, RSS usually just has updated.
- # Both come out as time tuples, which datetime.datetime() can read
- try:
- timestamp_tuple = entry.published_parsed
- except AttributeError:
- timestamp_tuple = entry.updated_parsed
- timestamp = datetime.datetime(*timestamp_tuple[:6])
-
- if max_age and timestamp < max_age:
- # Entries should be oldest-first, so we can bail after the first
- # expired entry
- break
-
- # Try to find something to show! Default to the summary, if there is
- # one, or try to generate one otherwise
- content = u''
- if 'summary' in entry:
- # If there be a summary, cheerfully trust that it's actually a
- # summary
- content = entry.summary
- elif 'content' in entry:
- # Full content is way too much, especially for my giant blog posts.
- # Cut this down to some arbitrary number of characters, then feed
- # it to lxml.html to fix tag nesting
- broken_html = entry.content[0].value[:RSS_SUMMARY_LENGTH]
- fragment = lxml.html.fromstring(broken_html)
-
- # Insert an ellipsis at the end of the last node with text
- last_text_node = None
- last_tail_node = None
- # Need to find the last node with a tail, OR the last node with
- # text if it's later
- for node in fragment.iter():
- if node.tail:
- last_tail_node = node
- last_text_node = None
- elif node.text:
- last_text_node = node
- last_tail_node = None
-
- if last_text_node is not None:
- last_text_node.text += '...'
- if last_tail_node is not None:
- last_tail_node.tail += '...'
-
- # Serialize
- content = lxml.html.tostring(fragment)
-
- content = helpers.literal(content)
-
- update = FrontPageRSS(
- time = timestamp,
- entry = entry,
- template = '/front_page/rss.mako',
- category = title,
- content = content,
- icon = icon,
- )
- updates.append(update)
-
- return updates
-
-
-FrontPageGit = namedtuple('FrontPageGit',
- ['time', 'gitweb', 'log', 'tag', 'template', 'category', 'icon'])
-FrontPageGitCommit = namedtuple('FrontPageGitCommit',
- ['hash', 'author', 'time', 'subject', 'repo'])
-
-def git_hook(limit, max_age, title, gitweb, repo_paths, repo_names,
- tag_pattern=None, icon=None):
-
- """Front page handler for repository history."""
- # Repo stuff can be space-delimited lists...
- repo_paths = repo_paths.split()
- repo_names = repo_names.split()
-
- # Fetch the main repo's git tags
- args = [
- 'git',
- '--git-dir=' + repo_paths[0],
- 'tag', '-l',
- ]
- if tag_pattern:
- args.append(tag_pattern)
-
- proc = subprocess.Popen(args, stdout=subprocess.PIPE)
- git_output, _ = proc.communicate()
- tags = git_output.strip().split('\n')
-
- # Tags come out in alphabetical order, which means earliest first. Reverse
- # it to make the slicing easier
- tags.reverse()
- # Only history from tag to tag is actually interesting, so get the most
- # recent $limit tags but skip the earliest
- interesting_tags = tags[:-1][:limit]
-
- updates = []
- for tag, since_tag in zip(interesting_tags, tags[1:]):
- # Get the date when this tag was actually created
- args = [
- 'git',
- '--git-dir=' + repo_paths[0],
- 'for-each-ref',
- '--format=%(taggerdate:raw)',
- 'refs/tags/' + tag,
- ]
- tag_timestamp, _ = subprocess.Popen(args, stdout=subprocess.PIPE) \
- .communicate()
- tag_unixtime, tag_timezone = tag_timestamp.split(None, 1)
- tagged_timestamp = datetime.datetime.fromtimestamp(int(tag_unixtime))
-
- if max_age and tagged_timestamp < max_age:
- break
-
- commits = []
-
- for repo_path, repo_name in zip(repo_paths, repo_names):
- # Grab an easily-parsed history: fields delimited by nulls.
- # Hash, author's name, commit timestamp, subject.
- git_log_args = [
- 'git',
- '--git-dir=' + repo_path,
- 'log',
- '--pretty=%h%x00%an%x00%at%x00%s',
- "{0}..{1}".format(since_tag, tag),
- ]
- proc = subprocess.Popen(git_log_args, stdout=subprocess.PIPE)
- for line in proc.stdout:
- hash, author, time, subject = line.strip().split('\x00')
- commits.append(
- FrontPageGitCommit(
- hash = hash,
- author = author,
- time = datetime.datetime.fromtimestamp(int(time)),
- subject = subject,
- repo = repo_name,
- )
- )
-
- update = FrontPageGit(
- time = tagged_timestamp,
- gitweb = gitweb,
- log = commits,
- template = '/front_page/git.mako',
- category = title,
- tag = tag,
- icon = icon,
- )
- updates.append(update)
-
- return updates
-
+from splinext.frontpage.sources import FeedSource, GitSource
def add_routes_hook(map, *args, **kwargs):
"""Hook to inject some of our behavior into the routes configuration."""
map.connect('/', controller='frontpage', action='index')
+def load_sources_hook(*args, **kwargs):
+    """Hook to load all the known sources and stuff them in config.  Run once,
+    on server startup.
+
+    Reads every `spline-frontpage.sources.<name>` entry (the source's type)
+    and every `spline-frontpage.sources.<name>.<key>` entry (one setting for
+    that source) from the config, runs the `frontpage_updates_<type>` hook
+    for each named source with its settings as keyword arguments, and stores
+    the combined results in `config['spline-frontpage.sources']`.
+
+    As a side effect, `spline-frontpage.limit` and `spline-frontpage.max_age`
+    are written back to config as an integer and an integer-or-None.
+
+    Positional and keyword arguments are accepted and ignored.
+
+    Raises KeyError if a source has settings but no type declaration.
+    """
+    # Extract source definitions from config and store as source_name => config
+    update_config = defaultdict(dict)
+    # Raw string, so the \w escapes reach the regex engine untouched
+    key_rx = re.compile(
+        r'(?x) ^ spline-frontpage [.] sources [.] (\w+) (?: [.] (\w+) )? $')
+    for key, val in config.iteritems():
+        # Match against spline-frontpage.sources.(source).(key)
+        match = key_rx.match(key)
+        if not match:
+            continue
+
+        source_name, subkey = match.groups()
+        if not subkey:
+            # This is the type declaration; use a special key
+            subkey = '__type__'
+
+        update_config[source_name][subkey] = val
+
+    # Figure out the global limit and expiration time, with reasonable
+    # defaults.  Make sure they're integers.
+    global_limit = int(config.get('spline-frontpage.limit', 10))
+    # max_age is optional and can be None.  None is also what gets written
+    # back just below, so accept it on the way in instead of choking on
+    # int(None) should this hook ever run a second time.
+    global_max_age = config.get('spline-frontpage.max_age')
+    if global_max_age is not None:
+        global_max_age = int(global_max_age)
+
+    config['spline-frontpage.limit'] = global_limit
+    config['spline-frontpage.max_age'] = global_max_age
+
+    # Ask plugins to turn configuration into source objects
+    sources = []
+    for source, source_config in update_config.iteritems():
+        # Pull the type out of the settings -- don't feed this to constructor!
+        try:
+            source_type = source_config.pop('__type__')
+        except KeyError:
+            # Same exception type as a plain lookup would raise, but say
+            # which source is misconfigured
+            raise KeyError(
+                "spline-frontpage.sources.{0} has settings but no type; "
+                "set spline-frontpage.sources.{0} to the source type"
+                .format(source))
+        hook_name = 'frontpage_updates_' + source_type
+
+        # Default to global limit and max age.  Source takes care of making
+        # integers and whatnot
+        source_config.setdefault('limit', global_limit)
+        source_config.setdefault('max_age', global_max_age)
+
+        # Hooks return a list of sources; combine with running list
+        sources += run_hooks(hook_name, **source_config)
+
+    # Save the list of sources, and done
+    config['spline-frontpage.sources'] = sources
+
+def source_cron_hook(*args, **kwargs):
+    """Hook to forward each cron tick to every loaded front page source, in
+    case a source needs it for e.g. caching.
+
+    All positional and keyword arguments are handed unchanged to each
+    source's `do_cron` method.
+    """
+    loaded_sources = config['spline-frontpage.sources']
+    for loaded_source in loaded_sources:
+        loaded_source.do_cron(*args, **kwargs)
class FrontPagePlugin(PluginBase):
def controllers(self):
def hooks(self):
return [
('routes_mapping', Priority.NORMAL, add_routes_hook),
- ('frontpage_updates_rss', Priority.NORMAL, rss_hook),
- ('frontpage_updates_git', Priority.NORMAL, git_hook),
+ ('after_setup', Priority.NORMAL, load_sources_hook),
+ ('cron', Priority.NORMAL, source_cron_hook),
+ ('frontpage_updates_rss', Priority.NORMAL, FeedSource),
+ ('frontpage_updates_git', Priority.NORMAL, GitSource),
]