X-Git-Url: http://git.veekun.com/zzz-spline-frontpage.git/blobdiff_plain/69e763a9763c70b88ccfbbd2ab126212b5754f63..bde9c45f06b381b10a6fb44879c5a0bf31b1f9ac:/splinext/frontpage/__init__.py

diff --git a/splinext/frontpage/__init__.py b/splinext/frontpage/__init__.py
index e4caec2..9821f9c 100644
--- a/splinext/frontpage/__init__.py
+++ b/splinext/frontpage/__init__.py
@@ -1,196 +1,80 @@
-from collections import namedtuple
-import datetime
+from collections import defaultdict, namedtuple
 from pkg_resources import resource_filename
+import re
 import subprocess
 
-import feedparser
-import lxml.html
+from pylons import config
 
 from spline.lib import helpers
 from spline.lib.plugin import PluginBase, PluginLink, Priority
+from spline.lib.plugin.load import run_hooks
 
 import splinext.frontpage.controllers.frontpage
-
-class FrontPageUpdate(object):
-    """Base class ('interface') for an updated thing that may appear on the
-    front page.
-
-    Subclasses should implement the `time` and `template` properties.
-    """
-    pass
-
-
-RSS_SUMMARY_LENGTH = 1000
-
-FrontPageRSS = namedtuple('FrontPageRSS',
-    ['time', 'entry', 'template', 'category', 'content', 'icon'])
-
-def rss_hook(limit, max_age, url, title=None, icon=None):
-    """Front page handler for news feeds."""
-    feed = feedparser.parse(url)
-
-    if not title:
-        title = feed.feed.title
-
-    updates = []
-    for entry in feed.entries[:limit]:
-        # Grab a date -- Atom has published, RSS usually just has updated.
-        # Both come out as time tuples, which datetime.datetime() can read
-        try:
-            timestamp_tuple = entry.published_parsed
-        except AttributeError:
-            timestamp_tuple = entry.updated_parsed
-        timestamp = datetime.datetime(*timestamp_tuple[:6])
-
-        if max_age and timestamp < max_age:
-            # Entries should be oldest-first, so we can bail after the first
-            # expired entry
-            break
-
-        # Try to find something to show! Default to the summary, if there is
-        # one, or try to generate one otherwise
-        content = u''
-        if 'summary' in entry:
-            # If there be a summary, cheerfully trust that it's actually a
-            # summary
-            content = entry.summary
-        elif 'content' in entry:
-            # Full content is way too much, especially for my giant blog posts.
-            # Cut this down to some arbitrary number of characters, then feed
-            # it to lxml.html to fix tag nesting
-            broken_html = entry.content[0].value[:RSS_SUMMARY_LENGTH]
-            fragment = lxml.html.fromstring(broken_html)
-
-            # Insert an ellipsis at the end of the last node with text
-            last_text_node = None
-            last_tail_node = None
-            # Need to find the last node with a tail, OR the last node with
-            # text if it's later
-            for node in fragment.iter():
-                if node.tail:
-                    last_tail_node = node
-                    last_text_node = None
-                elif node.text:
-                    last_text_node = node
-                    last_tail_node = None
-
-            if last_text_node is not None:
-                last_text_node.text += '...'
-            if last_tail_node is not None:
-                last_tail_node.tail += '...'
-
-            # Serialize
-            content = lxml.html.tostring(fragment)
-
-        content = helpers.literal(content)
-
-        update = FrontPageRSS(
-            time = timestamp,
-            entry = entry,
-            template = '/front_page/rss.mako',
-            category = title,
-            content = content,
-            icon = icon,
-        )
-        updates.append(update)
-
-    return updates
-
-
-FrontPageGit = namedtuple('FrontPageGit',
-    ['time', 'gitweb', 'log', 'tag', 'template', 'category', 'icon'])
-FrontPageGitCommit = namedtuple('FrontPageGitCommit',
-    ['hash', 'author', 'time', 'subject', 'repo'])
-
-def git_hook(limit, max_age, title, gitweb, repo_paths, repo_names,
-    tag_pattern=None, icon=None):
-
-    """Front page handler for repository history."""
-    # Repo stuff can be space-delimited lists...
-    repo_paths = repo_paths.split()
-    repo_names = repo_names.split()
-
-    # Fetch the main repo's git tags
-    args = [
-        'git',
-        '--git-dir=' + repo_paths[0],
-        'tag', '-l',
-    ]
-    if tag_pattern:
-        args.append(tag_pattern)
-
-    proc = subprocess.Popen(args, stdout=subprocess.PIPE)
-    git_output, _ = proc.communicate()
-    tags = git_output.strip().split('\n')
-
-    # Tags come out in alphabetical order, which means earliest first. Reverse
-    # it to make the slicing easier
-    tags.reverse()
-    # Only history from tag to tag is actually interesting, so get the most
-    # recent $limit tags but skip the earliest
-    interesting_tags = tags[:-1][:limit]
-
-    updates = []
-    for tag, since_tag in zip(interesting_tags, tags[1:]):
-        # Get the date when this tag was actually created
-        args = [
-            'git',
-            '--git-dir=' + repo_paths[0],
-            'for-each-ref',
-            '--format=%(taggerdate:raw)',
-            'refs/tags/' + tag,
-        ]
-        tag_timestamp, _ = subprocess.Popen(args, stdout=subprocess.PIPE) \
-            .communicate()
-        tag_unixtime, tag_timezone = tag_timestamp.split(None, 1)
-        tagged_timestamp = datetime.datetime.fromtimestamp(int(tag_unixtime))
-
-        if max_age and tagged_timestamp < max_age:
-            break
-
-        commits = []
-
-        for repo_path, repo_name in zip(repo_paths, repo_names):
-            # Grab an easily-parsed history: fields delimited by nulls.
-            # Hash, author's name, commit timestamp, subject.
-            git_log_args = [
-                'git',
-                '--git-dir=' + repo_path,
-                'log',
-                '--pretty=%h%x00%an%x00%at%x00%s',
-                "{0}..{1}".format(since_tag, tag),
-            ]
-            proc = subprocess.Popen(git_log_args, stdout=subprocess.PIPE)
-            for line in proc.stdout:
-                hash, author, time, subject = line.strip().split('\x00')
-                commits.append(
-                    FrontPageGitCommit(
-                        hash = hash,
-                        author = author,
-                        time = datetime.datetime.fromtimestamp(int(time)),
-                        subject = subject,
-                        repo = repo_name,
-                    )
-                )
-
-        update = FrontPageGit(
-            time = tagged_timestamp,
-            gitweb = gitweb,
-            log = commits,
-            template = '/front_page/git.mako',
-            category = title,
-            tag = tag,
-            icon = icon,
-        )
-        updates.append(update)
-
-    return updates
-
+from splinext.frontpage.sources import FeedSource, GitSource
 
 def add_routes_hook(map, *args, **kwargs):
     """Hook to inject some of our behavior into the routes configuration."""
     map.connect('/', controller='frontpage', action='index')
 
+def load_sources_hook(config, *args, **kwargs):
+    """Hook to load all the known sources and stuff them in config. Run once,
+    on server startup.
+
+    Frontpage hooks are also passed the `config` hash, as it's not available
+    during setup.
+    """
+    # Extract source definitions from config and store as source_name => config
+    update_config = defaultdict(dict)
+    key_rx = re.compile(
+        '(?x) ^ spline-frontpage [.] sources [.] (\w+) (?: [.] (\w+) )? $')
+    for key, val in config.iteritems():
+        # Match against spline-frontpage.source.(source).(key)
+        match = key_rx.match(key)
+        if not match:
+            continue
+
+        source_name, subkey = match.groups()
+        if not subkey:
+            # This is the type declaration; use a special key
+            subkey = '__type__'
+
+        update_config[source_name][subkey] = val
+
+    # Figure out the global limit and expiration time, with reasonable
+    # defaults. Make sure they're integers.
+    global_limit = int(config.get('spline-frontpage.limit', 10))
+    # max_age is optional and can be None
+    try:
+        global_max_age = int(config['spline-frontpage.max_age'])
+    except KeyError:
+        global_max_age = None
+
+    config['spline-frontpage.limit'] = global_limit
+    config['spline-frontpage.max_age'] = global_max_age
+
+    # Ask plugins to turn configuration into source objects
+    sources = []
+    for source, source_config in update_config.iteritems():
+        hook_name = 'frontpage_updates_' + source_config['__type__']
+        del source_config['__type__'] # don't feed this to constructor!
+
+        # Default to global limit and max age. Source takes care of making
+        # integers and whatnot
+        source_config.setdefault('limit', global_limit)
+        source_config.setdefault('max_age', global_max_age)
+
+        # Hooks return a list of sources; combine with running list
+        sources += run_hooks(hook_name, config=config, **source_config)
+
+    # Save the list of sources, and done
+    config['spline-frontpage.sources'] = sources
+
+def source_cron_hook(*args, **kwargs):
+    """Hook to pass on cron tics to all sources, should they need it for e.g.
+    caching.
+    """
+    for source in config['spline-frontpage.sources']:
+        source.do_cron(*args, **kwargs)
 
 class FrontPagePlugin(PluginBase):
     def controllers(self):
@@ -206,6 +90,8 @@ class FrontPagePlugin(PluginBase):
     def hooks(self):
         return [
             ('routes_mapping', Priority.NORMAL, add_routes_hook),
-            ('frontpage_updates_rss', Priority.NORMAL, rss_hook),
-            ('frontpage_updates_git', Priority.NORMAL, git_hook),
+            ('after_setup', Priority.NORMAL, load_sources_hook),
+            ('cron', Priority.NORMAL, source_cron_hook),
+            ('frontpage_updates_rss', Priority.NORMAL, FeedSource),
+            ('frontpage_updates_git', Priority.NORMAL, GitSource),
         ]