From: Eevee Date: Sun, 25 Jul 2010 00:42:09 +0000 (-0700) Subject: Big ol refactor: make sources into first-class objects. X-Git-Tag: veekun-promotions/2010080801~4 X-Git-Url: http://git.veekun.com/zzz-spline-frontpage.git/commitdiff_plain/9be4bb2b3da075385c748f33488d316ebcbdd9ff?hp=69e763a9763c70b88ccfbbd2ab126212b5754f63 Big ol refactor: make sources into first-class objects. --- diff --git a/splinext/frontpage/__init__.py b/splinext/frontpage/__init__.py index e4caec2..ea80f05 100644 --- a/splinext/frontpage/__init__.py +++ b/splinext/frontpage/__init__.py @@ -3,189 +3,11 @@ import datetime from pkg_resources import resource_filename import subprocess -import feedparser -import lxml.html - from spline.lib import helpers from spline.lib.plugin import PluginBase, PluginLink, Priority import splinext.frontpage.controllers.frontpage - -class FrontPageUpdate(object): - """Base class ('interface') for an updated thing that may appear on the - front page. - - Subclasses should implement the `time` and `template` properties. - """ - pass - - -RSS_SUMMARY_LENGTH = 1000 - -FrontPageRSS = namedtuple('FrontPageRSS', - ['time', 'entry', 'template', 'category', 'content', 'icon']) - -def rss_hook(limit, max_age, url, title=None, icon=None): - """Front page handler for news feeds.""" - feed = feedparser.parse(url) - - if not title: - title = feed.feed.title - - updates = [] - for entry in feed.entries[:limit]: - # Grab a date -- Atom has published, RSS usually just has updated. - # Both come out as time tuples, which datetime.datetime() can read - try: - timestamp_tuple = entry.published_parsed - except AttributeError: - timestamp_tuple = entry.updated_parsed - timestamp = datetime.datetime(*timestamp_tuple[:6]) - - if max_age and timestamp < max_age: - # Entries should be oldest-first, so we can bail after the first - # expired entry - break - - # Try to find something to show! Default to the summary, if there is - # one, or try to generate one otherwise - content = u'' - if 'summary' in entry: - # If there be a summary, cheerfully trust that it's actually a - # summary - content = entry.summary - elif 'content' in entry: - # Full content is way too much, especially for my giant blog posts. - # Cut this down to some arbitrary number of characters, then feed - # it to lxml.html to fix tag nesting - broken_html = entry.content[0].value[:RSS_SUMMARY_LENGTH] - fragment = lxml.html.fromstring(broken_html) - - # Insert an ellipsis at the end of the last node with text - last_text_node = None - last_tail_node = None - # Need to find the last node with a tail, OR the last node with - # text if it's later - for node in fragment.iter(): - if node.tail: - last_tail_node = node - last_text_node = None - elif node.text: - last_text_node = node - last_tail_node = None - - if last_text_node is not None: - last_text_node.text += '...' - if last_tail_node is not None: - last_tail_node.tail += '...' - - # Serialize - content = lxml.html.tostring(fragment) - - content = helpers.literal(content) - - update = FrontPageRSS( - time = timestamp, - entry = entry, - template = '/front_page/rss.mako', - category = title, - content = content, - icon = icon, - ) - updates.append(update) - - return updates - - -FrontPageGit = namedtuple('FrontPageGit', - ['time', 'gitweb', 'log', 'tag', 'template', 'category', 'icon']) -FrontPageGitCommit = namedtuple('FrontPageGitCommit', - ['hash', 'author', 'time', 'subject', 'repo']) - -def git_hook(limit, max_age, title, gitweb, repo_paths, repo_names, - tag_pattern=None, icon=None): - - """Front page handler for repository history.""" - # Repo stuff can be space-delimited lists... - repo_paths = repo_paths.split() - repo_names = repo_names.split() - - # Fetch the main repo's git tags - args = [ - 'git', - '--git-dir=' + repo_paths[0], - 'tag', '-l', - ] - if tag_pattern: - args.append(tag_pattern) - - proc = subprocess.Popen(args, stdout=subprocess.PIPE) - git_output, _ = proc.communicate() - tags = git_output.strip().split('\n') - - # Tags come out in alphabetical order, which means earliest first. Reverse - # it to make the slicing easier - tags.reverse() - # Only history from tag to tag is actually interesting, so get the most - # recent $limit tags but skip the earliest - interesting_tags = tags[:-1][:limit] - - updates = [] - for tag, since_tag in zip(interesting_tags, tags[1:]): - # Get the date when this tag was actually created - args = [ - 'git', - '--git-dir=' + repo_paths[0], - 'for-each-ref', - '--format=%(taggerdate:raw)', - 'refs/tags/' + tag, - ] - tag_timestamp, _ = subprocess.Popen(args, stdout=subprocess.PIPE) \ - .communicate() - tag_unixtime, tag_timezone = tag_timestamp.split(None, 1) - tagged_timestamp = datetime.datetime.fromtimestamp(int(tag_unixtime)) - - if max_age and tagged_timestamp < max_age: - break - - commits = [] - - for repo_path, repo_name in zip(repo_paths, repo_names): - # Grab an easily-parsed history: fields delimited by nulls. - # Hash, author's name, commit timestamp, subject. - git_log_args = [ - 'git', - '--git-dir=' + repo_path, - 'log', - '--pretty=%h%x00%an%x00%at%x00%s', - "{0}..{1}".format(since_tag, tag), - ] - proc = subprocess.Popen(git_log_args, stdout=subprocess.PIPE) - for line in proc.stdout: - hash, author, time, subject = line.strip().split('\x00') - commits.append( - FrontPageGitCommit( - hash = hash, - author = author, - time = datetime.datetime.fromtimestamp(int(time)), - subject = subject, - repo = repo_name, - ) - ) - - update = FrontPageGit( - time = tagged_timestamp, - gitweb = gitweb, - log = commits, - template = '/front_page/git.mako', - category = title, - tag = tag, - icon = icon, - ) - updates.append(update) - - return updates - +from splinext.frontpage.sources import FeedSource, GitSource def add_routes_hook(map, *args, **kwargs): """Hook to inject some of our behavior into the routes configuration.""" @@ -206,6 +28,6 @@ class FrontPagePlugin(PluginBase): def hooks(self): return [ ('routes_mapping', Priority.NORMAL, add_routes_hook), - ('frontpage_updates_rss', Priority.NORMAL, rss_hook), - ('frontpage_updates_git', Priority.NORMAL, git_hook), + ('frontpage_updates_rss', Priority.NORMAL, FeedSource), + ('frontpage_updates_git', Priority.NORMAL, GitSource), ] diff --git a/splinext/frontpage/controllers/frontpage.py b/splinext/frontpage/controllers/frontpage.py index 5aba8c5..04386fd 100644 --- a/splinext/frontpage/controllers/frontpage.py +++ b/splinext/frontpage/controllers/frontpage.py @@ -100,10 +100,10 @@ class FrontPageController(BaseController): else: merged_config['max_age'] = global_max_age or local_max_age - # Hooks should return a list of FrontPageUpdate-like objects, - # making this return value a list of lists + # XXX bleh updates_lol = run_hooks(hook_name, **merged_config) - updates += sum(updates_lol, []) + source_obj = updates_lol[0] + updates += source_obj.poll(merged_config['limit'], merged_config['max_age']) # Little optimization: maximum age effectively becomes the age of # the oldest thing that would still appear on the page, as anything @@ -112,7 +112,7 @@ class FrontPageController(BaseController): updates.sort(key=lambda obj: obj.time, reverse=True) updates = updates[:global_limit] - if updates: + if updates and len(updates) == global_limit: global_max_age = updates[-1].time c.updates = updates diff --git a/splinext/frontpage/sources.py b/splinext/frontpage/sources.py new file mode 100644 index 0000000..1afc484 --- /dev/null +++ b/splinext/frontpage/sources.py @@ -0,0 +1,263 @@ +"""Base class for a front page source, as well as a handful of specific +implementations. +""" + +from collections import namedtuple +import datetime +import subprocess +from subprocess import PIPE + +import feedparser +import lxml.html + +from spline.lib import helpers + + +class Source(object): + """Represents a source to be polled for updates. Sources are populated + directly from the configuration file. + + Properties: + + ``title`` + A name to identify this specific source. + + ``icon`` + Name of a Fugue icon to show next to the name. + + ``link`` + A URL where the full history of this source can be found. + + ``limit`` + The maximum number of items from this source to show at a time. + Optional. + + ``max_age`` + Items older than this age (in seconds) will be excluded. Optional. + + Additionally, subclasses **must** define a ``template`` property -- a path + to a Mako template that knows how to render an update from this source. + The template will be passed one parameter: the update object, ``update``. + """ + + def __init__(self, title, icon, link, limit=None, max_age=None): + self.title = title + self.icon = icon + self.link = link + self.limit = limit + self.max_age = max_age + + def poll(self): + """Poll for updates. Must return an iterable. Each element should be + an Update object. + """ + raise NotImplementedError + + +FrontPageRSS = namedtuple('FrontPageRSS', ['source', 'time', 'entry', 'content']) +class FeedSource(Source): + """Represents an RSS or Atom feed. + + Extra properties: + + ``feed_url`` + URL for the feed. + """ + + template = '/front_page/rss.mako' + + SUMMARY_LENGTH = 1000 + + def __init__(self, feed_url, **kwargs): + kwargs.setdefault('title', None) + super(FeedSource, self).__init__(**kwargs) + + self.feed_url = feed_url + + def poll(self, limit, max_age): + feed = feedparser.parse(self.feed_url) + + if not self.title: + self.title = feed.feed.title + + updates = [] + for entry in feed.entries[:limit]: + # Grab a date -- Atom has published, RSS usually just has updated. + # Both come out as time tuples, which datetime.datetime() can read + try: + timestamp_tuple = entry.published_parsed + except AttributeError: + timestamp_tuple = entry.updated_parsed + timestamp = datetime.datetime(*timestamp_tuple[:6]) + + if max_age and timestamp < max_age: + # Entries should be oldest-first, so we can bail after the first + # expired entry + break + + # Try to find something to show! Default to the summary, if there is + # one, or try to generate one otherwise + content = u'' + if 'summary' in entry: + # If there be a summary, cheerfully trust that it's actually a + # summary + content = entry.summary + elif 'content' in entry: + # Full content is way too much, especially for my giant blog posts. + # Cut this down to some arbitrary number of characters, then feed + # it to lxml.html to fix tag nesting + broken_html = entry.content[0].value[:self.SUMMARY_LENGTH] + fragment = lxml.html.fromstring(broken_html) + + # Insert an ellipsis at the end of the last node with text + last_text_node = None + last_tail_node = None + # Need to find the last node with a tail, OR the last node with + # text if it's later + for node in fragment.iter(): + if node.tail: + last_tail_node = node + last_text_node = None + elif node.text: + last_text_node = node + last_tail_node = None + + if last_text_node is not None: + last_text_node.text += '...' + if last_tail_node is not None: + last_tail_node.tail += '...' + + # Serialize + content = lxml.html.tostring(fragment) + + content = helpers.literal(content) + + update = FrontPageRSS( + source = self, + time = timestamp, + content = content, + entry = entry, + ) + updates.append(update) + + return updates + + +FrontPageGit = namedtuple('FrontPageGit', ['source', 'time', 'log', 'tag']) +FrontPageGitCommit = namedtuple('FrontPageGitCommit', + ['hash', 'author', 'time', 'subject', 'repo']) + +class GitSource(Source): + """Represents a git repository. + + The main repository is checked for annotated tags, and an update is + considered to be the list of commits between them. If any other + repositories are listed and have the same tags, their commits will be + included as well. + + Extra properties: + + ``repo_paths`` + Space-separated list of repositories. These must be repository PATHS, + not arbitrary git URLs. Only the first one will be checked for the + list of tags. + + ``repo_names`` + A list of names for the repositories, in parallel with ``repo_paths``. + Used for constructing gitweb URLs and identifying the repositories. + + ``gitweb`` + Base URL to a gitweb installation, so commit ids can be linked to the + commit proper. + + ``tag_pattern`` + Optional. A shell glob pattern used to filter the tags. + """ + + template = '/front_page/git.mako' + + def __init__(self, repo_paths, repo_names, gitweb, tag_pattern=None, **kwargs): + kwargs.setdefault('title', None) + super(GitSource, self).__init__(**kwargs) + + # Repo stuff can be space-delimited lists + self.repo_paths = repo_paths.split() + self.repo_names = repo_names.split() + + self.gitweb = gitweb + self.tag_pattern = tag_pattern + + def poll(self, limit, max_age): + + # Fetch the main repo's git tags + git_dir = '--git-dir=' + self.repo_paths[0] + args = [ + 'git', + git_dir, + 'tag', '-l', + ] + if self.tag_pattern: + args.append(self.tag_pattern) + + git_output, _ = subprocess.Popen(args, stdout=PIPE).communicate() + tags = git_output.strip().split('\n') + + # Tags come out in alphabetical order, which means earliest first. Reverse + # it to make the slicing easier + tags.reverse() + # Only history from tag to tag is actually interesting, so get the most + # recent $limit tags but skip the earliest + interesting_tags = tags[:-1][:limit] + + updates = [] + for tag, since_tag in zip(interesting_tags, tags[1:]): + # Get the date when this tag was actually created. + # 'raw' format gives unixtime followed by timezone offset + args = [ + 'git', + git_dir, + 'for-each-ref', + '--format=%(taggerdate:raw)', + 'refs/tags/' + tag, + ] + tag_timestamp, _ = subprocess.Popen(args, stdout=PIPE).communicate() + tag_unixtime, tag_timezone = tag_timestamp.split(None, 1) + tagged_timestamp = datetime.datetime.fromtimestamp(int(tag_unixtime)) + + if max_age and tagged_timestamp < max_age: + break + + commits = [] + + for repo_path, repo_name in zip(self.repo_paths, self.repo_names): + # Grab an easily-parsed history: fields delimited by nulls. + # Hash, author's name, commit timestamp, subject. + git_log_args = [ + 'git', + '--git-dir=' + repo_path, + 'log', + '--pretty=%h%x00%an%x00%at%x00%s', + "{0}..{1}".format(since_tag, tag), + ] + proc = subprocess.Popen(git_log_args, stdout=PIPE) + for line in proc.stdout: + hash, author, time, subject = line.strip().split('\x00') + commits.append( + FrontPageGitCommit( + hash = hash, + author = author, + time = datetime.datetime.fromtimestamp(int(time)), + subject = subject, + repo = repo_name, + ) + ) + + update = FrontPageGit( + source = self, + time = tagged_timestamp, + log = commits, + tag = tag, + ) + updates.append(update) + + return updates diff --git a/splinext/frontpage/templates/css/frontpage.mako b/splinext/frontpage/templates/css/frontpage.mako index d7012d3..124d73e 100644 --- a/splinext/frontpage/templates/css/frontpage.mako +++ b/splinext/frontpage/templates/css/frontpage.mako @@ -2,6 +2,7 @@ .frontpage-update:nth-child(2n) { background: #f0f0f0; } .frontpage-update .header { white-space: nowrap; padding: 0.5em 1em; border: 1px solid #b4c7e6; background: url(${h.static_uri('local', 'images/layout/th-background.png')}) left bottom repeat-x; -moz-border-radius-topleft: 1em; -moz-border-radius-topright: 1em; -webkit-border-top-left-radius: 0.5em; -webkit-border-top-right-radius: 0.5em; } .frontpage-update .header .category { float: left; font-size: 1.33em; margin-right: 0.25em; font-style: italic; color: #404040; vertical-align: bottom; } +.frontpage-update .header .category a { font-weight: normal; } .frontpage-update .header .category img { vertical-align: bottom; } .frontpage-update .header .date { float: right; white-space: nowrap; line-height: 1.33; margin-left: 0.33em; vertical-align: bottom; } .frontpage-update .header .title { overflow: hidden; font-size: 1.33em; height: 1em; vertical-align: bottom; text-overflow: ellipsis; font-weight: bold; color: #303030; } diff --git a/splinext/frontpage/templates/front_page/git.mako b/splinext/frontpage/templates/front_page/git.mako index ad3bcb4..119aab3 100644 --- a/splinext/frontpage/templates/front_page/git.mako +++ b/splinext/frontpage/templates/front_page/git.mako @@ -2,7 +2,9 @@
-
${update.category}:
+
${update.time}
${update.tag}
@@ -17,7 +19,7 @@ % endif - ${commit.hash} + ${commit.hash} ${commit.author} ${commit.subject} ${commit.time} diff --git a/splinext/frontpage/templates/front_page/rss.mako b/splinext/frontpage/templates/front_page/rss.mako index 108455a..3e7cc06 100644 --- a/splinext/frontpage/templates/front_page/rss.mako +++ b/splinext/frontpage/templates/front_page/rss.mako @@ -3,7 +3,9 @@
-
${update.category}:
+
${update.time}
${update.entry.title | n} diff --git a/splinext/frontpage/templates/index.mako b/splinext/frontpage/templates/index.mako index 1651be7..677c58c 100644 --- a/splinext/frontpage/templates/index.mako +++ b/splinext/frontpage/templates/index.mako @@ -4,5 +4,5 @@

Updates

% for update in c.updates: -<%include file="${update.template}" args="update=update" /> +<%include file="${update.source.template}" args="update=update" /> % endfor