1 """Base class for a front page source, as well as a handful of specific
5 from collections
import namedtuple
9 from subprocess
import PIPE
10 from urllib2
import URLError
15 from pylons
import cache
17 from spline
.lib
import helpers
def max_age_to_datetime(max_age):
    """``max_age`` is specified in config as a number of seconds old.  This
    function takes that number and returns a corresponding datetime object:
    the cutoff moment before which items are considered too old to show.

    Returns None when ``max_age`` is None, i.e. no age limit was configured.
    """
    # max_age is optional in config; without this guard, int(None) raises
    if max_age is None:
        return None

    dt = datetime.datetime.now()
    dt -= datetime.timedelta(seconds=int(max_age))

    return dt
33 """Represents a source to be polled for updates. Sources are populated
34 directly from the configuration file.
39 A name to identify this specific source.
42 Name of a Fugue icon to show next to the name.
45 A URL where the full history of this source can be found.
48 The maximum number of items from this source to show at a time.
52 Items older than this age (in seconds) will be excluded. Optional.
54 Additionally, subclasses **must** define a ``template`` property -- a path
55 to a Mako template that knows how to render an update from this source.
56 The template will be passed one parameter: the update object, ``update``.
59 def __init__(self
, config
, title
, icon
, link
, limit
=None, max_age
=None):
63 self
.limit
= int(limit
)
64 self
.max_age
= max_age_to_datetime(max_age
)
    def do_cron(self, *args, **kwargs):
        """Hook called periodically by the spline cron.

        The base implementation does nothing; subclasses that need periodic
        work (e.g. cache refreshing) override this.
        """
69 def poll(self
, global_limit
, global_max_age
):
70 """Public wrapper that takes care of reconciling global and source item
73 Subclasses should implement ``_poll``, below.
76 limit
= min(self
.limit
, global_limit
)
78 # Latest max age wins. Note that either could be None, but that's
79 # fine, because None is less than everything else
80 max_age
= max(self
.max_age
, global_max_age
)
82 return self
._poll(limit
, max_age
)
    def _poll(self, limit, max_age):
        """Implementation of polling for updates.  Must return an iterable.
        Each element should be an object with ``source`` and ``time``
        properties.  A namedtuple works well.

        Abstract; subclasses must override this.
        """
        raise NotImplementedError
class CachedSource(Source):
    """Supports caching a source's updates in memcache.

    On the surface, this functions just like any other ``Source``.  Calling
    ``poll`` still returns a list of updates.  However, ``poll`` does not call
    your ``_poll``; instead, your implementation is called by the spline cron,
    and the results are cached.  ``poll`` then returns the contents of the
    cache.

    ``_poll`` may return None, in which case the cache will be left unchanged.

    You must define a ``_cache_key`` method that returns a key uniquely
    identifying this object.  Your key will be combined with the class name, so
    it only needs to be unique for that source, not globally.

    You may also override ``poll_frequency``, the number of minutes between
    pollings.  By default, this is a rather conservative 60.

    Note that it may take up to a minute after server startup for updates
    from a cached source to appear.
    """
    # Minutes between pollings; see class docstring
    poll_frequency = 60

    def cache_key(self):
        """Full cache key: class repr plus the subclass-specific key, so keys
        from different source types can't collide.
        """
        return repr(type(self)) + ':' + self._cache_key()

    def _cache_key(self):
        """Subclasses must return a string unique to this source."""
        raise NotImplementedError

    def do_cron(self, tic, *args, **kwargs):
        """Called by the spline cron; refreshes the cache when this source's
        polling interval comes around.
        """
        if tic % self.poll_frequency != 0:
            # Not time to poll this source yet
            return

        updates = self._poll(self.limit, self.max_age)
        if updates is not None:
            # None means "leave the cache alone" (e.g. the feed was down)
            cache.get_cache('spline-frontpage')[self.cache_key()] = updates

    def poll(self, global_limit, global_max_age):
        """Fetches cached updates."""
        try:
            return cache.get_cache('spline-frontpage')[self.cache_key()]
        except KeyError:
            # Haven't cached anything yet, apparently
            return []
# Record for a single RSS/Atom item as rendered on the front page.
FrontPageRSS = namedtuple('FrontPageRSS', ['source', 'time', 'entry', 'content'])
class FeedSource(CachedSource):
    """Represents an RSS or Atom feed.

    Extra properties:

    ``feed_url``
        URL of the feed to poll.
    """
    template = '/front_page/rss.mako'

    # Generated summaries are truncated to roughly this many characters
    SUMMARY_LENGTH = 1000

    def __init__(self, feed_url, **kwargs):
        # Title is optional here; it can be discovered from the feed itself
        kwargs.setdefault('title', None)
        super(FeedSource, self).__init__(**kwargs)

        self.feed_url = feed_url

    def _cache_key(self):
        # The feed URL uniquely identifies this source
        return self.feed_url

    def _poll(self, limit, max_age):
        """Fetch the feed and return a list of FrontPageRSS updates, or None
        if the feed appears to be unreachable.
        """
        feed = feedparser.parse(self.feed_url)

        if feed.bozo and isinstance(feed.bozo_exception, URLError):
            # Feed is DOWN.  Bail here; otherwise, old entries might be lost
            # just because, say, Bulbanews is down yet again
            return None

        if not self.title:
            # No title configured; borrow the feed's own
            self.title = feed.feed.title

        updates = []
        for entry in feed.entries[:limit]:
            # Grab a date -- Atom has published, RSS usually just has updated.
            # Both come out as time tuples, which datetime.datetime() can read
            try:
                timestamp_tuple = entry.published_parsed
            except AttributeError:
                timestamp_tuple = entry.updated_parsed
            timestamp = datetime.datetime(*timestamp_tuple[:6])

            if max_age and timestamp < max_age:
                # Entries should be oldest-first, so we can bail after the first
                # entry that's too old
                break

            # Try to find something to show!  Default to the summary, if there is
            # one, or try to generate one otherwise
            if 'summary' in entry:
                # If there be a summary, cheerfully trust that it's actually a
                # summary
                content = entry.summary
            elif 'content' in entry and \
                    len(entry.content[0].value) <= self.SUMMARY_LENGTH:
                # Full content is short; use as-is!
                content = entry.content[0].value
            elif 'content' in entry:
                # Full content is way too much, especially for my giant blog posts.
                # Cut this down to some arbitrary number of characters, then feed
                # it to lxml.html to fix tag nesting
                broken_html = entry.content[0].value[:self.SUMMARY_LENGTH]
                fragment = lxml.html.fromstring(broken_html)

                # Insert an ellipsis at the end of the last node with text
                last_text_node = None
                last_tail_node = None
                # Need to find the last node with a tail, OR the last node with
                # text if it comes later in document order
                for node in fragment.iter():
                    if node.tail:
                        last_tail_node = node
                        last_text_node = None
                    elif node.text:
                        last_text_node = node
                        last_tail_node = None

                if last_text_node is not None:
                    last_text_node.text += '...'
                if last_tail_node is not None:
                    last_tail_node.tail += '...'

                content = lxml.html.tostring(fragment)
            else:
                # Nothing usable to show for this entry
                content = u''

            # Mark as pre-escaped HTML for the template
            content = helpers.literal(content)

            update = FrontPageRSS(
                source=self,
                time=timestamp,
                entry=entry,
                content=content,
            )
            updates.append(update)

        return updates
# One front-page update for a git source: the commits between two tags.
FrontPageGit = namedtuple('FrontPageGit', ['source', 'time', 'log', 'tag'])
# A single commit within a FrontPageGit update's log.
FrontPageGitCommit = namedtuple('FrontPageGitCommit',
    ['hash', 'author', 'email', 'time', 'subject', 'repo'])
class GitSource(CachedSource):
    """Represents a git repository.

    The main repository is checked for annotated tags, and an update is
    considered to be the list of commits between them.  If any other
    repositories are listed and have the same tags, their commits will be
    included as well.

    Extra properties:

    ``repo_paths``
        Space-separated list of repositories.  These must be repository PATHS,
        not arbitrary git URLs.  Only the first one will be checked for the
        list of tags.

    ``repo_names``
        A list of names for the repositories, in parallel with ``repo_paths``.
        Used for constructing gitweb URLs and identifying the repositories.

    ``gitweb``
        Base URL to a gitweb installation, so commit ids can be linked to the
        commit proper.

    ``bug_tracker``
        URL to a bug tracker; anything matching "#xxx" will be converted into a
        link to this.  Should contain a "{0}", which will be replaced by the
        bug number.  Optional.

    ``tag_pattern``
        Optional.  A shell glob pattern used to filter the tags.
    """
    template = '/front_page/git.mako'

    def __init__(self, repo_paths, repo_names, gitweb, bug_tracker=None,
                 tag_pattern=None, **kwargs):

        # Title is optional; the template can fall back to repo names
        kwargs.setdefault('title', None)
        super(GitSource, self).__init__(**kwargs)

        # Repo stuff can be space-delimited lists
        self.repo_paths = repo_paths.split()
        self.repo_names = repo_names.split()

        self.gitweb = gitweb
        self.bug_tracker = bug_tracker
        self.tag_pattern = tag_pattern

    def _cache_key(self):
        # The main repository path uniquely identifies this source
        return self.repo_paths[0]

    def _poll(self, limit, max_age):
        """Inspect the main repository's tags and return a list of
        FrontPageGit updates, one per tag-to-tag span.
        """
        # Fetch the main repo's git tags
        git_dir = '--git-dir=' + self.repo_paths[0]
        args = ['git', git_dir, 'tag', '-l']
        if self.tag_pattern:
            args.append(self.tag_pattern)

        git_output, _ = subprocess.Popen(args, stdout=PIPE).communicate()
        tags = git_output.strip().split('\n')

        # Tags come out in alphabetical order, which means earliest first. Reverse
        # it to make the slicing easier
        tags.reverse()

        # Only history from tag to tag is actually interesting, so get the most
        # recent $limit tags but skip the earliest
        interesting_tags = tags[:-1][:limit]

        updates = []
        for tag, since_tag in zip(interesting_tags, tags[1:]):
            # Get the date when this tag was actually created.
            # 'raw' format gives unixtime followed by timezone offset
            # NOTE(review): the ref argument was reconstructed -- confirm
            # against `git for-each-ref` usage
            args = [
                'git', git_dir, 'for-each-ref',
                '--format=%(taggerdate:raw)',
                'refs/tags/' + tag,
            ]
            tag_timestamp, _ = subprocess.Popen(args, stdout=PIPE).communicate()
            tag_unixtime, tag_timezone = tag_timestamp.split(None, 1)
            tagged_timestamp = datetime.datetime.fromtimestamp(int(tag_unixtime))

            if max_age and tagged_timestamp < max_age:
                # Tags are newest-first, so everything beyond this is older
                break

            commits = []
            for repo_path, repo_name in zip(self.repo_paths, self.repo_names):
                # Grab an easily-parsed history: fields delimited by nulls.
                # Hash, author's name, email, commit timestamp, subject.
                git_log_args = [
                    'git',
                    '--git-dir=' + repo_path,
                    'log',
                    '--pretty=%h%x00%an%x00%aE%x00%at%x00%s',
                    "{0}..{1}".format(since_tag, tag),
                ]
                proc = subprocess.Popen(git_log_args, stdout=PIPE)
                for line in proc.stdout:
                    # 'hash' shadows the builtin, but matches the namedtuple
                    # field name
                    hash, author, email, time, subject \
                        = line.strip().decode('utf8').split('\x00')

                    # Convert bug numbers in subject to URLs
                    if self.bug_tracker:
                        subject = helpers.literal(
                            re.sub(u'#(\d+)', self._linkify_bug_number,
                                   subject))

                    commits.append(FrontPageGitCommit(
                        hash=hash,
                        author=author,
                        email=email,
                        time=datetime.datetime.fromtimestamp(int(time)),
                        subject=subject,
                        repo=repo_name,
                    ))

            update = FrontPageGit(
                source=self,
                time=tagged_timestamp,
                log=commits,
                tag=tag,
            )
            updates.append(update)

        return updates

    def _linkify_bug_number(self, match):
        """Regex replace function for changing bug numbers into links."""
        bug_url = self.bug_tracker.format(match.group(1))
        return helpers.literal(
            u"""<a href="{0}">{1}</a>""".format(bug_url, match.group(0)))