a65e3e88f642952b381da64e91c3c3426bdeb563
1 """Base class for a front page source, as well as a handful of specific
5 from collections
import namedtuple
9 from subprocess
import PIPE
10 from urllib2
import URLError
15 from pylons
import cache
17 from spline
.lib
import helpers
19 def max_age_to_datetime(max_age
):
20 """``max_age`` is specified in config as a number of seconds old. This
21 function takes that number and returns a corresponding datetime object.
26 dt
= datetime
.datetime
.now()
27 dt
-= datetime
.timedelta(seconds
=int(max_age
))
33 """Represents a source to be polled for updates. Sources are populated
34 directly from the configuration file.
39 A name to identify this specific source.
42 Name of a Fugue icon to show next to the name.
45 A URL where the full history of this source can be found.
48 The maximum number of items from this source to show at a time.
52 Items older than this age (in seconds) will be excluded. Optional.
54 Additionally, subclasses **must** define a ``template`` property -- a path
55 to a Mako template that knows how to render an update from this source.
56 The template will be passed one parameter: the update object, ``update``.
59 def __init__(self
, title
, icon
, link
, limit
=None, max_age
=None):
63 self
.limit
= int(limit
)
64 self
.max_age
= max_age_to_datetime(max_age
)
66 def do_cron(self
, *args
, **kwargs
):
def poll(self, global_limit, global_max_age):
    """Reconcile the global and per-source constraints, then poll.

    ``global_limit`` is compared against ``self.limit`` and
    ``global_max_age`` against ``self.max_age``.  Any of them may be
    ``None``, meaning "no constraint from that side".  The smallest limit
    and the latest max age that are actually set are passed to ``_poll``,
    and its result is returned unchanged.

    Subclasses should implement ``_poll`` rather than overriding this.
    """
    # Filter out None explicitly instead of leaning on Python 2's implicit
    # "None sorts below everything" ordering: with a bare min(), a missing
    # global limit would silently discard this source's own limit, and on
    # Python 3 the comparison raises TypeError.
    limits = [n for n in (self.limit, global_limit) if n is not None]
    limit = min(limits) if limits else None

    # Latest max age wins; None only survives if neither side set one.
    cutoffs = [t for t in (self.max_age, global_max_age) if t is not None]
    max_age = max(cutoffs) if cutoffs else None

    return self._poll(limit, max_age)
84 def _poll(self
, limit
, max_age
):
85 """Implementation of polling for updates. Must return an iterable.
86 Each element should be an object with ``source`` and ``time``
87 properties. A namedtuple works well.
89 raise NotImplementedError
91 class CachedSource(Source
):
92 """Supports caching a source's updates in memcache.
94 On the surface, this functions just like any other ``Source``. Calling
95 ``poll`` still returns a list of updates. However, ``poll`` does not call
96 your ``_poll``; instead, your implementation is called by the spline cron,
97 and the results are cached. ``poll`` then returns the contents of the
100 You must define a ``_cache_key`` method that returns a key uniquely
101 identifying this object. Your key will be combined with the class name, so
102 it only needs to be unique for that source, not globally.
104 You may also override ``poll_frequency``, the number of minutes between
105 pollings. By default, this is a rather conservative 60.
107 Note that it may take up to a minute after server startup for updates
108 from a cached source to appear.
114 return repr(type(self
)) + ':' + self
._cache_key()
116 def _cache_key(self
):
117 raise NotImplementedError
119 def do_cron(self
, tic
, *args
, **kwargs
):
120 if tic % self
.poll_frequency
!= 0:
124 updates
= self
._poll(self
.limit
, self
.max_age
)
125 cache
.get_cache('spline-frontpage')[self
.cache_key()] = updates
129 def poll(self
, global_limit
, global_max_age
):
130 """Fetches cached updates."""
132 return cache
.get_cache('spline-frontpage')[self
.cache_key()]
134 # Haven't cached anything yet, apparently
# Update record built by FeedSource._poll.  ``source`` and ``time`` are the
# two properties every update object has to expose.
# NOTE(review): ``entry`` is presumably the feedparser entry and ``content``
# the summary markup prepared for display -- confirm against FeedSource._poll.
FrontPageRSS = namedtuple('FrontPageRSS', 'source time entry content')
139 class FeedSource(CachedSource
):
140 """Represents an RSS or Atom feed.
148 template
= '/front_page/rss.mako'
150 SUMMARY_LENGTH
= 1000
154 def __init__(self
, feed_url
, **kwargs
):
155 kwargs
.setdefault('title', None)
156 super(FeedSource
, self
).__init__(**kwargs
)
158 self
.feed_url
= feed_url
160 def _cache_key(self
):
163 def _poll(self
, limit
, max_age
):
164 feed
= feedparser
.parse(self
.feed_url
)
166 if feed
.bozo
and isinstance(feed
.bozo_exception
, URLError
):
167 # Feed is DOWN. Bail here; otherwise, old entries might be lost
168 # just because, say, Bulbanews is down yet again
169 raise feed
.bozo_exception
172 self
.title
= feed
.feed
.title
175 for entry
in feed
.entries
[:limit
]:
176 # Grab a date -- Atom has published, RSS usually just has updated.
177 # Both come out as time tuples, which datetime.datetime() can read
179 timestamp_tuple
= entry
.published_parsed
180 except AttributeError:
181 timestamp_tuple
= entry
.updated_parsed
182 timestamp
= datetime
.datetime(*timestamp_tuple
[:6])
184 if max_age
and timestamp
< max_age
:
185 # Entries should be oldest-first, so we can bail after the first
189 # Try to find something to show! Default to the summary, if there is
190 # one, or try to generate one otherwise
192 if 'summary' in entry
:
193 # If there be a summary, cheerfully trust that it's actually a
195 content
= entry
.summary
196 elif 'content' in entry
:
197 # Full content is way too much, especially for my giant blog posts.
198 # Cut this down to some arbitrary number of characters, then feed
199 # it to lxml.html to fix tag nesting
200 broken_html
= entry
.content
[0].value
[:self
.SUMMARY_LENGTH
]
201 fragment
= lxml
.html
.fromstring(broken_html
)
203 # Insert an ellipsis at the end of the last node with text
204 last_text_node
= None
205 last_tail_node
= None
206 # Need to find the last node with a tail, OR the last node with
208 for node
in fragment
.iter():
210 last_tail_node
= node
211 last_text_node
= None
213 last_text_node
= node
214 last_tail_node
= None
216 if last_text_node
is not None:
217 last_text_node
.text
+= '...'
218 if last_tail_node
is not None:
219 last_tail_node
.tail
+= '...'
222 content
= lxml
.html
.tostring(fragment
)
224 content
= helpers
.literal(content
)
226 update
= FrontPageRSS(
232 updates
.append(update
)
# Update record built by GitSource._poll; ``time`` is filled with the
# timestamp at which the tag was created.
# NOTE(review): ``log`` presumably holds the commits belonging to ``tag`` --
# confirm against GitSource._poll.
FrontPageGit = namedtuple('FrontPageGit', 'source time log tag')

# One commit parsed from ``git log`` output: the fields follow the
# hash/author/email/time/subject values split out of each log line, plus the
# repository the commit came from.
FrontPageGitCommit = namedtuple(
    'FrontPageGitCommit',
    'hash author email time subject repo',
)
241 class GitSource(CachedSource
):
242 """Represents a git repository.
244 The main repository is checked for annotated tags, and an update is
245 considered to be the list of commits between them. If any other
246 repositories are listed and have the same tags, their commits will be
252 Space-separated list of repositories. These must be repository PATHS,
253 not arbitrary git URLs. Only the first one will be checked for the
257 A list of names for the repositories, in parallel with ``repo_paths``.
258 Used for constructing gitweb URLs and identifying the repositories.
261 Base URL to a gitweb installation, so commit ids can be linked to the
265 URL to a bug tracker; anything matching "#xxx" will be converted into a
266 link to this. Should contain a "{0}", which will be replaced by the
270 Optional. A shell glob pattern used to filter the tags.
273 template
= '/front_page/git.mako'
275 def __init__(self
, repo_paths
, repo_names
, gitweb
, bug_tracker
=None,
276 tag_pattern
=None, **kwargs
):
278 kwargs
.setdefault('title', None)
279 super(GitSource
, self
).__init__(**kwargs
)
281 # Repo stuff can be space-delimited lists
282 self
.repo_paths
= repo_paths
.split()
283 self
.repo_names
= repo_names
.split()
286 self
.bug_tracker
= bug_tracker
287 self
.tag_pattern
= tag_pattern
289 def _cache_key(self
):
290 return self
.repo_paths
[0]
292 def _poll(self
, limit
, max_age
):
293 # Fetch the main repo's git tags
294 git_dir
= '--git-dir=' + self
.repo_paths
[0]
301 args
.append(self
.tag_pattern
)
303 git_output
, _
= subprocess
.Popen(args
, stdout
=PIPE
).communicate()
304 tags
= git_output
.strip().split('\n')
306 # Tags come out in alphabetical order, which means earliest first. Reverse
307 # it to make the slicing easier
309 # Only history from tag to tag is actually interesting, so get the most
310 # recent $limit tags but skip the earliest
311 interesting_tags
= tags
[:-1][:limit
]
314 for tag
, since_tag
in zip(interesting_tags
, tags
[1:]):
315 # Get the date when this tag was actually created.
316 # 'raw' format gives unixtime followed by timezone offset
321 '--format=%(taggerdate:raw)',
324 tag_timestamp
, _
= subprocess
.Popen(args
, stdout
=PIPE
).communicate()
325 tag_unixtime
, tag_timezone
= tag_timestamp
.split(None, 1)
326 tagged_timestamp
= datetime
.datetime
.fromtimestamp(int(tag_unixtime
))
328 if max_age
and tagged_timestamp
< max_age
:
333 for repo_path
, repo_name
in zip(self
.repo_paths
, self
.repo_names
):
334 # Grab an easily-parsed history: fields delimited by nulls.
335 # Hash, author's name, commit timestamp, subject.
338 '--git-dir=' + repo_path
,
340 '--pretty=%h%x00%an%x00%aE%x00%at%x00%s',
341 "{0}..{1}".format(since_tag
, tag
),
343 proc
= subprocess
.Popen(git_log_args
, stdout
=PIPE
)
344 for line
in proc
.stdout
:
345 hash, author
, email
, time
, subject \
346 = line
.strip().decode('utf8').split('\x00')
348 # Convert bug numbers in subject to URLs
350 subject
= helpers
.literal(
351 re
.sub(u
'#(\d+)', self
._linkify_bug_number
, subject
)
359 time
= datetime
.datetime
.fromtimestamp(int(time
)),
365 update
= FrontPageGit(
367 time
= tagged_timestamp
,
371 updates
.append(update
)
def _linkify_bug_number(self, match):
    """Turn a matched bug reference into an HTML link to the bug tracker.

    Used as the replacement function for ``re.sub``: group 1 of ``match``
    is formatted into ``self.bug_tracker`` to build the URL, and the whole
    matched text becomes the link text.  The markup is wrapped in
    ``helpers.literal`` before being returned.
    """
    bug_number = match.group(1)
    link_text = match.group(0)
    bug_url = self.bug_tracker.format(bug_number)

    anchor = u"""<a href="{0}">{1}</a>""".format(bug_url, link_text)
    return helpers.literal(anchor)