1 """Base class for a front page source, as well as a handful of specific
5 from collections
import namedtuple
8 from subprocess
import PIPE
13 from pylons
import cache
15 from spline
.lib
import helpers
def max_age_to_datetime(max_age):
    """``max_age`` is specified in config as a number of seconds old.  This
    function takes that number and returns a corresponding datetime object:
    the cutoff moment, ``max_age`` seconds before now.

    Returns None when ``max_age`` is None, meaning "no age limit".
    """
    if max_age is None:
        return None

    # Config values arrive as strings; coerce before doing arithmetic
    seconds = int(max_age)

    return datetime.datetime.now() - datetime.timedelta(seconds=seconds)
29 """Represents a source to be polled for updates. Sources are populated
30 directly from the configuration file.
35 A name to identify this specific source.
38 Name of a Fugue icon to show next to the name.
41 A URL where the full history of this source can be found.
44 The maximum number of items from this source to show at a time.
48 Items older than this age (in seconds) will be excluded. Optional.
50 Additionally, subclasses **must** define a ``template`` property -- a path
51 to a Mako template that knows how to render an update from this source.
52 The template will be passed one parameter: the update object, ``update``.
55 def __init__(self
, title
, icon
, link
, limit
=None, max_age
=None):
59 self
.limit
= int(limit
)
60 self
.max_age
= max_age_to_datetime(max_age
)
62 def do_cron(self
, *args
, **kwargs
):
65 def poll(self
, global_limit
, global_max_age
):
66 """Public wrapper that takes care of reconciling global and source item
69 Subclasses should implement ``_poll``, below.
72 limit
= min(self
.limit
, global_limit
)
74 # Latest max age wins. Note that either could be None, but that's
75 # fine, because None is less than everything else
76 max_age
= max(self
.max_age
, global_max_age
)
78 return self
._poll(limit
, max_age
)
    def _poll(self, limit, max_age):
        """Implementation of polling for updates.  Must return an iterable.
        Each element should be an object with ``source`` and ``time``
        properties.  A namedtuple works well.

        ``limit`` is the maximum number of items to return (or None for no
        limit); ``max_age`` is a datetime cutoff (or None for no cutoff).
        """
        # Abstract: every concrete Source subclass must override this.
        raise NotImplementedError
class CachedSource(Source):
    """Supports caching a source's updates in memcache.

    On the surface, this functions just like any other ``Source``.  Calling
    ``poll`` still returns a list of updates.  However, ``poll`` does not call
    your ``_poll``; instead, your implementation is called by the spline cron,
    and the results are cached.  ``poll`` then returns the contents of the
    cache.

    You must define a ``_cache_key`` method that returns a key uniquely
    identifying this object.  Your key will be combined with the class name, so
    it only needs to be unique for that source, not globally.

    You may also override ``poll_frequency``, the number of minutes between
    pollings.  By default, this is a rather conservative 60.

    Note that it may take up to a minute after server startup for updates
    from a cached source to appear.
    """

    # Minutes between pollings; see class docstring
    poll_frequency = 60

    def cache_key(self):
        # Combine the concrete class with the subclass's key so keys from
        # different source classes can never collide
        return repr(type(self)) + ':' + self._cache_key()

    def _cache_key(self):
        # Abstract: subclasses must supply a per-source unique key
        raise NotImplementedError

    def do_cron(self, tic, *args, **kwargs):
        """Called by spline's cron.  Re-polls the source and stashes the
        fresh updates in the 'spline-frontpage' cache region.
        """
        if tic % self.poll_frequency != 0:
            # Not time to poll this source yet
            return

        updates = self._poll(self.limit, self.max_age)
        cache.get_cache('spline-frontpage')[self.cache_key()] = updates

    def poll(self, global_limit, global_max_age):
        """Fetches cached updates."""
        try:
            return cache.get_cache('spline-frontpage')[self.cache_key()]
        except KeyError:
            # Haven't cached anything yet, apparently
            return []
# Lightweight record for a single feed entry shown on the front page.
FrontPageRSS = namedtuple('FrontPageRSS', ['source', 'time', 'entry', 'content'])
class FeedSource(CachedSource):
    """Represents an RSS or Atom feed.

    Extra properties:

    ``feed_url``
        URL for the feed.
    """

    template = '/front_page/rss.mako'

    # Truncation length (in characters of raw HTML) for summaries generated
    # from full entry content
    SUMMARY_LENGTH = 1000

    def __init__(self, feed_url, **kwargs):
        # Title defaults to None; it's filled in from the feed itself on
        # the first poll (see _poll)
        kwargs.setdefault('title', None)
        super(FeedSource, self).__init__(**kwargs)

        self.feed_url = feed_url

    def _cache_key(self):
        # The feed URL uniquely identifies this source
        return self.feed_url

    def _poll(self, limit, max_age):
        """Fetches the feed and returns up to ``limit`` entries newer than
        ``max_age`` as ``FrontPageRSS`` items.
        """
        feed = feedparser.parse(self.feed_url)

        # Grab a human-readable title from the feed if config didn't give one
        if not self.title:
            self.title = feed.feed.title

        updates = []
        for entry in feed.entries[:limit]:
            # Grab a date -- Atom has published, RSS usually just has updated.
            # Both come out as time tuples, which datetime.datetime() can read
            try:
                timestamp_tuple = entry.published_parsed
            except AttributeError:
                timestamp_tuple = entry.updated_parsed
            timestamp = datetime.datetime(*timestamp_tuple[:6])

            if max_age and timestamp < max_age:
                # Entries should be oldest-first, so we can bail after the first
                # expired entry
                break

            # Try to find something to show!  Default to the summary, if there is
            # one, or try to generate one otherwise
            content = u''
            if 'summary' in entry:
                # If there be a summary, cheerfully trust that it's actually a
                # summary
                content = entry.summary
            elif 'content' in entry:
                # Full content is way too much, especially for my giant blog posts.
                # Cut this down to some arbitrary number of characters, then feed
                # it to lxml.html to fix tag nesting
                broken_html = entry.content[0].value[:self.SUMMARY_LENGTH]
                fragment = lxml.html.fromstring(broken_html)

                # Insert an ellipsis at the end of the last node with text
                last_text_node = None
                last_tail_node = None
                # Need to find the last node with a tail, OR the last node with
                # text if it comes later in document order
                for node in fragment.iter():
                    if node.tail:
                        last_tail_node = node
                        last_text_node = None
                    elif node.text:
                        last_text_node = node
                        last_tail_node = None

                if last_text_node is not None:
                    last_text_node.text += '...'
                if last_tail_node is not None:
                    last_tail_node.tail += '...'

                # Serialize the repaired fragment back into markup
                content = lxml.html.tostring(fragment)

            # Mark the HTML as a safe literal for the template
            content = helpers.literal(content)

            update = FrontPageRSS(
                source=self,
                time=timestamp,
                entry=entry,
                content=content,
            )
            updates.append(update)

        return updates
# Records for git-based updates: one FrontPageGit per tag, carrying a log of
# FrontPageGitCommit entries for the commits under that tag.
FrontPageGit = namedtuple('FrontPageGit', ['source', 'time', 'log', 'tag'])
FrontPageGitCommit = namedtuple('FrontPageGitCommit',
    ['hash', 'author', 'time', 'subject', 'repo'])
class GitSource(CachedSource):
    """Represents a git repository.

    The main repository is checked for annotated tags, and an update is
    considered to be the list of commits between them.  If any other
    repositories are listed and have the same tags, their commits will be
    included as well.

    Extra properties:

    ``repo_paths``
        Space-separated list of repositories.  These must be repository PATHS,
        not arbitrary git URLs.  Only the first one will be checked for the
        list of tags.

    ``repo_names``
        A list of names for the repositories, in parallel with ``repo_paths``.
        Used for constructing gitweb URLs and identifying the repositories.

    ``gitweb``
        Base URL to a gitweb installation, so commit ids can be linked to the
        commit proper.

    ``tag_pattern``
        Optional.  A shell glob pattern used to filter the tags.
    """

    # Mako template used to render one GitSource update on the front page
    template = '/front_page/git.mako'
261 def __init__(self
, repo_paths
, repo_names
, gitweb
, tag_pattern
=None, **kwargs
):
262 kwargs
.setdefault('title', None)
263 super(GitSource
, self
).__init__(**kwargs
)
265 # Repo stuff can be space-delimited lists
266 self
.repo_paths
= repo_paths
.split()
267 self
.repo_names
= repo_names
.split()
270 self
.tag_pattern
= tag_pattern
    def _cache_key(self):
        # The primary repository path uniquely identifies this source; only
        # the first repo is checked for tags, so it's the natural key.
        return self.repo_paths[0]
275 def _poll(self
, limit
, max_age
):
276 # Fetch the main repo's git tags
277 git_dir
= '--git-dir=' + self
.repo_paths
[0]
284 args
.append(self
.tag_pattern
)
286 git_output
, _
= subprocess
.Popen(args
, stdout
=PIPE
).communicate()
287 tags
= git_output
.strip().split('\n')
289 # Tags come out in alphabetical order, which means earliest first. Reverse
290 # it to make the slicing easier
292 # Only history from tag to tag is actually interesting, so get the most
293 # recent $limit tags but skip the earliest
294 interesting_tags
= tags
[:-1][:limit
]
297 for tag
, since_tag
in zip(interesting_tags
, tags
[1:]):
298 # Get the date when this tag was actually created.
299 # 'raw' format gives unixtime followed by timezone offset
304 '--format=%(taggerdate:raw)',
307 tag_timestamp
, _
= subprocess
.Popen(args
, stdout
=PIPE
).communicate()
308 tag_unixtime
, tag_timezone
= tag_timestamp
.split(None, 1)
309 tagged_timestamp
= datetime
.datetime
.fromtimestamp(int(tag_unixtime
))
311 if max_age
and tagged_timestamp
< max_age
:
316 for repo_path
, repo_name
in zip(self
.repo_paths
, self
.repo_names
):
317 # Grab an easily-parsed history: fields delimited by nulls.
318 # Hash, author's name, commit timestamp, subject.
321 '--git-dir=' + repo_path
,
323 '--pretty=%h%x00%an%x00%at%x00%s',
324 "{0}..{1}".format(since_tag
, tag
),
326 proc
= subprocess
.Popen(git_log_args
, stdout
=PIPE
)
327 for line
in proc
.stdout
:
328 hash, author
, time
, subject
= line
.strip().split('\x00')
333 time
= datetime
.datetime
.fromtimestamp(int(time
)),
339 update
= FrontPageGit(
341 time
= tagged_timestamp
,
345 updates
.append(update
)