d082adaad369c43d955202aa426a34bc79c46443
[zzz-spline-frontpage.git] / splinext / frontpage / sources.py
1 """Base class for a front page source, as well as a handful of specific
2 implementations.
3 """
4
5 from collections import namedtuple
6 import datetime
7 import subprocess
8 from subprocess import PIPE
9
10 import feedparser
11 import lxml.html
12
13 from spline.lib import helpers
14
def max_age_to_datetime(max_age):
    """``max_age`` is specified in config as a number of seconds old.  This
    function takes that number and returns a corresponding datetime object.

    ``max_age`` may be anything ``int()`` accepts (e.g. a string of digits).

    Returns ``None`` when ``max_age`` is ``None``, meaning "no age
    restriction".  Otherwise returns a naive ``datetime`` equal to the
    current local time minus ``max_age`` seconds; anything older than that
    moment is too old.
    """
    if max_age is None:
        return None

    seconds = int(max_age)

    # The cutoff is "now, minus the allowed age"
    return datetime.datetime.now() - datetime.timedelta(seconds=seconds)
23
24
25
class Source(object):
    """Represents a source to be polled for updates.  Sources are populated
    directly from the configuration file.

    Properties:

    ``title``
        A name to identify this specific source.

    ``icon``
        Name of a Fugue icon to show next to the name.

    ``link``
        A URL where the full history of this source can be found.

    ``limit``
        The maximum number of items from this source to show at a time.
        Optional; ``None`` means this source imposes no limit of its own.

    ``max_age``
        Items older than this age (in seconds) will be excluded.  Optional.
        Stored as whatever ``max_age_to_datetime`` returns for the
        configured value.

    Additionally, subclasses **must** define a ``template`` property -- a path
    to a Mako template that knows how to render an update from this source.
    The template will be passed one parameter: the update object, ``update``.
    """

    def __init__(self, title, icon, link, limit=None, max_age=None):
        self.title = title
        self.icon = icon
        self.link = link
        # The limit is optional, so only coerce it when one was given;
        # int(None) would raise
        self.limit = None if limit is None else int(limit)
        self.max_age = max_age_to_datetime(max_age)

    def poll(self, global_limit, global_max_age):
        """Public wrapper that takes care of reconciling global and source item
        limit and max age.

        Either value of each pair may be ``None``, meaning "unrestricted";
        ``None`` is passed on to ``_poll`` only when both are ``None``.

        Subclasses should implement ``_poll``, below.
        """
        # Smallest limit wins.  An unset limit must never beat a real one,
        # so None is filtered out rather than compared
        limits = [
            candidate for candidate in (self.limit, global_limit)
            if candidate is not None
        ]
        limit = min(limits) if limits else None

        # Latest max age wins, again ignoring whichever side is unset
        cutoffs = [
            candidate for candidate in (self.max_age, global_max_age)
            if candidate is not None
        ]
        max_age = max(cutoffs) if cutoffs else None

        return self._poll(limit, max_age)

    def _poll(self, limit, max_age):
        """Implementation of polling for updates.  Must return an iterable.
        Each element should be an object with ``source`` and ``time``
        properties.  A namedtuple works well.
        """
        raise NotImplementedError
81
82
# Update record built by FeedSource for each feed entry: the source that
# produced it, the entry's timestamp, the parsed feed entry itself, and the
# content to display for it.
FrontPageRSS = namedtuple(
    'FrontPageRSS',
    ('source', 'time', 'entry', 'content'),
)
class FeedSource(Source):
    """Represents an RSS or Atom feed.

    Extra properties:

    ``feed_url``
        URL for the feed.
    """

    template = '/front_page/rss.mako'

    # Maximum number of characters of an entry's full content to keep when
    # the entry has no summary of its own
    SUMMARY_LENGTH = 1000

    def __init__(self, feed_url, **kwargs):
        # The title is optional for feeds: when it is not given, the first
        # poll fills it in from the feed itself
        kwargs.setdefault('title', None)
        super(FeedSource, self).__init__(**kwargs)

        self.feed_url = feed_url

    def _last_text_slot(self, node):
        """Find the last piece of text, in document order, inside ``node``.

        Returns an ``(element, attribute_name)`` pair where the attribute is
        ``'text'`` or ``'tail'``, or ``None`` if there is no text at all.
        ``node``'s own tail is not considered, since it lies outside the
        node.
        """
        # Document order within an element is: its text, then each child
        # followed by that child's tail.  So walk the children backwards,
        # looking at each tail before descending into the child.
        for child in reversed(list(node)):
            if child.tail:
                return child, 'tail'
            slot = self._last_text_slot(child)
            if slot is not None:
                return slot

        if node.text:
            return node, 'text'
        return None

    def _summarize(self, full_html):
        """Cut ``full_html`` down to at most ``SUMMARY_LENGTH`` characters
        and return it as serialized HTML with its tag nesting repaired.

        An ellipsis is appended to the last text only when something was
        actually cut off.  Empty or whitespace-only content yields an empty
        string.
        """
        broken_html = full_html[:self.SUMMARY_LENGTH]
        if not broken_html.strip():
            # lxml refuses to parse an empty document
            return u''

        # Slicing may have cut through the middle of the markup; parsing
        # with lxml.html fixes the tag nesting
        fragment = lxml.html.fromstring(broken_html)

        if len(full_html) > self.SUMMARY_LENGTH:
            slot = self._last_text_slot(fragment)
            if slot is not None:
                node, attr = slot
                setattr(node, attr, getattr(node, attr) + '...')

        # Serialize
        return lxml.html.tostring(fragment)

    def _poll(self, limit, max_age):
        """Fetch the feed and return a list of ``FrontPageRSS`` updates, at
        most ``limit`` of them, stopping at the first entry older than
        ``max_age``.
        """
        feed = feedparser.parse(self.feed_url)

        if not self.title:
            # A feed that failed to load or declares no title must not take
            # the whole poll down with it
            self.title = getattr(feed.feed, 'title', self.title)

        updates = []
        for entry in feed.entries[:limit]:
            # Grab a date -- Atom has published, RSS usually just has updated.
            # Both come out as time tuples, which datetime.datetime() can read
            timestamp_tuple = (
                getattr(entry, 'published_parsed', None)
                or getattr(entry, 'updated_parsed', None)
            )
            if timestamp_tuple is None:
                # Every update needs a time, so an undated entry is unusable
                continue
            timestamp = datetime.datetime(*timestamp_tuple[:6])

            if max_age and timestamp < max_age:
                # Entries are expected newest-first, so we can bail after the
                # first expired entry
                break

            # Try to find something to show!  Default to the summary, if there
            # is one, or try to generate one otherwise
            content = u''
            if 'summary' in entry:
                # If there be a summary, cheerfully trust that it's actually a
                # summary
                content = entry.summary
            elif 'content' in entry:
                # Full content is way too much, so cut it down to size
                content = self._summarize(entry.content[0].value)

            content = helpers.literal(content)

            update = FrontPageRSS(
                source=self,
                time=timestamp,
                content=content,
                entry=entry,
            )
            updates.append(update)

        return updates
170
171
# Update record built by GitSource for each tag: the source that produced it,
# the time the tag was created, the list of commits leading up to the tag,
# and the tag name.
FrontPageGit = namedtuple(
    'FrontPageGit',
    ('source', 'time', 'log', 'tag'),
)

# One commit within a FrontPageGit log: abbreviated hash, author name, commit
# time, subject line, and the name of the repository it came from.
FrontPageGitCommit = namedtuple(
    'FrontPageGitCommit',
    ('hash', 'author', 'time', 'subject', 'repo'),
)
175
class GitSource(Source):
    """Represents a git repository.

    The main repository is checked for annotated tags, and an update is
    considered to be the list of commits between them.  If any other
    repositories are listed and have the same tags, their commits will be
    included as well.

    Extra properties:

    ``repo_paths``
        Space-separated list of repositories.  These must be repository PATHS,
        not arbitrary git URLs.  Only the first one will be checked for the
        list of tags.

    ``repo_names``
        A list of names for the repositories, in parallel with ``repo_paths``.
        Used for constructing gitweb URLs and identifying the repositories.

    ``gitweb``
        Base URL to a gitweb installation, so commit ids can be linked to the
        commit proper.

    ``tag_pattern``
        Optional.  A shell glob pattern used to filter the tags.
    """

    template = '/front_page/git.mako'

    def __init__(self, repo_paths, repo_names, gitweb, tag_pattern=None, **kwargs):
        kwargs.setdefault('title', None)
        super(GitSource, self).__init__(**kwargs)

        # Repo stuff can be space-delimited lists
        self.repo_paths = repo_paths.split()
        self.repo_names = repo_names.split()

        self.gitweb = gitweb
        self.tag_pattern = tag_pattern

    def _git_output(self, repo_path, *git_args):
        """Run ``git`` against the repository at ``repo_path`` with the given
        arguments and return everything it wrote to stdout.
        """
        args = ['git', '--git-dir=' + repo_path]
        args.extend(git_args)
        # communicate() both drains the pipe and waits for the child, so no
        # finished git processes are left behind unreaped
        output, _ = subprocess.Popen(args, stdout=PIPE).communicate()
        return output

    def _poll(self, limit, max_age):
        """Return a list of ``FrontPageGit`` updates, one per tag and newest
        first, each carrying the commits made since the previous tag.
        """
        main_repo = self.repo_paths[0]

        # Fetch the main repo's git tags
        tag_args = ['tag', '-l']
        if self.tag_pattern:
            tag_args.append(self.tag_pattern)
        tags = self._git_output(main_repo, *tag_args).strip().split('\n')

        # Tags come out in alphabetical order, which is taken to mean earliest
        # first.  Reverse it to make the slicing easier
        tags.reverse()
        # Only history from tag to tag is actually interesting, so get the most
        # recent $limit tags but skip the earliest
        interesting_tags = tags[:-1][:limit]

        updates = []
        # Pair every tag with the one that precedes it
        for tag, since_tag in zip(interesting_tags, tags[1:]):
            # Get the date when this tag was actually created.
            # 'raw' format gives unixtime followed by timezone offset
            tag_timestamp = self._git_output(
                main_repo,
                'for-each-ref',
                '--format=%(taggerdate:raw)',
                'refs/tags/' + tag,
            )
            timestamp_fields = tag_timestamp.split()
            if not timestamp_fields:
                # No tagger date at all: this is a lightweight tag rather
                # than an annotated one, so it is not an update
                continue
            tagged_timestamp = datetime.datetime.fromtimestamp(
                int(timestamp_fields[0]))

            if max_age and tagged_timestamp < max_age:
                # Tags are walked newest first, so everything after this one
                # is too old as well
                break

            commits = []

            for repo_path, repo_name in zip(self.repo_paths, self.repo_names):
                # Grab an easily-parsed history: fields delimited by nulls.
                # Hash, author's name, commit timestamp, subject.
                log_output = self._git_output(
                    repo_path,
                    'log',
                    '--pretty=%h%x00%an%x00%at%x00%s',
                    "{0}..{1}".format(since_tag, tag),
                )
                for line in log_output.split('\n'):
                    line = line.strip()
                    if not line:
                        # Nothing but the trailing newline, or a repository
                        # that produced no log at all
                        continue

                    commit_hash, author, commit_time, subject = \
                        line.split('\x00')
                    commits.append(
                        FrontPageGitCommit(
                            hash=commit_hash,
                            author=author,
                            time=datetime.datetime.fromtimestamp(
                                int(commit_time)),
                            subject=subject,
                            repo=repo_name,
                        )
                    )

            update = FrontPageGit(
                source=self,
                time=tagged_timestamp,
                log=commits,
                tag=tag,
            )
            updates.append(update)

        return updates