# Big ol refactor: make sources into first-class objects.
# splinext/frontpage/sources.py
1 """Base class for a front page source, as well as a handful of specific
2 implementations.
3 """
4
5 from collections import namedtuple
6 import datetime
7 import subprocess
8 from subprocess import PIPE
9
10 import feedparser
11 import lxml.html
12
13 from spline.lib import helpers
14
15
class Source(object):
    """Represents a source to be polled for updates.  Sources are populated
    directly from the configuration file.

    Properties:

    ``title``
        A name to identify this specific source.

    ``icon``
        Name of a Fugue icon to show next to the name.

    ``link``
        A URL where the full history of this source can be found.

    ``limit``
        The maximum number of items from this source to show at a time.
        Optional.

    ``max_age``
        Items older than this age (in seconds) will be excluded.  Optional.

    Additionally, subclasses **must** define a ``template`` property -- a path
    to a Mako template that knows how to render an update from this source.
    The template will be passed one parameter: the update object, ``update``.
    """

    def __init__(self, title, icon, link, limit=None, max_age=None):
        self.title = title
        self.icon = icon
        self.link = link
        self.limit = limit
        self.max_age = max_age

    def poll(self, limit, max_age):
        """Poll for updates.  Must return an iterable.  Each element should be
        an Update object.

        Subclasses override this with the real work.  The signature here
        matches the concrete implementations (``limit`` caps the number of
        items returned; ``max_age`` is a cutoff older items must not pass),
        so callers can rely on the base interface.
        """
        raise NotImplementedError
55
56
# One rendered update from a feed: the owning source, the entry's timestamp,
# the raw feedparser entry, and the HTML-safe summary markup to display.
FrontPageRSS = namedtuple('FrontPageRSS', ['source', 'time', 'entry', 'content'])

class FeedSource(Source):
    """Represents an RSS or Atom feed.

    Extra properties:

    ``feed_url``
        URL for the feed.
    """

    template = '/front_page/rss.mako'

    # When an entry has no summary, its full content is truncated to roughly
    # this many characters of markup before being repaired and shown
    SUMMARY_LENGTH = 1000

    def __init__(self, feed_url, **kwargs):
        # Title is optional for feeds; it can be filled in from the feed
        # itself on the first poll
        kwargs.setdefault('title', None)
        super(FeedSource, self).__init__(**kwargs)

        self.feed_url = feed_url

    def poll(self, limit, max_age):
        """Fetch the feed and return a list of ``FrontPageRSS`` updates.

        At most ``limit`` entries are returned.  If ``max_age`` (a datetime
        cutoff) is given, iteration stops at the first entry older than it.
        """
        feed = feedparser.parse(self.feed_url)

        # Fall back to the feed's own title if none was configured
        if not self.title:
            self.title = feed.feed.title

        updates = []
        for entry in feed.entries[:limit]:
            # Grab a date -- Atom has published, RSS usually just has updated.
            # Both come out as time tuples, which datetime.datetime() can read.
            # Either attribute may be absent, or present but None when
            # feedparser couldn't parse the date; skip such entries rather
            # than let one bad entry break the whole poll.
            timestamp_tuple = (getattr(entry, 'published_parsed', None)
                or getattr(entry, 'updated_parsed', None))
            if timestamp_tuple is None:
                continue
            timestamp = datetime.datetime(*timestamp_tuple[:6])

            if max_age and timestamp < max_age:
                # Entries come newest-first, so everything after the first
                # expired entry is also expired; bail out
                break

            # Try to find something to show!  Default to the summary, if there
            # is one, or try to generate one otherwise
            if 'summary' in entry:
                # If there be a summary, cheerfully trust that it's actually a
                # summary
                content = entry.summary
            elif 'content' in entry:
                content = self._truncate_content(entry.content[0].value)
            else:
                content = u''

            content = helpers.literal(content)

            update = FrontPageRSS(
                source = self,
                time = timestamp,
                content = content,
                entry = entry,
            )
            updates.append(update)

        return updates

    def _truncate_content(self, html):
        """Cut full entry content down to ``SUMMARY_LENGTH`` characters, feed
        it to lxml.html to repair the resulting broken tag nesting, and append
        an ellipsis to the last run of visible text.  Returns serialized HTML.
        """
        # Full content is way too much, especially for giant blog posts.
        broken_html = html[:self.SUMMARY_LENGTH]
        fragment = lxml.html.fromstring(broken_html)

        # Insert an ellipsis at the end of the last node with text.
        # Need to find the last node with a tail, OR the last node with
        # text if it's later -- only one of the two trackers is ever set.
        last_text_node = None
        last_tail_node = None
        for node in fragment.iter():
            if node.tail:
                last_tail_node = node
                last_text_node = None
            elif node.text:
                last_text_node = node
                last_tail_node = None

        if last_text_node is not None:
            last_text_node.text += '...'
        if last_tail_node is not None:
            last_tail_node.tail += '...'

        # Serialize
        return lxml.html.tostring(fragment)
144
145
# One front-page update: all commits between two tags of the main repo.
FrontPageGit = namedtuple('FrontPageGit', ['source', 'time', 'log', 'tag'])
# A single commit within an update; `repo` is the human-readable repo name.
FrontPageGitCommit = namedtuple('FrontPageGitCommit',
    ['hash', 'author', 'time', 'subject', 'repo'])

class GitSource(Source):
    """Represents a git repository.

    The main repository is checked for annotated tags, and an update is
    considered to be the list of commits between them.  If any other
    repositories are listed and have the same tags, their commits will be
    included as well.

    Extra properties:

    ``repo_paths``
        Space-separated list of repositories.  These must be repository PATHS,
        not arbitrary git URLs.  Only the first one will be checked for the
        list of tags.

    ``repo_names``
        A list of names for the repositories, in parallel with ``repo_paths``.
        Used for constructing gitweb URLs and identifying the repositories.

    ``gitweb``
        Base URL to a gitweb installation, so commit ids can be linked to the
        commit proper.

    ``tag_pattern``
        Optional.  A shell glob pattern used to filter the tags.
    """

    template = '/front_page/git.mako'

    def __init__(self, repo_paths, repo_names, gitweb, tag_pattern=None, **kwargs):
        kwargs.setdefault('title', None)
        super(GitSource, self).__init__(**kwargs)

        # Repo stuff can be space-delimited lists
        self.repo_paths = repo_paths.split()
        self.repo_names = repo_names.split()

        self.gitweb = gitweb
        self.tag_pattern = tag_pattern

    def poll(self, limit, max_age):
        """Return a list of ``FrontPageGit`` updates, one per recent tag of
        the main repository, newest first.

        At most ``limit`` tags are examined; if ``max_age`` (a datetime
        cutoff) is given, iteration stops at the first tag older than it.
        """
        # Fetch the main repo's git tags
        git_dir = '--git-dir=' + self.repo_paths[0]
        args = [
            'git',
            git_dir,
            'tag', '-l',
        ]
        if self.tag_pattern:
            args.append(self.tag_pattern)

        git_output, _ = subprocess.Popen(args, stdout=PIPE).communicate()
        git_output = git_output.strip()
        if not git_output:
            # No tags at all.  Bail now: ''.split('\n') would produce ['']
            # and feed a bogus empty tag name to the commands below.
            return []
        tags = git_output.split('\n')

        # Tags come out in alphabetical order, which means earliest first.
        # Reverse it to make the slicing easier
        tags.reverse()
        # Only history from tag to tag is actually interesting, so get the
        # most recent $limit tags but skip the earliest
        interesting_tags = tags[:-1][:limit]

        updates = []
        for tag, since_tag in zip(interesting_tags, tags[1:]):
            # Get the date when this tag was actually created.
            # 'raw' format gives unixtime followed by timezone offset
            args = [
                'git',
                git_dir,
                'for-each-ref',
                '--format=%(taggerdate:raw)',
                'refs/tags/' + tag,
            ]
            tag_timestamp, _ = subprocess.Popen(args, stdout=PIPE).communicate()
            timestamp_fields = tag_timestamp.split(None, 1)
            if len(timestamp_fields) < 2:
                # Lightweight (unannotated) tags have no taggerdate; skip
                # them instead of crashing on the unpack
                continue
            tag_unixtime, tag_timezone = timestamp_fields
            tagged_timestamp = datetime.datetime.fromtimestamp(int(tag_unixtime))

            if max_age and tagged_timestamp < max_age:
                break

            commits = []

            for repo_path, repo_name in zip(self.repo_paths, self.repo_names):
                # Grab an easily-parsed history: fields delimited by nulls.
                # Hash, author's name, commit timestamp, subject.
                git_log_args = [
                    'git',
                    '--git-dir=' + repo_path,
                    'log',
                    '--pretty=%h%x00%an%x00%at%x00%s',
                    "{0}..{1}".format(since_tag, tag),
                ]
                proc = subprocess.Popen(git_log_args, stdout=PIPE)
                for line in proc.stdout:
                    hash, author, time, subject = line.strip().split('\x00')
                    commits.append(
                        FrontPageGitCommit(
                            hash = hash,
                            author = author,
                            time = datetime.datetime.fromtimestamp(int(time)),
                            subject = subject,
                            repo = repo_name,
                        )
                    )

            update = FrontPageGit(
                source = self,
                time = tagged_timestamp,
                log = commits,
                tag = tag,
            )
            updates.append(update)

        return updates