Feeds without summaries now have them auto-generated.
[zzz-spline-frontpage.git] / splinext / frontpage / __init__.py
1 from collections import namedtuple
2 import datetime
3 from pkg_resources import resource_filename
4 import subprocess
5
6 import feedparser
7 import lxml.html
8
9 from spline.lib import helpers
10 from spline.lib.plugin import PluginBase, PluginLink, Priority
11
12 import splinext.frontpage.controllers.frontpage
13
class FrontPageUpdate(object):
    """Marker base class (an 'interface') for anything that can show up as an
    update on the front page.

    A subclass is expected to provide two properties: `time` and `template`.
    """
21
22
# Maximum number of characters of full feed content kept when a summary has
# to be generated from it
RSS_SUMMARY_LENGTH = 1000

# A single front page update built from one news feed entry
FrontPageRSS = namedtuple(
    'FrontPageRSS',
    'time entry template category content icon',
)
27
def _last_text_slot(node):
    """Locate the last non-blank piece of text beneath `node`, in document
    order.

    Returns an ``(element, attribute_name)`` pair, where the attribute name is
    either ``'text'`` or ``'tail'``, or None if there is no such text.
    """
    # Within an element, document order is: the element's own text, then for
    # each child the child's whole subtree followed by that child's tail.
    # Walking that sequence backwards visits the latest text first.
    for child in reversed(list(node)):
        if child.tail and child.tail.strip():
            return child, 'tail'
        slot = _last_text_slot(child)
        if slot is not None:
            return slot

    if node.text and node.text.strip():
        return node, 'text'

    return None


def _summarize_html(full_html, max_length):
    """Cut `full_html` down to at most `max_length` characters and return the
    result as serialized HTML with its tag nesting repaired.

    An ellipsis is appended to the last visible text, but only if something
    was actually cut off.  Blank input produces an empty string.
    """
    broken_html = full_html[:max_length]
    if not broken_html.strip():
        # lxml.html.fromstring raises on an empty document
        return u''

    # Slicing can leave tags unclosed; a round trip through lxml fixes that
    fragment = lxml.html.fromstring(broken_html)

    if len(full_html) > max_length:
        slot = _last_text_slot(fragment)
        if slot is not None:
            node, attr = slot
            setattr(node, attr, getattr(node, attr) + '...')

    return lxml.html.tostring(fragment)


def rss_hook(limit, url, title=None, icon=None):
    """Front page handler for news feeds.

    Parses the feed at `url` and returns a list of `FrontPageRSS` updates
    built from the first `limit` entries.  Entries that carry neither a
    published nor an updated date are left out, since an update needs a time.

    `title` becomes the category of every update; when it is not given, the
    feed's own title is used, falling back to `url` if the feed has none.
    `icon` is passed through to each update unchanged.
    """
    feed = feedparser.parse(url)

    if not title:
        # The feed may not declare a title at all; don't let that alone
        # raise and take the rest of the front page down with it
        title = getattr(feed.feed, 'title', None) or url

    updates = []
    for entry in feed.entries[:limit]:
        # Grab a date -- Atom has published, RSS usually just has updated.
        # Both come out as time tuples, which datetime.datetime() can read.
        # Either may be absent or None, so treat both cases the same way
        timestamp_tuple = (
            getattr(entry, 'published_parsed', None)
            or getattr(entry, 'updated_parsed', None)
        )
        if not timestamp_tuple:
            continue
        timestamp = datetime.datetime(*timestamp_tuple[:6])

        # Try to find something to show!  Default to the summary, if there is
        # one, or try to generate one otherwise
        content = u''
        if 'summary' in entry:
            # If there be a summary, cheerfully trust that it's actually a
            # summary
            content = entry.summary
        elif 'content' in entry and entry.content:
            # Full content is way too much, so cut it down to some arbitrary
            # number of characters
            content = _summarize_html(
                entry.content[0].value, RSS_SUMMARY_LENGTH)

        # NOTE(review): this hands the feed's markup to helpers.literal as-is,
        # which presumably marks it safe for unescaped output -- only
        # appropriate for feeds that are trusted; confirm.
        content = helpers.literal(content)

        update = FrontPageRSS(
            time = timestamp,
            entry = entry,
            template = '/front_page/rss.mako',
            category = title,
            content = content,
            icon = icon,
        )
        updates.append(update)

    return updates
93
94
# A front page update covering the repository history leading up to one tag
FrontPageGit = namedtuple(
    'FrontPageGit',
    'time gitweb log tag template category icon',
)

# One commit, as listed in the `log` of a FrontPageGit
FrontPageGitCommit = namedtuple(
    'FrontPageGitCommit',
    'hash author time subject repo',
)
99
def _run_git(git_dir, *args):
    """Run a git subcommand against the repository at `git_dir` and return
    everything it wrote to stdout.
    """
    proc = subprocess.Popen(
        ['git', '--git-dir=' + git_dir] + list(args),
        stdout=subprocess.PIPE,
        # Text-mode output, so the string splitting below also works where
        # pipes would otherwise produce bytes
        universal_newlines=True,
    )
    # communicate() drains the pipe AND waits for the child to exit, so no
    # unreaped processes or open pipes are left behind
    output, _ = proc.communicate()
    return output


def git_hook(limit, title, gitweb, repo_paths, repo_names,
        tag_pattern=None, icon=None):

    """Front page handler for repository history.

    Returns a list of `FrontPageGit` updates, one per tag, for at most the
    `limit` most recent tags of the main repository.  Each update's `log`
    holds the commits between the preceding tag and that tag, collected from
    every listed repository.

    `repo_paths` and `repo_names` are space-delimited strings that are paired
    up positionally; the first path is the main repository, whose tags are
    used.  `tag_pattern`, if given, is handed to `git tag -l` to restrict
    which tags are considered.  `title` becomes each update's category;
    `gitweb` and `icon` are passed through unchanged.
    """
    # Repo stuff can be space-delimited lists...
    repo_paths = repo_paths.split()
    repo_names = repo_names.split()
    main_repo = repo_paths[0]

    # Fetch the main repo's git tags
    tag_args = ['tag', '-l']
    if tag_pattern:
        tag_args.append(tag_pattern)
    tags = [
        line for line in _run_git(main_repo, *tag_args).split('\n')
        if line.strip()
    ]

    # Tags come out in alphabetical order, which means earliest first.  Reverse
    # it to make the slicing easier
    tags.reverse()
    # Only history from tag to tag is actually interesting, so get the most
    # recent $limit tags but skip the earliest
    interesting_tags = tags[:-1][:limit]

    updates = []
    for tag, since_tag in zip(interesting_tags, tags[1:]):
        commits = []

        for repo_path, repo_name in zip(repo_paths, repo_names):
            # Grab an easily-parsed history: fields delimited by nulls.
            # Hash, author's name, commit timestamp, subject.
            log_output = _run_git(
                repo_path,
                'log',
                '--pretty=%h%x00%an%x00%at%x00%s',
                "{0}..{1}".format(since_tag, tag),
            )
            for line in log_output.split('\n'):
                # Subject comes last, so cap the split to keep it whole
                fields = line.strip().split('\x00', 3)
                if len(fields) != 4:
                    # Blank line, or something that isn't a log record
                    continue
                commit_hash, author, commit_time, subject = fields
                commits.append(
                    FrontPageGitCommit(
                        hash = commit_hash,
                        author = author,
                        time = datetime.datetime.fromtimestamp(
                            int(commit_time)),
                        subject = subject,
                        repo = repo_name,
                    )
                )

        # LASTLY, get the date when this tag was actually created.  This has
        # to ask the main repo explicitly, like every other git call here;
        # without --git-dir, git would look at whatever repository (if any)
        # the current working directory happens to be in
        tag_timestamp = _run_git(
            main_repo,
            'for-each-ref',
            '--format=%(taggerdate:raw)',
            'refs/tags/' + tag,
        )
        # Raw dates look like "<unixtime> <timezone>"; only the first part is
        # needed
        timestamp_fields = tag_timestamp.split()
        if timestamp_fields:
            tag_time = datetime.datetime.fromtimestamp(
                int(timestamp_fields[0]))
        elif commits:
            # No tagger date came back for this ref.  Approximate with the
            # newest commit leading up to the tag rather than blowing up
            tag_time = max(commit.time for commit in commits)
        else:
            # Nothing to date this tag with, and nothing to show for it
            continue

        update = FrontPageGit(
            time = tag_time,
            gitweb = gitweb,
            log = commits,
            template = '/front_page/git.mako',
            category = title,
            tag = tag,
            icon = icon,
        )
        updates.append(update)

    return updates
178
179
def add_routes_hook(map, *args, **kwargs):
    """Routes-configuration hook: connect the site root to the front page.

    Any extra positional or keyword arguments are accepted and ignored.
    """
    route_target = dict(controller='frontpage', action='index')
    map.connect('/', **route_target)
183
184
class FrontPagePlugin(PluginBase):
    """Plugin declaring the front page's controller, templates and hooks."""

    def controllers(self):
        """Return a dict mapping controller name to controller class."""
        controller_class = \
            splinext.frontpage.controllers.frontpage.FrontPageController
        return {'frontpage': controller_class}

    def template_dirs(self):
        """Return a list of (template directory, priority) pairs."""
        templates_path = resource_filename(__name__, 'templates')
        return [(templates_path, Priority.FIRST)]

    def hooks(self):
        """Return a list of (hook name, priority, handler) triples."""
        handlers = [
            ('routes_mapping', add_routes_hook),
            ('frontpage_updates_rss', rss_hook),
            ('frontpage_updates_git', git_hook),
        ]
        return [
            (hook_name, Priority.NORMAL, handler)
            for hook_name, handler in handlers
        ]