Add script that changes Markdown links to use identifiers
[zzz-pokedex.git] / scripts / markdown-identifiers.py
1 # Encoding: UTF-8
2 """Rewrite Markdown strings to use identifiers instead of names
3
4 This is an unmaintained one-shot script, only included in the repo for reference.
5
6 """
7
8 from functools import partial
9 import sys
10 import re
11
12 from sqlalchemy.orm.exc import MultipleResultsFound
13 from sqlalchemy.sql.expression import func
14
15 from pokedex.db import connect, tables
16
17 sanity_re = re.compile(ur"^[-A-Za-z0-9 é\[\]{}.%':;,×/()\"|–`—!*♂♀\\]$")
18
# Deliberately loose pattern: any "[...]{...}" pair counts as a candidate
# link, whatever characters sit between the brackets and between the braces.
fuzzy_link_re = re.compile(
    r"""
    \[
        [^]]+
    \]
    \{
        [^}]+
    \}
    """,
    flags=re.VERBOSE,
)
27
# Narrow pattern for the link syntax actually used in our Markdown strings:
#   [label]{category}   or   [label]{category:target}
# It exposes the named groups "label", "category" and (optionally) "target".
strict_link_re = re.compile(
    r"""
    \[
        (?P<label>
            [-A-Za-z 0-9'.]{,30}
        )
    \]
    \{
        (?P<category>
            [a-z]{,20}
        )
        (
            :
            (?P<target>
                [A-Za-z 0-9]{,20}
            )
        )?
    \}
    """,
    flags=re.VERBOSE,
)
47
# Value that a row's local_language_id must equal for main() to process it;
# rows with any other language id are skipped.
# NOTE(review): presumably the id of English in the languages table -- confirm.
english_id = 9
49
def is_md_col(column):
    """Return True when *column*'s ``info`` dict marks its format as Markdown."""
    column_format = column.info.get('format')
    return column_format == 'markdown'
52
# Hand-written overrides consulted by get_replacement() before any database
# lookup. Each key is an (entire text, matched link) pair; the value is used
# verbatim as the replacement for that link in that text.
manual_replacements = {
    (
        u'Used in battle\n: Attempts to [catch]{mechanic} a wild Pok\xe9mon, using a catch rate of 1.5\xd7.\n\nThis item can only be used in the [Great Marsh]{location} or [Safari Zone]{location}.',
        u'[Safari Zone]{location}',
    ): 'in a Safari Zone',
    (
        u'Used outside of battle\n: Transports the trainer to the last-entered dungeon entrance. Cannot be used outside, in buildings, or in [Distortion World]{location}, [Hall of Origin]{location}, [Spear Pillar]{location}, or [Turnback Cave]{location}.',
        u'[Hall of Origin]{location}',
    ): '[Hall of Origin]{location:hall-of-origin-1}',
    (
        u'Give to the [Wingull]{pokemon} on [Route 13]{location}, along with [Gram 2]{item} and [Gram 3]{item}, to receive [TM89]{item}.',
        u'[Route 13]{location}',
    ): u'[Route 13]{location:unova-route-13}',
    (
        u'Give to the [Wingull]{pokemon} on [Route 13]{location}, along with [Gram 1]{item} and [Gram 3]{item}, to receive [TM89]{item}.',
        u'[Route 13]{location}',
    ): u'[Route 13]{location:unova-route-13}',
    (
        u'Give to the [Wingull]{pokemon} on [Route 13]{location}, along with [Gram 1]{item} and [Gram 2]{item}, to receive [TM89]{item}.',
        u'[Route 13]{location}',
    ): u'[Route 13]{location:unova-route-13}',
    (
        u"Forms have different stats and movepools. In Generation III, Deoxys's form depends on the game: Normal Forme in Ruby and Sapphire, Attack Forme in FireRed, Defense Forme in LeafGreen, and Speed Forme in Emerald. In Generation IV, every form exists: form is preserved when transferring via [Pal Park]{location}, and meteorites in the southeast corner of [Veilstone City]{location} or at the west end of [Route 3]{location} can be used to switch between forms.",
        u'[Route 3]{location}',
    # The label is "Route 3"; the target previously read "kanto-route-13",
    # which contradicted the label (looks copied from the Route 13 entries).
    ): u'[Route 3]{location:kanto-route-3}',
}
79
80 def get_replacement(session, entire_text, matchobj):
81 print "%-30s" % matchobj.group(0),
82 label = matchobj.group('label')
83 category = matchobj.group('category')
84 target = matchobj.group('target') or label
85 try:
86 result = manual_replacements[entire_text, matchobj.group(0)]
87 except KeyError:
88 if category == 'mechanic':
89 target = target.lower()
90 else:
91 query = None
92 if category == 'item':
93 table = tables.Item
94 elif category == 'ability':
95 table = tables.Ability
96 elif category == 'move':
97 table = tables.Move
98 elif category == 'type':
99 table = tables.Type
100 elif category == 'pokemon':
101 table = tables.Pokemon
102 query = session.query(table).filter(tables.Pokemon.id < 10000)
103 elif category == 'location':
104 table = tables.Location
105 else:
106 print
107 print repr(entire_text)
108 print repr(matchobj.group(0))
109 raise ValueError('Category %s not implemented' % category)
110 if not query:
111 query = session.query(table)
112 query = query.join(table.names_local)
113 query = query.filter(func.lower(table.names_table.name) == target.lower())
114 try:
115 thingy = query.one()
116 target = thingy.identifier
117 except:
118 print
119 print repr(entire_text)
120 print repr(matchobj.group(0))
121 raise
122 result = "[%s]{%s:%s}" % (label, category, target)
123 print result
124 return result
125
126 def main(argv):
127 session = connect()
128 for cls in tables.mapped_classes:
129 for translation_class in cls.translation_classes:
130 columns = translation_class.__table__.c
131 md_columns = [c for c in columns if c.info.get('format') == 'markdown']
132 if not md_columns:
133 continue
134 for row in session.query(translation_class):
135 if row.local_language_id != english_id:
136 continue
137 for column in md_columns:
138 markdown = getattr(row, column.name)
139 if not markdown:
140 continue
141 text = unicode(markdown)
142 # Make sure everything that remotely looks like a link is one
143 links = fuzzy_link_re.findall(text)
144 if not links:
145 continue
146 for link in links:
147 assert strict_link_re.findall(link), [link]
148 # Do the replacement
149 replaced = strict_link_re.sub(
150 partial(get_replacement, session, text),
151 text,
152 )
153 setattr(row, column.name, replaced)
154
155 if argv and argv[0] == '--commit':
156 session.commit()
157 print 'Committed'
158 else:
159 print 'Run with --commit to commit changes'
160
if __name__ == '__main__':
    # Hand main() only the arguments that follow the program name.
    cli_args = sys.argv[1:]
    main(cli_args)