Mercurial > hg-stable
comparison mercurial/minirst.py @ 9156:c9c7e8cdac9c
minimal reStructuredText parser
author | Martin Geisler <mg@lazybytes.net> |
---|---|
date | Thu, 16 Jul 2009 23:25:25 +0200 |
parents | |
children | cd5b6a11b607 |
comparison
equal
deleted
inserted
replaced
9155:b46063eabe98 | 9156:c9c7e8cdac9c |
---|---|
1 # minirst.py - minimal reStructuredText parser | |
2 # | |
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others | |
4 # | |
5 # This software may be used and distributed according to the terms of the | |
6 # GNU General Public License version 2, incorporated herein by reference. | |
7 | |
8 """simplified reStructuredText parser. | |
9 | |
10 This parser knows just enough about reStructuredText to parse the | |
11 Mercurial docstrings. | |
12 | |
13 It cheats in a major way: nested blocks are not really nested. They | |
14 are just indented blocks that look like they are nested. This relies | |
15 on the user to keep the right indentation for the blocks. | |
16 | |
17 It only supports a small subset of reStructuredText: | |
18 | |
19 - paragraphs | |
20 | |
21 - definition lists (must use ' ' to indent definitions) | |
22 | |
23 - lists (items must start with '-') | |
24 | |
25 - literal blocks | |
26 | |
27 - option lists (supports only long options without arguments) | |
28 | |
29 - inline markup is not recognized at all. | |
30 """ | |
31 | |
32 import re, sys, textwrap | |
33 | |
34 | |
def findblocks(text):
    """Split text into blocks of consecutive non-blank lines.

    Returns a list of dictionaries, one per block. Each dictionary has
    an 'indent' field (the smallest leading-whitespace width found
    among the lines of the block) and a 'lines' field (the lines of
    the block with that common indentation removed).
    """
    # First pass: group consecutive non-blank lines together. Runs of
    # blank lines only act as separators and never produce a group.
    groups = []
    current = []
    for raw in text.splitlines():
        if raw.strip():
            current.append(raw)
        elif current:
            groups.append(current)
            current = []
    if current:
        groups.append(current)

    # Second pass: measure the common indentation and strip it.
    result = []
    for group in groups:
        margin = min(len(entry) - len(entry.lstrip()) for entry in group)
        stripped = [entry[margin:] for entry in group]
        result.append({'indent': margin, 'lines': stripped})
    return result
55 | |
56 | |
def findliteralblocks(blocks):
    """Finds literal blocks and adds a 'type' field to the blocks.

    Literal blocks are given the type 'literal', all other blocks are
    given the type 'paragraph'.

    The blocks argument is a list of dictionaries with 'indent' and
    'lines' fields. The list is modified in place (a paragraph that
    consists only of '::' is removed from it) and is also returned.
    """
    i = 0
    while i < len(blocks):
        # Searching for a paragraph whose last line ends with "::"
        # and which is followed by one or more blocks that are
        # indented deeper than the paragraph itself. Those indented
        # blocks are the literal blocks.
        blocks[i]['type'] = 'paragraph'
        if blocks[i]['lines'][-1].endswith('::') and i+1 < len(blocks):
            indent = blocks[i]['indent']
            adjustment = blocks[i+1]['indent'] - indent

            if blocks[i]['lines'] == ['::']:
                # Expanded form: remove block
                del blocks[i]
                i -= 1
            elif blocks[i]['lines'][-1].endswith(' ::'):
                # Partially minimized form: remove space and both
                # colons.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
            else:
                # Fully minimized form: remove just one colon.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]

            # List items are formatted with a hanging indent. We must
            # correct for this here while we still have the original
            # information on the indentation of the subsequent literal
            # blocks available.
            #
            # i is -1 when the removed '::' block was the very first
            # block. There is no preceding block to inspect then, and
            # indexing with -1 would silently look at the *last* block
            # instead, so the check is skipped in that case.
            if i >= 0 and blocks[i]['lines'][0].startswith('- '):
                indent += 2
                adjustment -= 2

            # Mark the following indented blocks.
            while i+1 < len(blocks) and blocks[i+1]['indent'] > indent:
                blocks[i+1]['type'] = 'literal'
                blocks[i+1]['indent'] -= adjustment
                i += 1
        i += 1
    return blocks
106 | |
107 | |
def findsections(blocks):
    """Mark section titles by changing their type to 'section'.

    Every block must already carry a 'type' field, i.e., the list
    should have been run through findliteralblocks first. The list is
    updated in place and returned.
    """
    for candidate in blocks:
        # A section is a two-line paragraph: the title, followed by an
        # underline made of '-' characters with exactly the same
        # length as the title.
        if candidate['type'] != 'paragraph':
            continue
        content = candidate['lines']
        if len(content) != 2:
            continue
        title, underline = content
        if underline == '-' * len(title):
            candidate['type'] = 'section'
    return blocks
126 | |
127 | |
def findbulletlists(blocks):
    """Split paragraphs that hold bullet lists into 'bullet' blocks.

    Every block must already carry a 'type' field, i.e., the list
    should have been run through findliteralblocks first. The list is
    updated in place and returned.
    """
    pos = 0
    while pos < len(blocks):
        current = blocks[pos]
        # A bullet list is a paragraph whose first line starts with
        # the marker "- ". Each line with the marker opens a new item;
        # the other lines continue the item before them.
        if (current['type'] != 'paragraph' or
            not current['lines'][0].startswith('- ')):
            pos += 1
            continue

        # Item text sits two columns to the right of the marker.
        textindent = current['indent'] + 2
        bullets = []
        for entry in current['lines']:
            if entry.startswith('- '):
                bullets.append({'type': 'bullet', 'lines': [],
                                'indent': textindent})
                entry = entry[2:]
            bullets[-1]['lines'].append(entry)

        # Replace the paragraph with its items and skip over them.
        blocks[pos:pos + 1] = bullets
        pos += len(bullets)
    return blocks
155 | |
156 | |
# Matches "--option", an optional argument introduced by a space or '=',
# the spaces that follow, and finally the description text.
_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$')
def findoptionlists(blocks):
    """Split paragraphs that hold option lists into 'option' blocks.

    Every block must already carry a 'type' field, i.e., the list
    should have been run through findliteralblocks first. The list is
    updated in place and returned.

    Each resulting block keeps the complete source lines and gets a
    'width' field: the length of the option together with its argument
    and the spaces in front of the description.
    """
    pos = 0
    while pos < len(blocks):
        current = blocks[pos]
        # An option list is a paragraph whose first line matches
        # _optionre. Each matching line opens a new option; the other
        # lines continue the option before them.
        if (current['type'] != 'paragraph' or
            not _optionre.match(current['lines'][0])):
            pos += 1
            continue

        entries = []
        for entry in current['lines']:
            found = _optionre.match(entry)
            if found:
                flag, argument = found.group(1, 2)
                entries.append({'type': 'option', 'lines': [],
                                'indent': current['indent'],
                                'width': len(flag) + len(argument)})
            entries[-1]['lines'].append(entry)

        # Replace the paragraph with its options and skip over them.
        blocks[pos:pos + 1] = entries
        pos += len(entries)
    return blocks
189 | |
190 | |
def finddefinitionlists(blocks):
    """Split paragraphs that hold definition lists into 'definition' blocks.

    Every block must already carry a 'type' field, i.e., the list
    should have been run through findliteralblocks first. The list is
    updated in place and returned.

    Each resulting block starts with the term line and gets a 'hang'
    field holding the leading-whitespace width of its last line.
    """
    pos = 0
    while pos < len(blocks):
        current = blocks[pos]
        # A definition list is a paragraph of at least two lines where
        # the first line (the term) is not indented and the second
        # line (the start of the definition) is.
        if not (current['type'] == 'paragraph' and
                len(current['lines']) > 1 and
                not current['lines'][0].startswith(' ') and
                current['lines'][1].startswith(' ')):
            pos += 1
            continue

        entries = []
        for entry in current['lines']:
            if not entry.startswith(' '):
                # An unindented line is a term and opens a new entry.
                entries.append({'type': 'definition', 'lines': [],
                                'indent': current['indent']})
            latest = entries[-1]
            latest['lines'].append(entry)
            # Overwritten for every line, so the last line wins.
            latest['hang'] = len(entry) - len(entry.lstrip())

        # Replace the paragraph with its entries and skip over them.
        blocks[pos:pos + 1] = entries
        pos += len(entries)
    return blocks
222 | |
223 | |
def addmargins(blocks):
    """Insert 'margin' blocks to create vertical spacing.

    Neighbouring blocks that share the same type and are bullets,
    options, or definitions are kept together without a margin. A
    margin block is inserted between every other pair of neighbours.
    The list is updated in place and returned.
    """
    grouped = ('bullet', 'option', 'definition')
    pos = 1
    while pos < len(blocks):
        kind = blocks[pos]['type']
        if kind == blocks[pos - 1]['type'] and kind in grouped:
            pos += 1
            continue
        blocks.insert(pos, {'lines': [''], 'indent': 0, 'type': 'margin'})
        # Step over both the new margin and the block that follows it.
        pos += 2
    return blocks
239 | |
240 | |
def formatblock(block, width):
    """Render a single block as text, wrapped to the given width.

    Margin blocks render as the empty string. Literal and section
    blocks are emitted line by line without wrapping. All other blocks
    have their lines joined and re-wrapped with textwrap.fill.
    """
    margin = ' ' * block['indent']
    kind = block['type']

    if kind == 'margin':
        return ''

    if kind in ('literal', 'section'):
        # Keep the lines exactly as they are, only add the indentation.
        separator = '\n' + margin
        return margin + separator.join(block['lines'])

    if kind == 'definition':
        # The term goes on its own line, the wrapped definition below
        # it with the hanging indent recorded on the block.
        heading = margin + block['lines'][0]
        bodyindent = margin + block['hang'] * ' '
        body = ' '.join(str.strip(row) for row in block['lines'][1:])
        wrapped = textwrap.fill(body, width=width,
                                initial_indent=bodyindent,
                                subsequent_indent=bodyindent)
        return "%s\n%s" % (heading, wrapped)

    firstindent = restindent = margin
    if kind == 'bullet':
        # The marker takes the place of the last two indent columns.
        firstindent = margin[:-2] + '- '
    elif kind == 'option':
        # Continuation lines line up with the option description.
        restindent = margin + block['width'] * ' '

    flowed = ' '.join(str.strip(row) for row in block['lines'])
    return textwrap.fill(flowed, width=width,
                         initial_indent=firstindent,
                         subsequent_indent=restindent)
267 | |
268 | |
def format(text, width):
    """Parse text and return it formatted according to width.

    The text is split into blocks, each parsing pass is applied in
    turn, and the formatted blocks are joined with newlines.
    """
    # The order matters: every pass after findliteralblocks relies on
    # the 'type' field it adds, and addmargins must see the final
    # block types.
    passes = (findliteralblocks,
              findsections,
              findbulletlists,
              findoptionlists,
              finddefinitionlists,
              addmargins)
    blocks = findblocks(text)
    for step in passes:
        blocks = step(blocks)
    rendered = [formatblock(b, width) for b in blocks]
    return '\n'.join(rendered)
279 | |
280 | |
if __name__ == "__main__":
    # Debugging aid: parse the file named on the command line, dump the
    # block list after every pass and finally print the result
    # formatted to a width of 30 columns.
    from pprint import pprint

    def debug(func, blocks):
        """Apply one parsing pass and pretty-print the blocks it returns."""
        blocks = func(blocks)
        # Single-argument print calls behave the same whether print is
        # a statement or a function.
        print("*** after %s:" % func.__name__)
        pprint(blocks)
        print("")
        return blocks

    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s FILE\n" % sys.argv[0])
        sys.exit(1)

    # Close the input file explicitly instead of leaving that to the
    # garbage collector.
    fp = open(sys.argv[1])
    try:
        text = fp.read()
    finally:
        fp.close()

    blocks = debug(findblocks, text)
    blocks = debug(findliteralblocks, blocks)
    blocks = debug(findsections, blocks)
    blocks = debug(findbulletlists, blocks)
    blocks = debug(findoptionlists, blocks)
    blocks = debug(finddefinitionlists, blocks)
    blocks = debug(addmargins, blocks)
    print('\n'.join(formatblock(b, 30) for b in blocks))