comparison mercurial/minirst.py @ 9156:c9c7e8cdac9c

minimal reStructuredText parser
author Martin Geisler <mg@lazybytes.net>
date Thu, 16 Jul 2009 23:25:25 +0200
parents
children cd5b6a11b607
comparison
equal deleted inserted replaced
9155:b46063eabe98 9156:c9c7e8cdac9c
1 # minirst.py - minimal reStructuredText parser
2 #
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2, incorporated herein by reference.
7
8 """simplified reStructuredText parser.
9
10 This parser knows just enough about reStructuredText to parse the
11 Mercurial docstrings.
12
13 It cheats in a major way: nested blocks are not really nested. They
14 are just indented blocks that look like they are nested. This relies
15 on the user to keep the right indentation for the blocks.
16
17 It only supports a small subset of reStructuredText:
18
19 - paragraphs
20
21 - definition lists (must use ' ' to indent definitions)
22
23 - lists (items must start with '-')
24
25 - literal blocks
26
27 - option lists (supports only long options without arguments)
28
29 Inline markup is not recognized at all.
30 """
31
32 import re, sys, textwrap
33
34
def findblocks(text):
    """Find continuous blocks of lines in text.

    A block is a maximal run of consecutive lines that contain something
    other than whitespace; blank lines separate blocks and are dropped.

    Returns a list of dictionaries representing the blocks. Each block
    has an 'indent' field (the smallest leading-whitespace width among
    its lines) and a 'lines' field (the lines with that many leading
    characters removed). Empty or all-blank text yields an empty list.
    """
    # Group consecutive non-blank lines; a blank line closes the current
    # group, but never opens a second empty one.
    groups = [[]]
    for line in text.splitlines():
        if line.strip():
            groups[-1].append(line)
        elif groups[-1]:
            groups.append([])
    # Trailing blank lines (or empty input) leave an empty group behind.
    if not groups[-1]:
        del groups[-1]

    blocks = []
    for group in groups:
        # Every group is non-empty here, so min() always has input.
        indent = min(len(l) - len(l.lstrip()) for l in group)
        blocks.append(dict(indent=indent, lines=[l[indent:] for l in group]))
    return blocks
55
56
def findliteralblocks(blocks):
    """Finds literal blocks and adds a 'type' field to the blocks.

    Literal blocks are given the type 'literal', all other blocks are
    given the type 'paragraph'.

    A literal block is introduced by a paragraph whose last line ends
    with "::" and consists of the following blocks that are indented
    deeper than that paragraph. The list is modified in place (a
    paragraph consisting only of "::" is removed) and also returned.
    """
    i = 0
    while i < len(blocks):
        # Searching for a paragraph that ends with "::" and is followed
        # by at least one more block (the candidate literal block).
        blocks[i]['type'] = 'paragraph'
        if blocks[i]['lines'][-1].endswith('::') and i+1 < len(blocks):
            indent = blocks[i]['indent']
            adjustment = blocks[i+1]['indent'] - indent

            if blocks[i]['lines'] == ['::']:
                # Expanded form: remove block. Note that i may become
                # -1 here when the "::" paragraph was the first block.
                del blocks[i]
                i -= 1
            elif blocks[i]['lines'][-1].endswith(' ::'):
                # Partially minimized form: remove space and both
                # colons.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
            else:
                # Fully minimized form: remove just one colon.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]

            # List items are formatted with a hanging indent. We must
            # correct for this here while we still have the original
            # information on the indentation of the subsequent literal
            # blocks available.
            #
            # The i >= 0 guard matters after removing a leading "::"
            # paragraph: without it blocks[-1] (the *last* block) would
            # be inspected, and a trailing list item would wrongly
            # change the indentation threshold.
            if i >= 0 and blocks[i]['lines'][0].startswith('- '):
                indent += 2
                adjustment -= 2

            # Mark the following indented blocks.
            while i+1 < len(blocks) and blocks[i+1]['indent'] > indent:
                blocks[i+1]['type'] = 'literal'
                blocks[i+1]['indent'] -= adjustment
                i += 1
        i += 1
    return blocks
106
107
def findsections(blocks):
    """Finds sections.

    A section is a two-line paragraph whose second line consists of
    exactly as many '-' characters as the first line (the title) has
    characters. Matching blocks get their 'type' changed to 'section';
    the list is modified in place and returned.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.
    """
    for block in blocks:
        # Literal blocks may contain underlined text that must be left
        # untouched, so only paragraphs are considered.
        if block['type'] != 'paragraph':
            continue
        lines = block['lines']
        if len(lines) == 2 and lines[1] == '-' * len(lines[0]):
            block['type'] = 'section'
    return blocks
126
127
def findbulletlists(blocks):
    """Finds bullet lists.

    A paragraph whose first line starts with "- " is split into one
    'bullet' block per line that starts with "- "; lines that do not
    start a new item are appended to the current item. The "- " marker
    is removed from the first line of each item and the item's 'indent'
    is the paragraph's indent plus two. The list is modified in place
    and returned.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.
    """
    i = 0
    while i < len(blocks):
        block = blocks[i]
        if (block['type'] == 'paragraph' and
            block['lines'][0].startswith('- ')):
            items = []
            for line in block['lines']:
                if line.startswith('- '):
                    items.append(dict(type='bullet', lines=[],
                                      indent=block['indent'] + 2))
                    line = line[2:]
                # The first line always opens an item, so items is
                # never empty here.
                items[-1]['lines'].append(line)
            blocks[i:i+1] = items
            # Skip over the items that were just inserted.
            i += len(items) - 1
        i += 1
    return blocks
155
156
# Matches a long option at the start of a line: group 1 is the option
# itself ("--name"), group 2 is an optional " ARG" / "=ARG" plus the
# spaces separating it from the description, group 3 is the description.
# NOTE(review): the listing this was recovered from appears to collapse
# runs of spaces; confirm the number of spaces before the "+" against
# the repository copy.
_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$')
def findoptionlists(blocks):
    """Finds option lists.

    A paragraph whose first line matches _optionre is split into one
    'option' block per matching line; non-matching lines are appended
    to the current option as continuation lines. Each option block
    keeps its lines unmodified, inherits the paragraph's 'indent' and
    gets a 'width' field holding the combined length of the option and
    its argument/separator part (groups 1 and 2 of the match). The list
    is modified in place and returned.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.
    """
    i = 0
    while i < len(blocks):
        block = blocks[i]
        if (block['type'] == 'paragraph' and
            _optionre.match(block['lines'][0])):
            options = []
            for line in block['lines']:
                m = _optionre.match(line)
                if m:
                    option, arg, rest = m.groups()
                    width = len(option) + len(arg)
                    options.append(dict(type='option', lines=[],
                                        indent=block['indent'],
                                        width=width))
                # The first line always matches, so options is never
                # empty here.
                options[-1]['lines'].append(line)
            blocks[i:i+1] = options
            # Skip over the option blocks that were just inserted.
            i += len(options) - 1
        i += 1
    return blocks
189
190
def finddefinitionlists(blocks):
    """Finds definition lists.

    A paragraph of at least two lines whose first line is not indented
    and whose second line is indented is split into one 'definition'
    block per unindented line (the term); the indented lines that
    follow are kept, unmodified, as the body of that definition. Each
    definition block inherits the paragraph's 'indent' and gets a
    'hang' field holding the leading-whitespace width of its last line.
    The list is modified in place and returned.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.
    """
    # NOTE(review): the listing this was recovered from appears to
    # collapse runs of spaces; confirm the width of the ' ' prefix used
    # below against the repository copy.
    i = 0
    while i < len(blocks):
        block = blocks[i]
        if (block['type'] == 'paragraph' and
            len(block['lines']) > 1 and
            not block['lines'][0].startswith(' ') and
            block['lines'][1].startswith(' ')):
            definitions = []
            for line in block['lines']:
                if not line.startswith(' '):
                    # An unindented line starts a new term.
                    definitions.append(dict(type='definition', lines=[],
                                            indent=block['indent']))
                definitions[-1]['lines'].append(line)
                # Overwritten on every line, so the last line wins.
                definitions[-1]['hang'] = len(line) - len(line.lstrip())
            blocks[i:i+1] = definitions
            # Skip over the definition blocks that were just inserted.
            i += len(definitions) - 1
        i += 1
    return blocks
222
223
def addmargins(blocks):
    """Adds empty blocks for vertical spacing.

    This groups bullets, options, and definitions together with no vertical
    space between them, and adds an empty block between all other blocks.

    The inserted blocks have type 'margin', indent 0 and a single empty
    line. The list is modified in place and returned.
    """
    grouped = ('bullet', 'option', 'definition')
    i = 1
    while i < len(blocks):
        current = blocks[i]['type']
        if current == blocks[i-1]['type'] and current in grouped:
            # Adjacent list items of the same kind stay together.
            i += 1
        else:
            blocks.insert(i, dict(lines=[''], indent=0, type='margin'))
            # Step over both the new margin and the block after it.
            i += 2
    return blocks
239
240
def formatblock(block, width):
    """Format a block according to width.

    block is a dictionary with 'type', 'indent' and 'lines' fields;
    'definition' blocks also need 'hang' and 'option' blocks need
    'width'. Returns the formatted text as a string without a trailing
    newline:

    - 'margin' blocks become the empty string,
    - 'literal' and 'section' blocks are indented but never re-wrapped,
    - 'definition' blocks keep the term on its own line and wrap the
      rest with an extra hanging indent of block['hang'],
    - every other block is joined into one paragraph and wrapped to
      width; bullets get a "- " marker in front of the first line and
      options indent continuation lines by block['width'].
    """
    indent = ' ' * block['indent']
    kind = block['type']
    if kind == 'margin':
        return ''
    if kind in ('literal', 'section'):
        return indent + ('\n' + indent).join(block['lines'])
    if kind == 'definition':
        term = indent + block['lines'][0]
        defindent = indent + block['hang'] * ' '
        # Call strip() on each line rather than mapping str.strip so
        # that other string types are accepted as well.
        text = ' '.join([l.strip() for l in block['lines'][1:]])
        return "%s\n%s" % (term, textwrap.fill(text, width=width,
                                               initial_indent=defindent,
                                               subsequent_indent=defindent))

    initindent = subindent = indent
    text = ' '.join([l.strip() for l in block['lines']])
    if kind == 'bullet':
        # The last two columns of the indent hold the bullet marker.
        initindent = indent[:-2] + '- '
    elif kind == 'option':
        # Continuation lines line up with the option description.
        subindent = indent + block['width'] * ' '

    return textwrap.fill(text, width=width,
                         initial_indent=initindent,
                         subsequent_indent=subindent)
267
268
def format(text, width):
    """Parse and format the text according to width.

    The text is split into blocks, the blocks are classified by a fixed
    sequence of passes, margins are added between them and every block
    is then rendered with formatblock. Returns the rendered blocks
    joined by newlines.

    Note that this function shadows the builtin of the same name within
    this module; the name is kept for the sake of existing callers.
    """
    blocks = findblocks(text)
    # The order matters: literal blocks must be found first since that
    # pass is the one that adds the 'type' field the others rely on.
    for step in (findliteralblocks, findsections, findbulletlists,
                 findoptionlists, finddefinitionlists, addmargins):
        blocks = step(blocks)
    return '\n'.join(formatblock(b, width) for b in blocks)
280
if __name__ == "__main__":
    # Debugging aid: run every parsing pass on the file named by the
    # first command line argument, dumping the block list after each
    # pass, and finally print the text formatted to 30 columns.
    from pprint import pprint

    def debug(func, blocks):
        """Apply func to blocks and dump the result to stdout."""
        blocks = func(blocks)
        # sys.stdout.write is used instead of print so that this block
        # parses the same way on Python 2 and Python 3.
        sys.stdout.write("*** after %s:\n" % func.__name__)
        pprint(blocks)
        sys.stdout.write("\n")
        return blocks

    # Close the input file explicitly instead of leaking the handle.
    fp = open(sys.argv[1])
    try:
        text = fp.read()
    finally:
        fp.close()

    blocks = debug(findblocks, text)
    blocks = debug(findliteralblocks, blocks)
    blocks = debug(findsections, blocks)
    blocks = debug(findbulletlists, blocks)
    blocks = debug(findoptionlists, blocks)
    blocks = debug(finddefinitionlists, blocks)
    blocks = debug(addmargins, blocks)
    sys.stdout.write('\n'.join(formatblock(b, 30) for b in blocks) + '\n')