Mercurial > hg
changeset 9737:5f101af4a921
minirst: combine list parsing in one function
Bullet, option, field, and definition lists were parsed by very similar
code. They are now parsed by a single function (splitparagraphs).
Some logic from the old parsing functions has been moved down to
formatblock. This simplifies the parsing while putting the logic where
it's really needed.
author | Martin Geisler <mg@lazybytes.net> |
---|---|
date | Fri, 06 Nov 2009 00:30:35 +0100 |
parents | 26d3ade60fa6 |
children | f52c4f7a4732 |
files | mercurial/minirst.py |
diffstat | 1 files changed, 64 insertions(+), 146 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/minirst.py Thu Nov 05 21:53:22 2009 +0100 +++ b/mercurial/minirst.py Fri Nov 06 00:30:35 2009 +0100 @@ -106,6 +106,53 @@ i += 1 return blocks +_bulletre = re.compile(r'- ') +_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$') +_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):( +)(.*)') +_definitionre = re.compile(r'[^ ]') + +def splitparagraphs(blocks): + """Split paragraphs into lists.""" + # Tuples with (list type, item regexp, single line items?). Order + # matters: definition lists has the least specific regexp and must + # come last. + listtypes = [('bullet', _bulletre, True), + ('option', _optionre, True), + ('field', _fieldre, True), + ('definition', _definitionre, False)] + + def match(lines, i, itemre, singleline): + """Does itemre match an item at line i? + + A list item can be followed by an idented line or another list + item (but only if singleline is True). + """ + line1 = lines[i] + line2 = i+1 < len(lines) and lines[i+1] or '' + if not itemre.match(line1): + return False + if singleline: + return line2 == '' or line2[0] == ' ' or itemre.match(line2) + else: + return line2.startswith(' ') + + i = 0 + while i < len(blocks): + if blocks[i]['type'] == 'paragraph': + lines = blocks[i]['lines'] + for type, itemre, singleline in listtypes: + if match(lines, 0, itemre, singleline): + items = [] + for j, line in enumerate(lines): + if match(lines, j, itemre, singleline): + items.append(dict(type=type, lines=[], + indent=blocks[i]['indent'])) + items[-1]['lines'].append(line) + blocks[i:i+1] = items + break + i += 1 + return blocks + def findsections(blocks): """Finds sections. @@ -127,139 +174,6 @@ return blocks -def findbulletlists(blocks): - """Finds bullet lists. - - The blocks must have a 'type' field, i.e., they should have been - run through findliteralblocks first. 
- """ - i = 0 - while i < len(blocks): - # Searching for a paragraph that looks like this: - # - # +------+-----------------------+ - # | "- " | list item | - # +------| (body elements)+ | - # +-----------------------+ - if (blocks[i]['type'] == 'paragraph' and - blocks[i]['lines'][0].startswith('- ')): - items = [] - for line in blocks[i]['lines']: - if line.startswith('- '): - items.append(dict(type='bullet', lines=[], - indent=blocks[i]['indent'])) - line = line[2:] - items[-1]['lines'].append(line) - blocks[i:i+1] = items - i += len(items) - 1 - i += 1 - return blocks - - -_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$') -def findoptionlists(blocks): - """Finds option lists. - - The blocks must have a 'type' field, i.e., they should have been - run through findliteralblocks first. - """ - i = 0 - while i < len(blocks): - # Searching for a paragraph that looks like this: - # - # +----------------------------+-------------+ - # | "--" option " " | description | - # +-------+--------------------+ | - # | (body elements)+ | - # +----------------------------------+ - if (blocks[i]['type'] == 'paragraph' and - _optionre.match(blocks[i]['lines'][0])): - options = [] - for line in blocks[i]['lines']: - m = _optionre.match(line) - if m: - option, arg, rest = m.groups() - width = len(option) + len(arg) - options.append(dict(type='option', lines=[], - indent=blocks[i]['indent'], - width=width)) - options[-1]['lines'].append(line) - blocks[i:i+1] = options - i += len(options) - 1 - i += 1 - return blocks - - -_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):( +)(.*)') -def findfieldlists(blocks): - """Finds fields lists. - - The blocks must have a 'type' field, i.e., they should have been - run through findliteralblocks first. 
- """ - i = 0 - while i < len(blocks): - # Searching for a paragraph that looks like this: - # - # - # +--------------------+----------------------+ - # | ":" field name ":" | field body | - # +-------+------------+ | - # | (body elements)+ | - # +-----------------------------------+ - if (blocks[i]['type'] == 'paragraph' and - _fieldre.match(blocks[i]['lines'][0])): - indent = blocks[i]['indent'] - fields = [] - for line in blocks[i]['lines']: - m = _fieldre.match(line) - if m: - key, spaces, rest = m.groups() - width = 2 + len(key) + len(spaces) - fields.append(dict(type='field', lines=[], - indent=indent, width=width)) - # Turn ":foo: bar" into "foo bar". - line = '%s %s%s' % (key, spaces, rest) - fields[-1]['lines'].append(line) - blocks[i:i+1] = fields - i += len(fields) - 1 - i += 1 - return blocks - - -def finddefinitionlists(blocks): - """Finds definition lists. - - The blocks must have a 'type' field, i.e., they should have been - run through findliteralblocks first. - """ - i = 0 - while i < len(blocks): - # Searching for a paragraph that looks like this: - # - # +----------------------------+ - # | term | - # +--+-------------------------+--+ - # | definition | - # | (body elements)+ | - # +----------------------------+ - if (blocks[i]['type'] == 'paragraph' and - len(blocks[i]['lines']) > 1 and - not blocks[i]['lines'][0].startswith(' ') and - blocks[i]['lines'][1].startswith(' ')): - definitions = [] - for line in blocks[i]['lines']: - if not line.startswith(' '): - definitions.append(dict(type='definition', lines=[], - indent=blocks[i]['indent'])) - definitions[-1]['lines'].append(line) - definitions[-1]['hang'] = len(line) - len(line.lstrip()) - blocks[i:i+1] = definitions - i += len(definitions) - 1 - i += 1 - return blocks - - def inlineliterals(blocks): for b in blocks: if b['type'] == 'paragraph': @@ -298,19 +212,29 @@ return indent + ('\n' + indent).join(block['lines']) if block['type'] == 'definition': term = indent + block['lines'][0] - 
defindent = indent + block['hang'] * ' ' + hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip()) + defindent = indent + hang * ' ' text = ' '.join(map(str.strip, block['lines'][1:])) return "%s\n%s" % (term, textwrap.fill(text, width=width, initial_indent=defindent, subsequent_indent=defindent)) initindent = subindent = indent - text = ' '.join(map(str.strip, block['lines'])) if block['type'] == 'bullet': - initindent = indent + '- ' subindent = indent + ' ' - elif block['type'] in ('option', 'field'): - subindent = indent + block['width'] * ' ' + elif block['type'] == 'field': + m = _fieldre.match(block['lines'][0]) + if m: + key, spaces, rest = m.groups() + # Turn ":foo: bar" into "foo bar". + block['lines'][0] = '%s %s%s' % (key, spaces, rest) + subindent = indent + (2 + len(key) + len(spaces)) * ' ' + elif block['type'] == 'option': + m = _optionre.match(block['lines'][0]) + if m: + option, arg, rest = m.groups() + subindent = indent + (len(option) + len(arg)) * ' ' + text = ' '.join(map(str.strip, block['lines'])) return textwrap.fill(text, width=width, initial_indent=initindent, subsequent_indent=subindent) @@ -323,11 +247,8 @@ b['indent'] += indent blocks = findliteralblocks(blocks) blocks = inlineliterals(blocks) + blocks = splitparagraphs(blocks) blocks = findsections(blocks) - blocks = findbulletlists(blocks) - blocks = findoptionlists(blocks) - blocks = findfieldlists(blocks) - blocks = finddefinitionlists(blocks) blocks = addmargins(blocks) return '\n'.join(formatblock(b, width) for b in blocks) @@ -345,10 +266,7 @@ text = open(sys.argv[1]).read() blocks = debug(findblocks, text) blocks = debug(findliteralblocks, blocks) + blocks = debug(splitparagraphs, blocks) blocks = debug(findsections, blocks) - blocks = debug(findbulletlists, blocks) - blocks = debug(findoptionlists, blocks) - blocks = debug(findfieldlists, blocks) - blocks = debug(finddefinitionlists, blocks) blocks = debug(addmargins, blocks) print '\n'.join(formatblock(b, 30) for b 
in blocks)