minirst: combine list parsing in one function
authorMartin Geisler <mg@lazybytes.net>
Fri, 06 Nov 2009 00:30:35 +0100
changeset 9737 5f101af4a921
parent 9736 26d3ade60fa6
child 9738 f52c4f7a4732
minirst: combine list parsing in one function Bullet, option, field, and definition lists were parsed very similar code. They are now parsed by a single function (splitparagraphs). Some logic from the old parsing functions has been moved down to formatblock. This simplifies the parsing while putting the logic where it's really needed.
mercurial/minirst.py
--- a/mercurial/minirst.py	Thu Nov 05 21:53:22 2009 +0100
+++ b/mercurial/minirst.py	Fri Nov 06 00:30:35 2009 +0100
@@ -106,6 +106,53 @@
         i += 1
     return blocks
 
+_bulletre = re.compile(r'- ')
+_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)?  +)(.*)$')
+_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):( +)(.*)')
+_definitionre = re.compile(r'[^ ]')
+
+def splitparagraphs(blocks):
+    """Split paragraphs into lists."""
+    # Tuples with (list type, item regexp, single line items?). Order
+    # matters: definition lists has the least specific regexp and must
+    # come last.
+    listtypes = [('bullet', _bulletre, True),
+                 ('option', _optionre, True),
+                 ('field', _fieldre, True),
+                 ('definition', _definitionre, False)]
+
+    def match(lines, i, itemre, singleline):
+        """Does itemre match an item at line i?
+
+        A list item can be followed by an idented line or another list
+        item (but only if singleline is True).
+        """
+        line1 = lines[i]
+        line2 = i+1 < len(lines) and lines[i+1] or ''
+        if not itemre.match(line1):
+            return False
+        if singleline:
+            return line2 == '' or line2[0] == ' ' or itemre.match(line2)
+        else:
+            return line2.startswith(' ')
+
+    i = 0
+    while i < len(blocks):
+        if blocks[i]['type'] == 'paragraph':
+            lines = blocks[i]['lines']
+            for type, itemre, singleline in listtypes:
+                if match(lines, 0, itemre, singleline):
+                    items = []
+                    for j, line in enumerate(lines):
+                        if match(lines, j, itemre, singleline):
+                            items.append(dict(type=type, lines=[],
+                                              indent=blocks[i]['indent']))
+                        items[-1]['lines'].append(line)
+                    blocks[i:i+1] = items
+                    break
+        i += 1
+    return blocks
+
 
 def findsections(blocks):
     """Finds sections.
@@ -127,139 +174,6 @@
     return blocks
 
 
-def findbulletlists(blocks):
-    """Finds bullet lists.
-
-    The blocks must have a 'type' field, i.e., they should have been
-    run through findliteralblocks first.
-    """
-    i = 0
-    while i < len(blocks):
-        # Searching for a paragraph that looks like this:
-        #
-        # +------+-----------------------+
-        # | "- " | list item             |
-        # +------| (body elements)+      |
-        #        +-----------------------+
-        if (blocks[i]['type'] == 'paragraph' and
-            blocks[i]['lines'][0].startswith('- ')):
-            items = []
-            for line in blocks[i]['lines']:
-                if line.startswith('- '):
-                    items.append(dict(type='bullet', lines=[],
-                                      indent=blocks[i]['indent']))
-                    line = line[2:]
-                items[-1]['lines'].append(line)
-            blocks[i:i+1] = items
-            i += len(items) - 1
-        i += 1
-    return blocks
-
-
-_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)?  +)(.*)$')
-def findoptionlists(blocks):
-    """Finds option lists.
-
-    The blocks must have a 'type' field, i.e., they should have been
-    run through findliteralblocks first.
-    """
-    i = 0
-    while i < len(blocks):
-        # Searching for a paragraph that looks like this:
-        #
-        # +----------------------------+-------------+
-        # | "--" option "  "           | description |
-        # +-------+--------------------+             |
-        #         | (body elements)+                 |
-        #         +----------------------------------+
-        if (blocks[i]['type'] == 'paragraph' and
-            _optionre.match(blocks[i]['lines'][0])):
-            options = []
-            for line in blocks[i]['lines']:
-                m = _optionre.match(line)
-                if m:
-                    option, arg, rest = m.groups()
-                    width = len(option) + len(arg)
-                    options.append(dict(type='option', lines=[],
-                                        indent=blocks[i]['indent'],
-                                        width=width))
-                options[-1]['lines'].append(line)
-            blocks[i:i+1] = options
-            i += len(options) - 1
-        i += 1
-    return blocks
-
-
-_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):( +)(.*)')
-def findfieldlists(blocks):
-    """Finds fields lists.
-
-    The blocks must have a 'type' field, i.e., they should have been
-    run through findliteralblocks first.
-    """
-    i = 0
-    while i < len(blocks):
-        # Searching for a paragraph that looks like this:
-        #
-        #
-        # +--------------------+----------------------+
-        # | ":" field name ":" | field body           |
-        # +-------+------------+                      |
-        #         | (body elements)+                  |
-        #         +-----------------------------------+
-        if (blocks[i]['type'] == 'paragraph' and
-            _fieldre.match(blocks[i]['lines'][0])):
-            indent = blocks[i]['indent']
-            fields = []
-            for line in blocks[i]['lines']:
-                m = _fieldre.match(line)
-                if m:
-                    key, spaces, rest = m.groups()
-                    width = 2 + len(key) + len(spaces)
-                    fields.append(dict(type='field', lines=[],
-                                       indent=indent, width=width))
-                    # Turn ":foo: bar" into "foo   bar".
-                    line = '%s  %s%s' % (key, spaces, rest)
-                fields[-1]['lines'].append(line)
-            blocks[i:i+1] = fields
-            i += len(fields) - 1
-        i += 1
-    return blocks
-
-
-def finddefinitionlists(blocks):
-    """Finds definition lists.
-
-    The blocks must have a 'type' field, i.e., they should have been
-    run through findliteralblocks first.
-    """
-    i = 0
-    while i < len(blocks):
-        # Searching for a paragraph that looks like this:
-        #
-        # +----------------------------+
-        # | term                       |
-        # +--+-------------------------+--+
-        #    | definition                 |
-        #    | (body elements)+           |
-        #    +----------------------------+
-        if (blocks[i]['type'] == 'paragraph' and
-            len(blocks[i]['lines']) > 1 and
-            not blocks[i]['lines'][0].startswith('  ') and
-            blocks[i]['lines'][1].startswith('  ')):
-            definitions = []
-            for line in blocks[i]['lines']:
-                if not line.startswith('  '):
-                    definitions.append(dict(type='definition', lines=[],
-                                            indent=blocks[i]['indent']))
-                definitions[-1]['lines'].append(line)
-                definitions[-1]['hang'] = len(line) - len(line.lstrip())
-            blocks[i:i+1] = definitions
-            i += len(definitions) - 1
-        i += 1
-    return blocks
-
-
 def inlineliterals(blocks):
     for b in blocks:
         if b['type'] == 'paragraph':
@@ -298,19 +212,29 @@
         return indent + ('\n' + indent).join(block['lines'])
     if block['type'] == 'definition':
         term = indent + block['lines'][0]
-        defindent = indent + block['hang'] * ' '
+        hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip())
+        defindent = indent + hang * ' '
         text = ' '.join(map(str.strip, block['lines'][1:]))
         return "%s\n%s" % (term, textwrap.fill(text, width=width,
                                                initial_indent=defindent,
                                                subsequent_indent=defindent))
     initindent = subindent = indent
-    text = ' '.join(map(str.strip, block['lines']))
     if block['type'] == 'bullet':
-        initindent = indent + '- '
         subindent = indent + '  '
-    elif block['type'] in ('option', 'field'):
-        subindent = indent + block['width'] * ' '
+    elif block['type'] == 'field':
+        m = _fieldre.match(block['lines'][0])
+        if m:
+            key, spaces, rest = m.groups()
+            # Turn ":foo: bar" into "foo   bar".
+            block['lines'][0] = '%s  %s%s' % (key, spaces, rest)
+            subindent = indent + (2 + len(key) + len(spaces)) * ' '
+    elif block['type'] == 'option':
+        m = _optionre.match(block['lines'][0])
+        if m:
+            option, arg, rest = m.groups()
+            subindent = indent + (len(option) + len(arg)) * ' '
 
+    text = ' '.join(map(str.strip, block['lines']))
     return textwrap.fill(text, width=width,
                          initial_indent=initindent,
                          subsequent_indent=subindent)
@@ -323,11 +247,8 @@
         b['indent'] += indent
     blocks = findliteralblocks(blocks)
     blocks = inlineliterals(blocks)
+    blocks = splitparagraphs(blocks)
     blocks = findsections(blocks)
-    blocks = findbulletlists(blocks)
-    blocks = findoptionlists(blocks)
-    blocks = findfieldlists(blocks)
-    blocks = finddefinitionlists(blocks)
     blocks = addmargins(blocks)
     return '\n'.join(formatblock(b, width) for b in blocks)
 
@@ -345,10 +266,7 @@
     text = open(sys.argv[1]).read()
     blocks = debug(findblocks, text)
     blocks = debug(findliteralblocks, blocks)
+    blocks = debug(splitparagraphs, blocks)
     blocks = debug(findsections, blocks)
-    blocks = debug(findbulletlists, blocks)
-    blocks = debug(findoptionlists, blocks)
-    blocks = debug(findfieldlists, blocks)
-    blocks = debug(finddefinitionlists, blocks)
     blocks = debug(addmargins, blocks)
     print '\n'.join(formatblock(b, 30) for b in blocks)