--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/minirst.py Thu Jul 16 23:25:25 2009 +0200
@@ -0,0 +1,299 @@
+# minirst.py - minimal reStructuredText parser
+#
+# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2, incorporated herein by reference.
+
+"""simplified reStructuredText parser.
+
+This parser knows just enough about reStructuredText to parse the
+Mercurial docstrings.
+
+It cheats in a major way: nested blocks are not really nested. They
+are just indented blocks that look like they are nested. This relies
+on the user to keep the right indentation for the blocks.
+
+It only supports a small subset of reStructuredText:
+
+- paragraphs
+
+- definition lists (must use ' ' to indent definitions)
+
+- lists (items must start with '-')
+
+- literal blocks
+
+- option lists (supports only long options without arguments)
+
+- inline markup is not recognized at all.
+"""
+
+import re, sys, textwrap
+
+
+def findblocks(text):
+    """Split text into blocks of consecutive non-blank lines.
+
+    Returns a list of dicts, one per block: 'indent' is the block's
+    smallest leading whitespace, 'lines' its lines with that removed.
+    """
+    groups = []
+    current = []
+    for line in text.splitlines():
+        if line.strip():
+            current.append(line)
+        elif current:
+            groups.append(current)
+            current = []
+    if current:
+        groups.append(current)
+
+    margins = [min([len(l) - len(l.lstrip()) for l in g]) for g in groups]
+    return [dict(indent=m, lines=[l[m:] for l in g])
+            for m, g in zip(margins, groups)]
+
+
+def findliteralblocks(blocks):
+    """Finds literal blocks and adds a 'type' field to the blocks.
+
+    Literal blocks are given the type 'literal', all other blocks are
+    given the type 'paragraph'. The list is modified in place (a block
+    that is just '::' is dropped if another block follows) and returned.
+    """
+    i = 0
+    while i < len(blocks):
+        # Searching for a block that looks like this:
+        #
+        # +------------------------------+
+        # | paragraph                    |
+        # | (ends with "::")             |
+        # +------------------------------+
+        #    +---------------------------+
+        #    | indented literal block    |
+        #    +---------------------------+
+        blocks[i]['type'] = 'paragraph'
+        lines = blocks[i]['lines']
+        if lines[-1].endswith('::') and i+1 < len(blocks):
+            indent = blocks[i]['indent']
+            adjustment = blocks[i+1]['indent'] - indent
+
+            if lines == ['::']:
+                # Expanded form: remove block
+                del blocks[i]
+                i -= 1
+            elif lines[-1].endswith(' ::'):
+                # Partially minimized form: remove space and both colons.
+                lines[-1] = lines[-1][:-3]
+            else:
+                # Fully minimized form: remove just one colon.
+                lines[-1] = lines[-1][:-1]
+
+            # List items are formatted with a hanging indent; correct
+            # for it. Test the '::' paragraph itself through 'lines':
+            # after an expanded-form removal blocks[i] is another block.
+            if lines[0].startswith('- '):
+                indent += 2
+                adjustment -= 2
+
+            # Mark the following indented blocks.
+            while i+1 < len(blocks) and blocks[i+1]['indent'] > indent:
+                blocks[i+1]['type'] = 'literal'
+                blocks[i+1]['indent'] -= adjustment
+                i += 1
+        i += 1
+    return blocks
+
+
+def findsections(blocks):
+    """Mark two-line paragraphs that are underlined titles as sections.
+
+    A paragraph block becomes type 'section' when its second line is
+    made up of '-' characters only and is exactly as long as its first
+    line. Every block needs a 'type' field already, so the list should
+    have been through findliteralblocks. The same list is returned.
+    """
+    for block in blocks:
+        if block['type'] != 'paragraph' or len(block['lines']) != 2:
+            continue
+        # Looking for a title with a matching underline:
+        #   Section title
+        #   -------------
+        title, underline = block['lines']
+        if underline == '-' * len(title):
+            block['type'] = 'section'
+    return blocks
+
+
+def findbulletlists(blocks):
+    """Split paragraphs that begin with '- ' into 'bullet' blocks.
+
+    Each line starting with '- ' opens a new bullet block (the marker
+    is removed and the indent grows by two); other lines continue the
+    current item. The blocks must have a 'type' field, i.e., they
+    should have been run through findliteralblocks first. The list
+    is updated in place and returned.
+    """
+    result = []
+    for block in blocks:
+        # A list paragraph looks like this:
+        #   - first item
+        #     continued
+        #   - second item
+        if block['type'] != 'paragraph' or \
+                not block['lines'][0].startswith('- '):
+            result.append(block)
+            continue
+        for line in block['lines']:
+            if line.startswith('- '):
+                result.append({'type': 'bullet', 'lines': [],
+                               'indent': block['indent'] + 2})
+                line = line[2:]
+            result[-1]['lines'].append(line)
+    blocks[:] = result
+    return blocks
+
+
+_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? {2,})(.*)$')
+def findoptionlists(blocks):
+    """Finds option lists.
+
+    A paragraph whose first line is '--option', an optional ' ARG' or
+    '=ARG', two or more spaces and a description is split into one
+    'option' block per such line; 'width' is the length of the text
+    before the description. The blocks must have a 'type' field, i.e.,
+    they should have been run through findliteralblocks first.
+    """
+    i = 0
+    while i < len(blocks):
+        # Searching for a paragraph that looks like this:
+        #   --option  description
+        #             (continuation lines)
+        if (blocks[i]['type'] == 'paragraph' and
+            _optionre.match(blocks[i]['lines'][0])):
+            options = []
+            for line in blocks[i]['lines']:
+                m = _optionre.match(line)
+                if m:
+                    # Two spaces are required so that ordinary text
+                    # starting with '--' is left as a paragraph.
+                    option, arg = m.group(1, 2)
+                    options.append(dict(type='option', lines=[],
+                                        indent=blocks[i]['indent'],
+                                        width=len(option) + len(arg)))
+                options[-1]['lines'].append(line)
+            blocks[i:i+1] = options
+            i += len(options) - 1
+        i += 1
+    return blocks
+
+
+def finddefinitionlists(blocks):
+    """Split term/definition paragraphs into 'definition' blocks.
+
+    A paragraph of at least two lines whose first line does not start
+    with a space while its second line does is a definition list.
+    Every unindented line opens a new block; 'hang' records the
+    leading whitespace of the last line added to it. Needs the 'type'
+    field set by findliteralblocks; the list is updated in place.
+    """
+    def islist(block):
+        # Shape we are after:
+        #   term
+        #     definition
+        lines = block['lines']
+        return (block['type'] == 'paragraph' and len(lines) > 1 and
+                not lines[0].startswith(' ') and lines[1].startswith(' '))
+
+    result = []
+    for block in blocks:
+        if not islist(block):
+            result.append(block)
+            continue
+        for line in block['lines']:
+            if not line.startswith(' '):
+                result.append({'type': 'definition', 'lines': [],
+                               'indent': block['indent']})
+            entry = result[-1]
+            entry['lines'].append(line)
+            entry['hang'] = len(line) - len(line.lstrip())
+    blocks[:] = result
+    return blocks
+
+
+def addmargins(blocks):
+    """Insert 'margin' blocks to provide vertical spacing.
+
+    A margin goes between every pair of neighbouring blocks, except
+    between two bullets, two options or two definitions, which stay
+    together. The list is changed in place and returned.
+    """
+    grouped = ('bullet', 'option', 'definition')
+    # Walk backwards so that insertions do not shift the positions
+    # that are still to be visited.
+    for pos in range(len(blocks) - 1, 0, -1):
+        kind = blocks[pos]['type']
+        if kind != blocks[pos - 1]['type'] or kind not in grouped:
+            blocks.insert(pos, dict(lines=[''], indent=0, type='margin'))
+    return blocks
+
+
+def formatblock(block, width):
+    """Render a single block as text, wrapping it to width columns.
+
+    Margin blocks give ''; literal and section blocks are indented but
+    never re-wrapped; a definition keeps its term on its own line.
+    """
+    pad = ' ' * block['indent']
+    kind = block['type']
+    if kind == 'margin':
+        return ''
+    if kind in ('literal', 'section'):
+        return pad + ('\n' + pad).join(block['lines'])
+    if kind == 'definition':
+        hang = pad + ' ' * block['hang']
+        body = ' '.join(map(str.strip, block['lines'][1:]))
+        body = textwrap.fill(body, width=width, initial_indent=hang,
+                             subsequent_indent=hang)
+        return pad + block['lines'][0] + '\n' + body
+    first = rest = pad
+    if kind == 'bullet':
+        first = pad[:-2] + '- '
+    elif kind == 'option':
+        rest = pad + ' ' * block['width']
+    text = ' '.join(map(str.strip, block['lines']))
+    return textwrap.fill(text, width=width, initial_indent=first,
+                         subsequent_indent=rest)
+
+
+def format(text, width):
+    """Parse text, run every markup pass and format it to width."""
+    passes = (findblocks, findliteralblocks, findsections,
+              findbulletlists, findoptionlists, finddefinitionlists,
+              addmargins)
+    # The order matters: the later passes need the 'type' field.
+    blocks = text
+    for step in passes:
+        blocks = step(blocks)
+    return '\n'.join([formatblock(b, width) for b in blocks])
+
+
+if __name__ == "__main__":
+    from pprint import pprint
+    # Debug aid: show the blocks after each pass over the file argv[1].
+    def debug(func, blocks):
+        blocks = func(blocks)
+        print "*** after %s:" % func.__name__
+        pprint(blocks)
+        print
+        return blocks
+
+    fp = open(sys.argv[1])
+    try:
+        blocks = fp.read()
+    finally:
+        fp.close()
+    for step in (findblocks, findliteralblocks, findsections, findbulletlists,
+                 findoptionlists, finddefinitionlists, addmargins):
+        blocks = debug(step, blocks)
+    print '\n'.join(formatblock(b, 30) for b in blocks)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-minirst.py Thu Jul 16 23:25:25 2009 +0200
@@ -0,0 +1,138 @@
+#!/usr/bin/env python
+
+from mercurial import minirst
+
+def debugformat(title, text, width):
+    """Print the minirst rendering of text, framed by dashed rulers."""
+    header = "%s formatted to fit within %d characters:" % (title, width)
+    print header + "\n" + "-" * 70
+    print minirst.format(text, width)
+    print "-" * 70 + "\n"
+
+paragraphs = """
+This is some text in the first paragraph.
+
+ An indented paragraph
+ with just two lines.
+
+
+The third paragraph. It is followed by some
+random lines with spurious spaces.
+
+
+
+
+
+No indention
+ here, despite
+the uneven left
+ margin.
+
+ Only the
+ left-most line
+ (this line!)
+ is significant
+ for the indentation
+
+"""
+
+debugformat('paragraphs', paragraphs, 60)  # plain paragraphs, wide
+debugformat('paragraphs', paragraphs, 30)  # same text, narrow
+
+
+definitions = """
+A Term
+ Definition. The indented
+ lines make up the definition.
+Another Term
+ Another definition. The final line in the
+ definition determines the indentation, so
+ this will be indented with four spaces.
+
+ A Nested/Indented Term
+ Definition.
+"""
+
+debugformat('definitions', definitions, 60)  # definition lists, wide
+debugformat('definitions', definitions, 30)  # same text, narrow
+
+
+literals = r"""
+The fully minimized form is the most
+convenient form::
+
+ Hello
+ literal
+ world
+
+In the partially minimized form a paragraph
+simply ends with space-double-colon. ::
+
+ ////////////////////////////////////////
+ long un-wrapped line in a literal block
+ \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
+
+::
+
+ This literal block is started with '::',
+ the so-called expanded form. The paragraph
+ with '::' disappears in the final output.
+"""
+
+debugformat('literals', literals, 60)  # all three '::' forms, wide
+debugformat('literals', literals, 30)  # literal lines must not wrap
+
+
+lists = """
+- This is the first list item.
+
+ Second paragraph in the first list item.
+
+- List items need not be separated
+ by a blank line.
+- And will be rendered without
+ one in any case.
+
+We can have indented lists:
+
+ - This is an indented list item
+
+ - Another indented list item::
+
+ - A literal block in the middle
+ of an indented list.
+
+ (The above is not a list item since we are in the literal block.)
+
+::
+
+ Literal block with no indentation.
+"""
+
+debugformat('lists', lists, 60)  # bullet lists with a nested literal
+debugformat('lists', lists, 30)  # same text, narrow
+
+
+options = """
+There is support for simple option lists,
+but only with long options:
+
+--all Output all.
+--both Output both (this description is
+ quite long).
+--long Output all day long.
+
+--par This option has two paragraphs in its description.
+ This is the first.
+
+ This is the second. Blank lines may be omitted between
+ options (as above) or left in (as here).
+
+The next paragraph looks like an option list, but lacks the two-space
+marker after the option. It is treated as a normal paragraph:
+
+--foo bar baz
+"""
+
+debugformat('options', options, 60)  # option lists plus a look-alike
+debugformat('options', options, 30)  # same text, narrow
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-minirst.py.out Thu Jul 16 23:25:25 2009 +0200
@@ -0,0 +1,209 @@
+paragraphs formatted to fit within 60 characters:
+----------------------------------------------------------------------
+This is some text in the first paragraph.
+
+ An indented paragraph with just two lines.
+
+The third paragraph. It is followed by some random lines
+with spurious spaces.
+
+No indention here, despite the uneven left margin.
+
+ Only the left-most line (this line!) is significant for
+ the indentation
+----------------------------------------------------------------------
+
+paragraphs formatted to fit within 30 characters:
+----------------------------------------------------------------------
+This is some text in the first
+paragraph.
+
+ An indented paragraph with
+ just two lines.
+
+The third paragraph. It is
+followed by some random lines
+with spurious spaces.
+
+No indention here, despite the
+uneven left margin.
+
+ Only the left-most line
+ (this line!) is significant
+ for the indentation
+----------------------------------------------------------------------
+
+definitions formatted to fit within 60 characters:
+----------------------------------------------------------------------
+A Term
+ Definition. The indented lines make up the definition.
+Another Term
+ Another definition. The final line in the definition
+ determines the indentation, so this will be indented
+ with four spaces.
+ A Nested/Indented Term
+ Definition.
+----------------------------------------------------------------------
+
+definitions formatted to fit within 30 characters:
+----------------------------------------------------------------------
+A Term
+ Definition. The indented
+ lines make up the
+ definition.
+Another Term
+ Another definition. The
+ final line in the
+ definition determines the
+ indentation, so this will
+ be indented with four
+ spaces.
+ A Nested/Indented Term
+ Definition.
+----------------------------------------------------------------------
+
+literals formatted to fit within 60 characters:
+----------------------------------------------------------------------
+The fully minimized form is the most convenient form:
+
+Hello
+ literal
+ world
+
+In the partially minimized form a paragraph simply ends with
+space-double-colon.
+
+////////////////////////////////////////
+long un-wrapped line in a literal block
+\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
+
+This literal block is started with '::',
+ the so-called expanded form. The paragraph
+ with '::' disappears in the final output.
+----------------------------------------------------------------------
+
+literals formatted to fit within 30 characters:
+----------------------------------------------------------------------
+The fully minimized form is
+the most convenient form:
+
+Hello
+ literal
+ world
+
+In the partially minimized
+form a paragraph simply ends
+with space-double-colon.
+
+////////////////////////////////////////
+long un-wrapped line in a literal block
+\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
+
+This literal block is started with '::',
+ the so-called expanded form. The paragraph
+ with '::' disappears in the final output.
+----------------------------------------------------------------------
+
+lists formatted to fit within 60 characters:
+----------------------------------------------------------------------
+- This is the first list item.
+
+ Second paragraph in the first list item.
+
+- List items need not be separated by a blank line.
+- And will be rendered without one in any case.
+
+We can have indented lists:
+
+ - This is an indented list item
+ - Another indented list item:
+
+ - A literal block in the middle
+ of an indented list.
+
+ (The above is not a list item since we are in the literal block.)
+
+Literal block with no indentation.
+----------------------------------------------------------------------
+
+lists formatted to fit within 30 characters:
+----------------------------------------------------------------------
+- This is the first list item.
+
+ Second paragraph in the
+ first list item.
+
+- List items need not be
+ separated by a blank line.
+- And will be rendered without
+ one in any case.
+
+We can have indented lists:
+
+ - This is an indented list
+ item
+ - Another indented list
+ item:
+
+ - A literal block in the middle
+ of an indented list.
+
+ (The above is not a list item since we are in the literal block.)
+
+Literal block with no indentation.
+----------------------------------------------------------------------
+
+options formatted to fit within 60 characters:
+----------------------------------------------------------------------
+There is support for simple option lists, but only with long
+options:
+
+--all Output all.
+--both Output both (this description is quite long).
+--long Output all day long.
+--par This option has two paragraphs in its
+ description. This is the first.
+
+ This is the second. Blank lines may be omitted
+ between options (as above) or left in (as here).
+
+The next paragraph looks like an option list, but lacks the
+two-space marker after the option. It is treated as a normal
+paragraph:
+
+--foo bar baz
+----------------------------------------------------------------------
+
+options formatted to fit within 30 characters:
+----------------------------------------------------------------------
+There is support for simple
+option lists, but only with
+long options:
+
+--all Output all.
+--both Output both (this
+ description is
+ quite long).
+--long Output all day
+ long.
+--par This option has two
+ paragraphs in its
+ description. This
+ is the first.
+
+ This is the second.
+ Blank lines may be
+ omitted between
+ options (as above)
+ or left in (as
+ here).
+
+The next paragraph looks like
+an option list, but lacks the
+two-space marker after the
+option. It is treated as a
+normal paragraph:
+
+--foo bar baz
+----------------------------------------------------------------------
+