Mercurial > hg
changeset 11464:521c8e0c93bf stable
minirst: use unicode string as intermediate form for replacement
Some character encodings use ASCII characters other than control
characters, letters, and digits as part of multi-byte characters, so
performing replacements on such characters directly in strings in the
local encoding can produce invalid byte sequences.
author | FUJIWARA Katsunori <foozy@lares.dti.ne.jp> |
---|---|
date | Wed, 30 Jun 2010 12:44:58 +0900 |
parents | f0ea93557133 |
children | ace5bd98bee3 |
files | mercurial/minirst.py |
diffstat | 1 files changed, 11 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/minirst.py Tue Jun 29 17:30:42 2010 -0500
+++ b/mercurial/minirst.py Wed Jun 30 12:44:58 2010 +0900
@@ -36,7 +36,13 @@
 """
 
 import re, sys
-import util
+import util, encoding
+
+def replace(text, substs):
+    utext = text.decode(encoding.encoding)
+    for f, t in substs:
+        utext = utext.replace(f, t)
+    return utext.encode(encoding.encoding)
 
 def findblocks(text):
     """Find continuous blocks of lines in text.
@@ -251,21 +257,22 @@
 
 
 def inlineliterals(blocks):
+    substs = [('``', '"')]
     for b in blocks:
         if b['type'] in ('paragraph', 'section'):
-            b['lines'] = [l.replace('``', '"') for l in b['lines']]
+            b['lines'] = [replace(l, substs) for l in b['lines']]
     return blocks
 
 
 def hgrole(blocks):
+    substs = [(':hg:`', '"hg '), ('`', '"')]
     for b in blocks:
         if b['type'] in ('paragraph', 'section'):
             # Turn :hg:`command` into "hg command". This also works
             # when there is a line break in the command and relies on
             # the fact that we have no stray back-quotes in the input
             # (run the blocks through inlineliterals first).
-            b['lines'] = [l.replace(':hg:`', '"hg ').replace('`', '"')
-                          for l in b['lines']]
+            b['lines'] = [replace(l, substs) for l in b['lines']]
     return blocks
 
 