changeset 11464:521c8e0c93bf stable

minirst: use unicode string as intermediate form for replacement. Some character encodings use ASCII characters other than control characters, letters, and digits as part of multi-byte characters, so directly replacing such characters in strings in the local encoding can produce invalid byte sequences.
author FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
date Wed, 30 Jun 2010 12:44:58 +0900
parents f0ea93557133
children ace5bd98bee3
files mercurial/minirst.py
diffstat 1 files changed, 11 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/minirst.py	Tue Jun 29 17:30:42 2010 -0500
+++ b/mercurial/minirst.py	Wed Jun 30 12:44:58 2010 +0900
@@ -36,7 +36,13 @@
 """
 
 import re, sys
-import util
+import util, encoding
+
+def replace(text, substs):
+    utext = text.decode(encoding.encoding)
+    for f, t in substs:
+        utext = utext.replace(f, t)
+    return utext.encode(encoding.encoding)
 
 def findblocks(text):
     """Find continuous blocks of lines in text.
@@ -251,21 +257,22 @@
 
 
 def inlineliterals(blocks):
+    substs = [('``', '"')]
     for b in blocks:
         if b['type'] in ('paragraph', 'section'):
-            b['lines'] = [l.replace('``', '"') for l in b['lines']]
+            b['lines'] = [replace(l, substs) for l in b['lines']]
     return blocks
 
 
 def hgrole(blocks):
+    substs = [(':hg:`', '"hg '), ('`', '"')]
     for b in blocks:
         if b['type'] in ('paragraph', 'section'):
             # Turn :hg:`command` into "hg command". This also works
             # when there is a line break in the command and relies on
             # the fact that we have no stray back-quotes in the input
             # (run the blocks through inlineliterals first).
-            b['lines'] = [l.replace(':hg:`', '"hg ').replace('`', '"')
-                          for l in b['lines']]
+            b['lines'] = [replace(l, substs) for l in b['lines']]
     return blocks