changeset 12718:372abc799caa

merge with stable
author Matt Mackall <mpm@selenic.com>
date Tue, 12 Oct 2010 16:25:38 -0500
parents c7e619e30ba3 (current diff) 89df79b3c011 (diff)
children 97fd67f35c00
files tests/test-convert-darcs.t
diffstat 2 files changed, 41 insertions(+), 16 deletions(-) [+]
line wrap: on
line diff
--- a/hgext/convert/darcs.py	Mon Oct 11 09:44:19 2010 -0500
+++ b/hgext/convert/darcs.py	Tue Oct 12 16:25:38 2010 -0500
@@ -7,22 +7,22 @@
 
 from common import NoRepo, checktool, commandline, commit, converter_source
 from mercurial.i18n import _
-from mercurial import util
+from mercurial import encoding, util
 import os, shutil, tempfile, re
 
 # The naming drift of ElementTree is fun!
 
 try:
-    from xml.etree.cElementTree import ElementTree
+    from xml.etree.cElementTree import ElementTree, XMLParser
 except ImportError:
     try:
-        from xml.etree.ElementTree import ElementTree
+        from xml.etree.ElementTree import ElementTree, XMLParser
     except ImportError:
         try:
-            from elementtree.cElementTree import ElementTree
+            from elementtree.cElementTree import ElementTree, XMLParser
         except ImportError:
             try:
-                from elementtree.ElementTree import ElementTree
+                from elementtree.ElementTree import ElementTree, XMLParser
             except ImportError:
                 ElementTree = None
 
@@ -88,12 +88,24 @@
         self.ui.debug('cleaning up %s\n' % self.tmppath)
         shutil.rmtree(self.tmppath, ignore_errors=True)
 
+    def recode(self, s, encoding=None):
+        if isinstance(s, unicode):
+            # XMLParser returns unicode objects for anything it can't
+            # encode into ASCII. We convert them back to str to get
+            # recode's normal conversion behavior.
+            s = s.encode('latin-1')
+        return super(darcs_source, self).recode(s, encoding)
+
     def xml(self, cmd, **kwargs):
         # NOTE: darcs is currently encoding agnostic and will print
         # patch metadata byte-for-byte, even in the XML changelog.
         etree = ElementTree()
+        # While we are decoding the XML as latin-1 to be as liberal as
+        # possible, etree will still raise an exception if any
+        # non-printable characters are in the XML changelog.
+        parser = XMLParser(encoding='latin-1')
         fp = self._run(cmd, **kwargs)
-        etree.parse(fp)
+        etree.parse(fp, parser=parser)
         self.checkexit(fp.close())
         return etree.getroot()
 
--- a/tests/test-convert-darcs.t	Mon Oct 11 09:44:19 2010 -0500
+++ b/tests/test-convert-darcs.t	Tue Oct 12 16:25:38 2010 -0500
@@ -49,8 +49,6 @@
   $ darcs record -a -l -m p1.2
   Finished recording patch 'p1.2'
 
-merge branch
-
   $ darcs pull -a ../darcs-clone
   Backing up ./a(-darcs-backup0)
   We have conflicts in the following files:
@@ -85,6 +83,15 @@
 
   $ darcs record -a -l -m 'p4: desc ñ' -A 'author ñ'
   Finished recording patch 'p4: desc ñ'
+
+Test latin-1 commit message
+
+  $ echo h > h
+  $ printf "p5: desc " > ../p5
+  $ python -c 'print "".join([chr(i) for i in range(128, 256)])' >> ../p5
+  $ darcs record -a -l --logfile ../p5
+  Finished recording patch 'p5: desc '
+ 
   $ glog()
   > {
   >     HGENCODING=utf-8 hg glog --template '{rev} "{desc|firstline}" ({author}) files: {files}\n' "$@"
@@ -95,12 +102,13 @@
   scanning source...
   sorting...
   converting...
-  5 p0
-  4 p1.2
-  3 p1.1
-  2 p2
-  1 p3
-  0 p4: desc ?
+  6 p0
+  5 p1.2
+  4 p1.1
+  3 p2
+  2 p3
+  1 p4: desc ?
+  0 p5: desc ????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
 
 The converter does not currently handle patch conflicts very well.
 When they occur, it reverts *all* changes and moves forward,
@@ -109,8 +117,11 @@
 "c" file in p1.1 patch are reverted too.
 Just to say that manifest not listing "c" here is a bug.
 
-  $ glog -R darcs-repo-hg
-  o  5 "p4: desc ñ" (author ñ) files: g
+  $ HGENCODING=latin-1 glog -R darcs-repo-hg -r 6 | "$TESTDIR"/printrepr.py
+  o  6 "p5: desc \xc2\x80\xc2\x81\xc2\x82\xc2\x83\xc2\x84\xc2\x85\xc2\x86\xc2\x87\xc2\x88\xc2\x89\xc2\x8a\xc2\x8b\xc2\x8c\xc2\x8d\xc2\x8e\xc2\x8f\xc2\x90\xc2\x91\xc2\x92\xc2\x93\xc2\x94\xc2\x95\xc2\x96\xc2\x97\xc2\x98\xc2\x99\xc2\x9a\xc2\x9b\xc2\x9c\xc2\x9d\xc2\x9e\xc2\x9f\xc2\xa0\xc2\xa1\xc2\xa2\xc2\xa3\xc2\xa4\xc2\xa5\xc2\xa6\xc2\xa7\xc2\xa8\xc2\xa9\xc2\xaa\xc2\xab\xc2\xac\xc2\xad\xc2\xae\xc2\xaf\xc2\xb0\xc2\xb1\xc2\xb2\xc2\xb3\xc2\xb4\xc2\xb5\xc2\xb6\xc2\xb7\xc2\xb8\xc2\xb9\xc2\xba\xc2\xbb\xc2\xbc\xc2\xbd\xc2\xbe\xc2\xbf\xc3\x80\xc3\x81\xc3\x82\xc3\x83\xc3\x84\xc3\x85\xc3\x86\xc3\x87\xc3\x88\xc3\x89\xc3\x8a\xc3\x8b\xc3\x8c\xc3\x8d\xc3\x8e\xc3\x8f\xc3\x90\xc3\x91\xc3\x92\xc3\x93\xc3\x94\xc3\x95\xc3\x96\xc3\x97\xc3\x98\xc3\x99\xc3\x9a\xc3\x9b\xc3\x9c\xc3\x9d\xc3\x9e\xc3\x9f\xc3\xa0\xc3\xa1\xc3\xa2\xc3\xa3\xc3\xa4\xc3\xa5\xc3\xa6\xc3\xa7\xc3\xa8\xc3\xa9\xc3\xaa\xc3\xab\xc3\xac\xc3\xad\xc3\xae\xc3\xaf\xc3\xb0\xc3\xb1\xc3\xb2\xc3\xb3\xc3\xb4\xc3\xb5\xc3\xb6\xc3\xb7\xc3\xb8\xc3\xb9\xc3\xba\xc3\xbb\xc3\xbc\xc3\xbd\xc3\xbe\xc3\xbf" (test@example.org) files: h
+  |
+  $ HGENCODING=utf-8 glog -R darcs-repo-hg -r 0:5 | "$TESTDIR"/printrepr.py
+  o  5 "p4: desc \xc3\xb1" (author \xc3\xb1) files: g
   |
   o  4 "p3" (test@example.org) files: dir/d dir/d2 dir2/d f ff
   |
@@ -122,6 +133,7 @@
   |
   o  0 "p0" (test@example.org) files: a
   
+
   $ hg up -q -R darcs-repo-hg
   $ hg -R darcs-repo-hg manifest --debug
   7225b30cdf38257d5cc7780772c051b6f33e6d6b 644   a
@@ -129,3 +141,4 @@
   37406831adc447ec2385014019599dfec953c806 644   dir2/d
   b783a337463792a5c7d548ad85a7d3253c16ba8c 644   ff
   0973eb1b2ecc4de7fafe7447ce1b7462108b4848 644   g
+  fe6f8b4f507fe3eb524c527192a84920a4288dac 644   h