--- a/hgext/convert/darcs.py Mon Oct 11 09:44:19 2010 -0500
+++ b/hgext/convert/darcs.py Tue Oct 12 16:25:38 2010 -0500
@@ -7,22 +7,22 @@
from common import NoRepo, checktool, commandline, commit, converter_source
from mercurial.i18n import _
-from mercurial import util
+from mercurial import encoding, util
import os, shutil, tempfile, re
# The naming drift of ElementTree is fun!
try:
- from xml.etree.cElementTree import ElementTree
+ from xml.etree.cElementTree import ElementTree, XMLParser
except ImportError:
try:
- from xml.etree.ElementTree import ElementTree
+ from xml.etree.ElementTree import ElementTree, XMLParser
except ImportError:
try:
- from elementtree.cElementTree import ElementTree
+ from elementtree.cElementTree import ElementTree, XMLParser
except ImportError:
try:
- from elementtree.ElementTree import ElementTree
+ from elementtree.ElementTree import ElementTree, XMLParser
except ImportError:
ElementTree = None
@@ -88,12 +88,24 @@
self.ui.debug('cleaning up %s\n' % self.tmppath)
shutil.rmtree(self.tmppath, ignore_errors=True)
+ def recode(self, s, encoding=None):
+ if isinstance(s, unicode):
+ # XMLParser hands back unicode objects for any text that is not
+ # pure ASCII. xml() parses as latin-1, so encode as latin-1 to get
+ # a str again and keep the base recode()'s normal conversion.
+ s = s.encode('latin-1')
+ return super(darcs_source, self).recode(s, encoding)
+
def xml(self, cmd, **kwargs):
# NOTE: darcs is currently encoding agnostic and will print
# patch metadata byte-for-byte, even in the XML changelog.
etree = ElementTree()
+ # Parse the XML as latin-1, the most liberal choice since every
+ # byte value is a valid latin-1 character. etree will still raise
+ # an exception if the changelog has non-printable characters.
+ parser = XMLParser(encoding='latin-1')
fp = self._run(cmd, **kwargs)
- etree.parse(fp)
+ etree.parse(fp, parser=parser)
self.checkexit(fp.close())
return etree.getroot()
--- a/tests/test-convert-darcs.t Mon Oct 11 09:44:19 2010 -0500
+++ b/tests/test-convert-darcs.t Tue Oct 12 16:25:38 2010 -0500
@@ -49,8 +49,6 @@
$ darcs record -a -l -m p1.2
Finished recording patch 'p1.2'
-merge branch
-
$ darcs pull -a ../darcs-clone
Backing up ./a(-darcs-backup0)
We have conflicts in the following files:
@@ -85,6 +83,15 @@
$ darcs record -a -l -m 'p4: desc ñ' -A 'author ñ'
Finished recording patch 'p4: desc ñ'
+
+Test a commit message containing every latin-1 high byte (0x80-0xff)
+
+ $ echo h > h
+ $ printf "p5: desc " > ../p5
+ $ python -c 'print "".join([chr(i) for i in range(128, 256)])' >> ../p5
+ $ darcs record -a -l --logfile ../p5
+ Finished recording patch 'p5: desc '
+
$ glog()
> {
> HGENCODING=utf-8 hg glog --template '{rev} "{desc|firstline}" ({author}) files: {files}\n' "$@"
@@ -95,12 +102,13 @@
scanning source...
sorting...
converting...
- 5 p0
- 4 p1.2
- 3 p1.1
- 2 p2
- 1 p3
- 0 p4: desc ?
+ 6 p0
+ 5 p1.2
+ 4 p1.1
+ 3 p2
+ 2 p3
+ 1 p4: desc ?
+ 0 p5: desc ????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
The converter does not currently handle patch conflicts very well.
When they occur, it reverts *all* changes and moves forward,
@@ -109,8 +117,11 @@
"c" file in p1.1 patch are reverted too.
Just to say that manifest not listing "c" here is a bug.
- $ glog -R darcs-repo-hg
- o 5 "p4: desc ñ" (author ñ) files: g
+ $ HGENCODING=latin-1 glog -R darcs-repo-hg -r 6 | "$TESTDIR"/printrepr.py
+ o 6 "p5: desc \xc2\x80\xc2\x81\xc2\x82\xc2\x83\xc2\x84\xc2\x85\xc2\x86\xc2\x87\xc2\x88\xc2\x89\xc2\x8a\xc2\x8b\xc2\x8c\xc2\x8d\xc2\x8e\xc2\x8f\xc2\x90\xc2\x91\xc2\x92\xc2\x93\xc2\x94\xc2\x95\xc2\x96\xc2\x97\xc2\x98\xc2\x99\xc2\x9a\xc2\x9b\xc2\x9c\xc2\x9d\xc2\x9e\xc2\x9f\xc2\xa0\xc2\xa1\xc2\xa2\xc2\xa3\xc2\xa4\xc2\xa5\xc2\xa6\xc2\xa7\xc2\xa8\xc2\xa9\xc2\xaa\xc2\xab\xc2\xac\xc2\xad\xc2\xae\xc2\xaf\xc2\xb0\xc2\xb1\xc2\xb2\xc2\xb3\xc2\xb4\xc2\xb5\xc2\xb6\xc2\xb7\xc2\xb8\xc2\xb9\xc2\xba\xc2\xbb\xc2\xbc\xc2\xbd\xc2\xbe\xc2\xbf\xc3\x80\xc3\x81\xc3\x82\xc3\x83\xc3\x84\xc3\x85\xc3\x86\xc3\x87\xc3\x88\xc3\x89\xc3\x8a\xc3\x8b\xc3\x8c\xc3\x8d\xc3\x8e\xc3\x8f\xc3\x90\xc3\x91\xc3\x92\xc3\x93\xc3\x94\xc3\x95\xc3\x96\xc3\x97\xc3\x98\xc3\x99\xc3\x9a\xc3\x9b\xc3\x9c\xc3\x9d\xc3\x9e\xc3\x9f\xc3\xa0\xc3\xa1\xc3\xa2\xc3\xa3\xc3\xa4\xc3\xa5\xc3\xa6\xc3\xa7\xc3\xa8\xc3\xa9\xc3\xaa\xc3\xab\xc3\xac\xc3\xad\xc3\xae\xc3\xaf\xc3\xb0\xc3\xb1\xc3\xb2\xc3\xb3\xc3\xb4\xc3\xb5\xc3\xb6\xc3\xb7\xc3\xb8\xc3\xb9\xc3\xba\xc3\xbb\xc3\xbc\xc3\xbd\xc3\xbe\xc3\xbf" (test@example.org) files: h
+ |
+ $ HGENCODING=utf-8 glog -R darcs-repo-hg -r 0:5 | "$TESTDIR"/printrepr.py
+ o 5 "p4: desc \xc3\xb1" (author \xc3\xb1) files: g
|
o 4 "p3" (test@example.org) files: dir/d dir/d2 dir2/d f ff
|
@@ -122,6 +133,7 @@
|
o 0 "p0" (test@example.org) files: a
+
$ hg up -q -R darcs-repo-hg
$ hg -R darcs-repo-hg manifest --debug
7225b30cdf38257d5cc7780772c051b6f33e6d6b 644 a
@@ -129,3 +141,4 @@
37406831adc447ec2385014019599dfec953c806 644 dir2/d
b783a337463792a5c7d548ad85a7d3253c16ba8c 644 ff
0973eb1b2ecc4de7fafe7447ce1b7462108b4848 644 g
+ fe6f8b4f507fe3eb524c527192a84920a4288dac 644 h