diff -r f314723f36f5 -r 89df79b3c011 hgext/convert/darcs.py --- a/hgext/convert/darcs.py Tue Oct 05 11:34:13 2010 +0200 +++ b/hgext/convert/darcs.py Fri Oct 01 10:15:04 2010 -0500 @@ -7,22 +7,22 @@ from common import NoRepo, checktool, commandline, commit, converter_source from mercurial.i18n import _ -from mercurial import util +from mercurial import encoding, util import os, shutil, tempfile, re # The naming drift of ElementTree is fun! try: - from xml.etree.cElementTree import ElementTree + from xml.etree.cElementTree import ElementTree, XMLParser except ImportError: try: - from xml.etree.ElementTree import ElementTree + from xml.etree.ElementTree import ElementTree, XMLParser except ImportError: try: - from elementtree.cElementTree import ElementTree + from elementtree.cElementTree import ElementTree, XMLParser except ImportError: try: - from elementtree.ElementTree import ElementTree + from elementtree.ElementTree import ElementTree, XMLParser except ImportError: ElementTree = None @@ -88,12 +88,24 @@ self.ui.debug('cleaning up %s\n' % self.tmppath) shutil.rmtree(self.tmppath, ignore_errors=True) + def recode(self, s, encoding=None): + if isinstance(s, unicode): + # XMLParser returns unicode objects for anything it can't + # encode into ASCII. We convert them back to str to get + # recode's normal conversion behavior. + s = s.encode('latin-1') + return super(darcs_source, self).recode(s, encoding) + def xml(self, cmd, **kwargs): # NOTE: darcs is currently encoding agnostic and will print # patch metadata byte-for-byte, even in the XML changelog. etree = ElementTree() + # While we are decoding the XML as latin-1 to be as liberal as + # possible, etree will still raise an exception if any + # non-printable characters are in the XML changelog. + parser = XMLParser(encoding='latin-1') fp = self._run(cmd, **kwargs) - etree.parse(fp) + etree.parse(fp, parser=parser) self.checkexit(fp.close()) return etree.getroot()