convert/bzr: expect unicode metadata, encode in UTF-8 (issue3232)
Before this patch, metadata and file names were handled as follows:
- unicode objects were converted to UTF-8
- non-unicode objects were left unchanged
Judging from the code, and given that bzr is known for transcoding filenames,
we expect everything to be returned as unicode objects, and we want to encode
them in UTF-8, like the subversion source does. To do that, we just remove the
custom implementation of .recode(), so that the remaining self.recode() calls
fall back to the inherited implementation.
--- a/hgext/convert/bzr.py Mon Jan 30 00:05:28 2012 +0100
+++ b/hgext/convert/bzr.py Thu Feb 02 10:15:04 2012 +0100
@@ -143,7 +143,6 @@
return commit(parents=parents,
date='%d %d' % (rev.timestamp, -rev.timezone),
author=self.recode(rev.committer),
- # bzr returns bytestrings or unicode, depending on the content
desc=self.recode(rev.message),
rev=version)
@@ -231,7 +230,11 @@
continue
# we got unicode paths, need to convert them
- path, topath = [self.recode(part) for part in paths]
+ path, topath = paths
+ if path is not None:
+ path = self.recode(path)
+ if topath is not None:
+ topath = self.recode(topath)
seen.add(path or topath)
if topath is None:
@@ -260,19 +263,3 @@
parentmap = self.sourcerepo.get_parent_map(ids)
parents = tuple([parent for parent in ids if parent in parentmap])
return parents
-
- def recode(self, s, encoding=None):
- """This version of recode tries to encode unicode to bytecode,
- and preferably using the UTF-8 codec.
- Other types than Unicode are silently returned, this is by
- intention, e.g. the None-type is not going to be encoded but instead
- just passed through
- """
- if not encoding:
- encoding = self.encoding or 'utf-8'
-
- if isinstance(s, unicode):
- return s.encode(encoding)
- else:
- # leave it alone
- return s