comparison hgext/convert/bzr.py @ 16059:f5b6046f6ce8

convert/bzr: expect unicode metadata, encode in UTF-8 (issue3232). Before this patch, metadata and file names were interpreted as follows: unicode objects were converted to UTF-8, and non-unicode objects were left unchanged. Looking at the code, and given that bzr is known for transcoding filenames, we expect everything to be returned as unicode objects, and we want to encode them in UTF-8, as the subversion source does. To do that, we just remove the custom implementation of .recode().
author Patrick Mezard <pmezard@gmail.com>
date Thu, 02 Feb 2012 10:15:04 +0100
parents 6ba2fc0a87ab
children f84dda152a55
comparison
equal deleted inserted replaced
16057:db4b0532dbf2 16059:f5b6046f6ce8
141 self._parentids[version] = parents 141 self._parentids[version] = parents
142 142
143 return commit(parents=parents, 143 return commit(parents=parents,
144 date='%d %d' % (rev.timestamp, -rev.timezone), 144 date='%d %d' % (rev.timestamp, -rev.timezone),
145 author=self.recode(rev.committer), 145 author=self.recode(rev.committer),
146 # bzr returns bytestrings or unicode, depending on the content
147 desc=self.recode(rev.message), 146 desc=self.recode(rev.message),
148 rev=version) 147 rev=version)
149 148
150 def gettags(self): 149 def gettags(self):
151 if not self.branch.supports_tags(): 150 if not self.branch.supports_tags():
229 228
230 # no further changes, go to the next change 229 # no further changes, go to the next change
231 continue 230 continue
232 231
233 # we got unicode paths, need to convert them 232 # we got unicode paths, need to convert them
234 path, topath = [self.recode(part) for part in paths] 233 path, topath = paths
234 if path is not None:
235 path = self.recode(path)
236 if topath is not None:
237 topath = self.recode(topath)
235 seen.add(path or topath) 238 seen.add(path or topath)
236 239
237 if topath is None: 240 if topath is None:
238 # file deleted 241 # file deleted
239 changes.append((path, revid)) 242 changes.append((path, revid))
258 <http://bazaar-vcs.org/GhostRevision> 261 <http://bazaar-vcs.org/GhostRevision>
259 """ 262 """
260 parentmap = self.sourcerepo.get_parent_map(ids) 263 parentmap = self.sourcerepo.get_parent_map(ids)
261 parents = tuple([parent for parent in ids if parent in parentmap]) 264 parents = tuple([parent for parent in ids if parent in parentmap])
262 return parents 265 return parents
263
264 def recode(self, s, encoding=None):
265 """This version of recode tries to encode unicode to a bytestring,
266 preferably using the UTF-8 codec.
267 Types other than unicode are silently returned; this is
268 intentional, e.g. None is not going to be encoded but is instead
269 just passed through
270 """
271 if not encoding:
272 encoding = self.encoding or 'utf-8'
273
274 if isinstance(s, unicode):
275 return s.encode(encoding)
276 else:
277 # leave it alone
278 return s