# HG changeset patch # User Matt Mackall # Date 1290635792 21600 # Node ID 6c375e07d6736925d3b44065760d927ad2e1dcb7 # Parent 7cc4263e07a94644e8857c682db01a16118dbb53 branch: operate on branch names in local string space where possible Previously, branch names were ideally manipulated as UTF-8 strings, because they were stored as UTF-8 in the dirstate and the changelog and could not be safely converted to the local encoding and back. However, only about 80% of branch name code was actually using the right encoding conventions. This patch uses the localstr addition to allow working on branch names as local strings, which simplifies handling so that the previously incorrect code becomes correct. diff -r 7cc4263e07a9 -r 6c375e07d673 mercurial/archival.py --- a/mercurial/archival.py Wed Nov 24 15:38:52 2010 -0600 +++ b/mercurial/archival.py Wed Nov 24 15:56:32 2010 -0600 @@ -8,7 +8,7 @@ from i18n import _ from node import hex import cmdutil -import util +import util, encoding import cStringIO, os, stat, tarfile, time, zipfile import zlib, gzip @@ -243,7 +243,7 @@ if repo.ui.configbool("ui", "archivemeta", True): def metadata(): base = 'repo: %s\nnode: %s\nbranch: %s\n' % ( - repo[0].hex(), hex(node), ctx.branch()) + repo[0].hex(), hex(node), encoding.fromlocal(ctx.branch())) tags = ''.join('tag: %s\n' % t for t in ctx.tags() if repo.tagtype(t) == 'global') diff -r 7cc4263e07a9 -r 6c375e07d673 mercurial/cmdutil.py --- a/mercurial/cmdutil.py Wed Nov 24 15:38:52 2010 -0600 +++ b/mercurial/cmdutil.py Wed Nov 24 15:56:32 2010 -0600 @@ -801,7 +801,6 @@ branch = ctx.branch() # don't show the default branch name if branch != 'default': - branch = encoding.tolocal(branch) self.ui.write(_("branch: %s\n") % branch, label='log.branch') for tag in self.repo.nodetags(changenode): @@ -1357,8 +1356,7 @@ if ctx.p2(): edittext.append(_("HG: branch merge")) if ctx.branch(): - edittext.append(_("HG: branch '%s'") - % encoding.tolocal(ctx.branch())) + edittext.append(_("HG: branch '%s'") % ctx.branch()) edittext.extend([_("HG: subrepo %s") % s for s in subs]) edittext.extend([_("HG: added %s") % f for f in added]) edittext.extend([_("HG: changed %s") % f for f in modified]) diff -r 7cc4263e07a9 -r 6c375e07d673 mercurial/commands.py --- a/mercurial/commands.py Wed Nov 24 15:38:52 2010 -0600 +++ b/mercurial/commands.py Wed Nov 24 15:56:32 2010 -0600 @@ -488,15 +488,14 @@ repo.dirstate.setbranch(label) ui.status(_('reset working directory to branch %s\n') % label) elif label: - utflabel = encoding.fromlocal(label) - if not opts.get('force') and utflabel in repo.branchtags(): + if not opts.get('force') and label in repo.branchtags(): if label not in [p.branch() for p in repo.parents()]: raise util.Abort(_('a branch of the same name already exists' " (use 'hg update' to switch to it)")) - repo.dirstate.setbranch(utflabel) + repo.dirstate.setbranch(label) ui.status(_('marked working directory as branch %s\n') % label) else: - ui.write("%s\n" % encoding.tolocal(repo.dirstate.branch())) + ui.write("%s\n" % repo.dirstate.branch()) def branches(ui, repo, active=False, closed=False): """list repository named branches @@ -525,9 +524,8 @@ for isactive, node, tag in branches: if (not active) or isactive: - encodedtag = encoding.tolocal(tag) if ui.quiet: - ui.write("%s\n" % encodedtag) + ui.write("%s\n" % tag) else: hn = repo.lookup(node) if isactive: @@ -543,10 +541,10 @@ notice = _(' (inactive)') if tag == repo.dirstate.branch(): label = 'branches.current' - rev = str(node).rjust(31 - encoding.colwidth(encodedtag)) + rev = str(node).rjust(31 - encoding.colwidth(tag)) rev = ui.label('%s:%s' % (rev, hexfunc(hn)), 'log.changeset') - encodedtag = ui.label(encodedtag, label) - ui.write("%s %s%s\n" % (encodedtag, rev, notice)) + tag = ui.label(tag, label) + ui.write("%s %s%s\n" % (tag, rev, notice)) def bundle(ui, repo, fname, dest=None, **opts): """create a changegroup file @@ -1830,8 +1828,7 @@ heads += [repo[h] for h in ls if rev(h) in descendants] if branchrevs: - decode, encode = encoding.fromlocal, encoding.tolocal - branches = set(repo[decode(br)].branch() for br in branchrevs) + branches = set(repo[br].branch() for br in branchrevs) heads = [h for h in heads if h.branch() in branches] if not opts.get('closed'): @@ -1844,7 +1841,7 @@ if branchrevs: haveheads = set(h.branch() for h in heads) if branches - haveheads: - headless = ', '.join(encode(b) for b in branches - haveheads) + headless = ', '.join(b for b in branches - haveheads) msg = _('no open branch heads found on branches %s') if opts.get('rev'): msg += _(' (started at %s)' % opts['rev']) @@ -2209,7 +2206,7 @@ output.append(str(ctx.rev())) if repo.local() and default and not ui.quiet: - b = encoding.tolocal(ctx.branch()) + b = ctx.branch() if b != 'default': output.append("(%s)" % b) @@ -2219,7 +2216,7 @@ output.append(t) if branch: - output.append(encoding.tolocal(ctx.branch())) + output.append(ctx.branch()) if tags: output.extend(ctx.tags()) @@ -2623,7 +2620,7 @@ node = opts.get('rev') if not node: - branch = repo.changectx(None).branch() + branch = repo[None].branch() bheads = repo.branchheads(branch) if len(bheads) > 2: raise util.Abort(_( diff -r 7cc4263e07a9 -r 6c375e07d673 mercurial/context.py --- a/mercurial/context.py Wed Nov 24 15:38:52 2010 -0600 +++ b/mercurial/context.py Wed Nov 24 15:56:32 2010 -0600 @@ -7,7 +7,7 @@ from node import nullid, nullrev, short, hex from i18n import _ -import ancestor, bdiff, error, util, subrepo, patch +import ancestor, bdiff, error, util, subrepo, patch, encoding import os, errno, stat propertycache = util.propertycache @@ -109,7 +109,7 @@ def description(self): return self._changeset[4] def branch(self): - return self._changeset[5].get("branch") + return encoding.tolocal(self._changeset[5].get("branch")) def extra(self): return self._changeset[5] def tags(self): @@ -591,9 +591,8 @@ if extra: self._extra = extra.copy() if 'branch' not in self._extra: - branch = self._repo.dirstate.branch() try: - branch = branch.decode('UTF-8').encode('UTF-8') + branch = encoding.fromlocal(self._repo.dirstate.branch()) except UnicodeDecodeError: raise util.Abort(_('branch name not in UTF-8!')) self._extra['branch'] = branch @@ -715,7 +714,7 @@ assert self._clean is not None # must call status first return self._clean def branch(self): - return self._extra['branch'] + return encoding.tolocal(self._extra['branch']) def extra(self): return self._extra @@ -1048,7 +1047,7 @@ def clean(self): return self._status[6] def branch(self): - return self._extra['branch'] + return encoding.tolocal(self._extra['branch']) def extra(self): return self._extra def flags(self, f): diff -r 7cc4263e07a9 -r 6c375e07d673 mercurial/dirstate.py --- a/mercurial/dirstate.py Wed Nov 24 15:38:52 2010 -0600 +++ b/mercurial/dirstate.py Wed Nov 24 15:56:32 2010 -0600 @@ -7,7 +7,7 @@ from node import nullid from i18n import _ -import util, ignore, osutil, parsers +import util, ignore, osutil, parsers, encoding import struct, os, stat, errno import cStringIO @@ -201,7 +201,7 @@ return [self._validate(p) for p in self._pl] def branch(self): - return self._branch + return encoding.tolocal(self._branch) def setparents(self, p1, p2=nullid): self._dirty = self._dirtypl = True @@ -210,8 +210,8 @@ def setbranch(self, branch): if branch in ['tip', '.', 'null']: raise util.Abort(_('the name \'%s\' is reserved') % branch) - self._branch = branch - self._opener("branch", "w").write(branch + '\n') + self._branch = encoding.fromlocal(branch) + self._opener("branch", "w").write(self._branch + '\n') def _read(self): self._map = {} diff -r 7cc4263e07a9 -r 6c375e07d673 mercurial/hg.py --- a/mercurial/hg.py Wed Nov 24 15:38:52 2010 -0600 +++ b/mercurial/hg.py Wed Nov 24 15:56:32 2010 -0600 @@ -32,24 +32,22 @@ return revs, revs[0] branchmap = repo.branchmap() - def primary(butf8): - if butf8 == '.': + def primary(branch): + if branch == '.': if not lrepo or not lrepo.local(): raise util.Abort(_("dirstate branch not accessible")) - butf8 = lrepo.dirstate.branch() - if butf8 in branchmap: - revs.extend(node.hex(r) for r in reversed(branchmap[butf8])) + branch = lrepo.dirstate.branch() + if branch in branchmap: + revs.extend(node.hex(r) for r in reversed(branchmap[branch])) return True else: return False for branch in branches: - butf8 = encoding.fromlocal(branch) - if not primary(butf8): + if not primary(branch): raise error.RepoLookupError(_("unknown branch '%s'") % branch) if hashbranch: - butf8 = encoding.fromlocal(hashbranch) - if not primary(butf8): + if not primary(hashbranch): revs.append(hashbranch) return revs, revs[0] @@ -365,8 +363,7 @@ except error.RepoLookupError: continue bn = dest_repo[uprev].branch() - dest_repo.ui.status(_("updating to branch %s\n") - % encoding.tolocal(bn)) + dest_repo.ui.status(_("updating to branch %s\n") % bn) _update(dest_repo, uprev) return src_repo, dest_repo diff -r 7cc4263e07a9 -r 6c375e07d673 mercurial/localrepo.py --- a/mercurial/localrepo.py Wed Nov 24 15:38:52 2010 -0600 +++ b/mercurial/localrepo.py Wed Nov 24 15:56:32 2010 -0600 @@ -105,7 +105,7 @@ self._tags = None self._tagtypes = None - self._branchcache = None # in UTF-8 + self._branchcache = None self._branchcachetip = None self.nodetagscache = None self.filterpats = {} @@ -435,7 +435,6 @@ bt[bn] = tip return bt - def _readbranchcache(self): partial = {} try: @@ -455,7 +454,8 @@ if not l: continue node, label = l.split(" ", 1) - partial.setdefault(label.strip(), []).append(bin(node)) + label = encoding.tolocal(label.strip()) + partial.setdefault(label, []).append(bin(node)) except KeyboardInterrupt: raise except Exception, inst: @@ -470,7 +470,7 @@ f.write("%s %s\n" % (hex(tip), tiprev)) for label, nodes in branches.iteritems(): for node in nodes: - f.write("%s %s\n" % (hex(node), label)) + f.write("%s %s\n" % (hex(node), encoding.fromlocal(label))) f.rename() except (IOError, OSError): pass @@ -659,7 +659,8 @@ except IOError: ds = "" self.opener("journal.dirstate", "w").write(ds) - self.opener("journal.branch", "w").write(self.dirstate.branch()) + self.opener("journal.branch", "w").write( + encoding.fromlocal(self.dirstate.branch())) self.opener("journal.desc", "w").write("%d\n%s\n" % (len(self), desc)) renames = [(self.sjoin("journal"), self.sjoin("undo")), @@ -717,7 +718,7 @@ except IOError: self.ui.warn(_("Named branch could not be reset, " "current branch still is: %s\n") - % encoding.tolocal(self.dirstate.branch())) + % self.dirstate.branch()) self.invalidate() self.dirstate.invalidate() self.destroyed() diff -r 7cc4263e07a9 -r 6c375e07d673 mercurial/templatekw.py --- a/mercurial/templatekw.py Wed Nov 24 15:38:52 2010 -0600 +++ b/mercurial/templatekw.py Wed Nov 24 15:56:32 2010 -0600 @@ -148,7 +148,6 @@ def showbranches(**args): branch = args['ctx'].branch() if branch != 'default': - branch = encoding.tolocal(branch) return showlist('branch', [branch], plural='branches', **args) def showchildren(**args): diff -r 7cc4263e07a9 -r 6c375e07d673 mercurial/wireproto.py --- a/mercurial/wireproto.py Wed Nov 24 15:38:52 2010 -0600 +++ b/mercurial/wireproto.py Wed Nov 24 15:56:32 2010 -0600 @@ -44,14 +44,7 @@ branchmap = {} for branchpart in d.splitlines(): branchname, branchheads = branchpart.split(' ', 1) - branchname = urllib.unquote(branchname) - # Earlier servers (1.3.x) send branch names in (their) local - # charset. The best we can do is assume it's identical to our - # own local charset, in case it's not utf-8. - try: - branchname.decode('utf-8') - except UnicodeDecodeError: - branchname = encoding.fromlocal(branchname) + branchname = encoding.tolocal(urllib.unquote(branchname)) branchheads = decodelist(branchheads) branchmap[branchname] = branchheads return branchmap @@ -162,7 +155,7 @@ branchmap = repo.branchmap() heads = [] for branch, nodes in branchmap.iteritems(): - branchname = urllib.quote(branch) + branchname = urllib.quote(encoding.fromlocal(branch)) branchnodes = encodelist(nodes) heads.append('%s %s' % (branchname, branchnodes)) return '\n'.join(heads) diff -r 7cc4263e07a9 -r 6c375e07d673 tests/test-encoding.t --- a/tests/test-encoding.t Wed Nov 24 15:38:52 2010 -0600 +++ b/tests/test-encoding.t Wed Nov 24 15:56:32 2010 -0600 @@ -240,6 +240,4 @@ abort: decoding near '\xe9': 'ascii' codec can't decode byte 0xe9 in position 0: ordinal not in range(128)! (esc) [255] $ cp latin-1-tag .hg/branch - $ HGENCODING=latin-1 hg ci -m 'should fail' - abort: branch name not in UTF-8! - [255] + $ HGENCODING=latin-1 hg ci -m 'auto-promote legacy name'