changeset 13047:6c375e07d673

branch: operate on branch names in local string space where possible. Previously, branch names were ideally manipulated as UTF-8 strings, because they were stored as UTF-8 in the dirstate and the changelog and could not be safely converted to the local encoding and back. However, only about 80% of branch-name code was actually using the right encoding conventions. This patch uses the localstr addition to allow working on branch names as local strings, which simplifies handling so that the previously incorrect code becomes correct.
author Matt Mackall <mpm@selenic.com>
date Wed, 24 Nov 2010 15:56:32 -0600
parents 7cc4263e07a9
children e298cca2d53a
files mercurial/archival.py mercurial/cmdutil.py mercurial/commands.py mercurial/context.py mercurial/dirstate.py mercurial/hg.py mercurial/localrepo.py mercurial/templatekw.py mercurial/wireproto.py tests/test-encoding.t
diffstat 10 files changed, 42 insertions(+), 60 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/archival.py	Wed Nov 24 15:38:52 2010 -0600
+++ b/mercurial/archival.py	Wed Nov 24 15:56:32 2010 -0600
@@ -8,7 +8,7 @@
 from i18n import _
 from node import hex
 import cmdutil
-import util
+import util, encoding
 import cStringIO, os, stat, tarfile, time, zipfile
 import zlib, gzip
 
@@ -243,7 +243,7 @@
     if repo.ui.configbool("ui", "archivemeta", True):
         def metadata():
             base = 'repo: %s\nnode: %s\nbranch: %s\n' % (
-                repo[0].hex(), hex(node), ctx.branch())
+                repo[0].hex(), hex(node), encoding.fromlocal(ctx.branch()))
 
             tags = ''.join('tag: %s\n' % t for t in ctx.tags()
                            if repo.tagtype(t) == 'global')
--- a/mercurial/cmdutil.py	Wed Nov 24 15:38:52 2010 -0600
+++ b/mercurial/cmdutil.py	Wed Nov 24 15:56:32 2010 -0600
@@ -801,7 +801,6 @@
         branch = ctx.branch()
         # don't show the default branch name
         if branch != 'default':
-            branch = encoding.tolocal(branch)
             self.ui.write(_("branch:      %s\n") % branch,
                           label='log.branch')
         for tag in self.repo.nodetags(changenode):
@@ -1357,8 +1356,7 @@
     if ctx.p2():
         edittext.append(_("HG: branch merge"))
     if ctx.branch():
-        edittext.append(_("HG: branch '%s'")
-                        % encoding.tolocal(ctx.branch()))
+        edittext.append(_("HG: branch '%s'") % ctx.branch())
     edittext.extend([_("HG: subrepo %s") % s for s in subs])
     edittext.extend([_("HG: added %s") % f for f in added])
     edittext.extend([_("HG: changed %s") % f for f in modified])
--- a/mercurial/commands.py	Wed Nov 24 15:38:52 2010 -0600
+++ b/mercurial/commands.py	Wed Nov 24 15:56:32 2010 -0600
@@ -488,15 +488,14 @@
         repo.dirstate.setbranch(label)
         ui.status(_('reset working directory to branch %s\n') % label)
     elif label:
-        utflabel = encoding.fromlocal(label)
-        if not opts.get('force') and utflabel in repo.branchtags():
+        if not opts.get('force') and label in repo.branchtags():
             if label not in [p.branch() for p in repo.parents()]:
                 raise util.Abort(_('a branch of the same name already exists'
                                    " (use 'hg update' to switch to it)"))
-        repo.dirstate.setbranch(utflabel)
+        repo.dirstate.setbranch(label)
         ui.status(_('marked working directory as branch %s\n') % label)
     else:
-        ui.write("%s\n" % encoding.tolocal(repo.dirstate.branch()))
+        ui.write("%s\n" % repo.dirstate.branch())
 
 def branches(ui, repo, active=False, closed=False):
     """list repository named branches
@@ -525,9 +524,8 @@
 
     for isactive, node, tag in branches:
         if (not active) or isactive:
-            encodedtag = encoding.tolocal(tag)
             if ui.quiet:
-                ui.write("%s\n" % encodedtag)
+                ui.write("%s\n" % tag)
             else:
                 hn = repo.lookup(node)
                 if isactive:
@@ -543,10 +541,10 @@
                     notice = _(' (inactive)')
                 if tag == repo.dirstate.branch():
                     label = 'branches.current'
-                rev = str(node).rjust(31 - encoding.colwidth(encodedtag))
+                rev = str(node).rjust(31 - encoding.colwidth(tag))
                 rev = ui.label('%s:%s' % (rev, hexfunc(hn)), 'log.changeset')
-                encodedtag = ui.label(encodedtag, label)
-                ui.write("%s %s%s\n" % (encodedtag, rev, notice))
+                tag = ui.label(tag, label)
+                ui.write("%s %s%s\n" % (tag, rev, notice))
 
 def bundle(ui, repo, fname, dest=None, **opts):
     """create a changegroup file
@@ -1830,8 +1828,7 @@
             heads += [repo[h] for h in ls if rev(h) in descendants]
 
     if branchrevs:
-        decode, encode = encoding.fromlocal, encoding.tolocal
-        branches = set(repo[decode(br)].branch() for br in branchrevs)
+        branches = set(repo[br].branch() for br in branchrevs)
         heads = [h for h in heads if h.branch() in branches]
 
     if not opts.get('closed'):
@@ -1844,7 +1841,7 @@
     if branchrevs:
         haveheads = set(h.branch() for h in heads)
         if branches - haveheads:
-            headless = ', '.join(encode(b) for b in branches - haveheads)
+            headless = ', '.join(b for b in branches - haveheads)
             msg = _('no open branch heads found on branches %s')
             if opts.get('rev'):
                 msg += _(' (started at %s)' % opts['rev'])
@@ -2209,7 +2206,7 @@
             output.append(str(ctx.rev()))
 
     if repo.local() and default and not ui.quiet:
-        b = encoding.tolocal(ctx.branch())
+        b = ctx.branch()
         if b != 'default':
             output.append("(%s)" % b)
 
@@ -2219,7 +2216,7 @@
             output.append(t)
 
     if branch:
-        output.append(encoding.tolocal(ctx.branch()))
+        output.append(ctx.branch())
 
     if tags:
         output.extend(ctx.tags())
@@ -2623,7 +2620,7 @@
         node = opts.get('rev')
 
     if not node:
-        branch = repo.changectx(None).branch()
+        branch = repo[None].branch()
         bheads = repo.branchheads(branch)
         if len(bheads) > 2:
             raise util.Abort(_(
--- a/mercurial/context.py	Wed Nov 24 15:38:52 2010 -0600
+++ b/mercurial/context.py	Wed Nov 24 15:56:32 2010 -0600
@@ -7,7 +7,7 @@
 
 from node import nullid, nullrev, short, hex
 from i18n import _
-import ancestor, bdiff, error, util, subrepo, patch
+import ancestor, bdiff, error, util, subrepo, patch, encoding
 import os, errno, stat
 
 propertycache = util.propertycache
@@ -109,7 +109,7 @@
     def description(self):
         return self._changeset[4]
     def branch(self):
-        return self._changeset[5].get("branch")
+        return encoding.tolocal(self._changeset[5].get("branch"))
     def extra(self):
         return self._changeset[5]
     def tags(self):
@@ -591,9 +591,8 @@
         if extra:
             self._extra = extra.copy()
         if 'branch' not in self._extra:
-            branch = self._repo.dirstate.branch()
             try:
-                branch = branch.decode('UTF-8').encode('UTF-8')
+                branch = encoding.fromlocal(self._repo.dirstate.branch())
             except UnicodeDecodeError:
                 raise util.Abort(_('branch name not in UTF-8!'))
             self._extra['branch'] = branch
@@ -715,7 +714,7 @@
         assert self._clean is not None  # must call status first
         return self._clean
     def branch(self):
-        return self._extra['branch']
+        return encoding.tolocal(self._extra['branch'])
     def extra(self):
         return self._extra
 
@@ -1048,7 +1047,7 @@
     def clean(self):
         return self._status[6]
     def branch(self):
-        return self._extra['branch']
+        return encoding.tolocal(self._extra['branch'])
     def extra(self):
         return self._extra
     def flags(self, f):
--- a/mercurial/dirstate.py	Wed Nov 24 15:38:52 2010 -0600
+++ b/mercurial/dirstate.py	Wed Nov 24 15:56:32 2010 -0600
@@ -7,7 +7,7 @@
 
 from node import nullid
 from i18n import _
-import util, ignore, osutil, parsers
+import util, ignore, osutil, parsers, encoding
 import struct, os, stat, errno
 import cStringIO
 
@@ -201,7 +201,7 @@
         return [self._validate(p) for p in self._pl]
 
     def branch(self):
-        return self._branch
+        return encoding.tolocal(self._branch)
 
     def setparents(self, p1, p2=nullid):
         self._dirty = self._dirtypl = True
@@ -210,8 +210,8 @@
     def setbranch(self, branch):
         if branch in ['tip', '.', 'null']:
             raise util.Abort(_('the name \'%s\' is reserved') % branch)
-        self._branch = branch
-        self._opener("branch", "w").write(branch + '\n')
+        self._branch = encoding.fromlocal(branch)
+        self._opener("branch", "w").write(self._branch + '\n')
 
     def _read(self):
         self._map = {}
--- a/mercurial/hg.py	Wed Nov 24 15:38:52 2010 -0600
+++ b/mercurial/hg.py	Wed Nov 24 15:56:32 2010 -0600
@@ -32,24 +32,22 @@
         return revs, revs[0]
     branchmap = repo.branchmap()
 
-    def primary(butf8):
-        if butf8 == '.':
+    def primary(branch):
+        if branch == '.':
             if not lrepo or not lrepo.local():
                 raise util.Abort(_("dirstate branch not accessible"))
-            butf8 = lrepo.dirstate.branch()
-        if butf8 in branchmap:
-            revs.extend(node.hex(r) for r in reversed(branchmap[butf8]))
+            branch = lrepo.dirstate.branch()
+        if branch in branchmap:
+            revs.extend(node.hex(r) for r in reversed(branchmap[branch]))
             return True
         else:
             return False
 
     for branch in branches:
-        butf8 = encoding.fromlocal(branch)
-        if not primary(butf8):
+        if not primary(branch):
             raise error.RepoLookupError(_("unknown branch '%s'") % branch)
     if hashbranch:
-        butf8 = encoding.fromlocal(hashbranch)
-        if not primary(butf8):
+        if not primary(hashbranch):
             revs.append(hashbranch)
     return revs, revs[0]
 
@@ -365,8 +363,7 @@
                     except error.RepoLookupError:
                         continue
                 bn = dest_repo[uprev].branch()
-                dest_repo.ui.status(_("updating to branch %s\n")
-                                    % encoding.tolocal(bn))
+                dest_repo.ui.status(_("updating to branch %s\n") % bn)
                 _update(dest_repo, uprev)
 
         return src_repo, dest_repo
--- a/mercurial/localrepo.py	Wed Nov 24 15:38:52 2010 -0600
+++ b/mercurial/localrepo.py	Wed Nov 24 15:56:32 2010 -0600
@@ -105,7 +105,7 @@
         self._tags = None
         self._tagtypes = None
 
-        self._branchcache = None  # in UTF-8
+        self._branchcache = None
         self._branchcachetip = None
         self.nodetagscache = None
         self.filterpats = {}
@@ -435,7 +435,6 @@
             bt[bn] = tip
         return bt
 
-
     def _readbranchcache(self):
         partial = {}
         try:
@@ -455,7 +454,8 @@
                 if not l:
                     continue
                 node, label = l.split(" ", 1)
-                partial.setdefault(label.strip(), []).append(bin(node))
+                label = encoding.tolocal(label.strip())
+                partial.setdefault(label, []).append(bin(node))
         except KeyboardInterrupt:
             raise
         except Exception, inst:
@@ -470,7 +470,7 @@
             f.write("%s %s\n" % (hex(tip), tiprev))
             for label, nodes in branches.iteritems():
                 for node in nodes:
-                    f.write("%s %s\n" % (hex(node), label))
+                    f.write("%s %s\n" % (hex(node), encoding.fromlocal(label)))
             f.rename()
         except (IOError, OSError):
             pass
@@ -659,7 +659,8 @@
         except IOError:
             ds = ""
         self.opener("journal.dirstate", "w").write(ds)
-        self.opener("journal.branch", "w").write(self.dirstate.branch())
+        self.opener("journal.branch", "w").write(
+            encoding.fromlocal(self.dirstate.branch()))
         self.opener("journal.desc", "w").write("%d\n%s\n" % (len(self), desc))
 
         renames = [(self.sjoin("journal"), self.sjoin("undo")),
@@ -717,7 +718,7 @@
                 except IOError:
                     self.ui.warn(_("Named branch could not be reset, "
                                    "current branch still is: %s\n")
-                                 % encoding.tolocal(self.dirstate.branch()))
+                                 % self.dirstate.branch())
                 self.invalidate()
                 self.dirstate.invalidate()
                 self.destroyed()
--- a/mercurial/templatekw.py	Wed Nov 24 15:38:52 2010 -0600
+++ b/mercurial/templatekw.py	Wed Nov 24 15:56:32 2010 -0600
@@ -148,7 +148,6 @@
 def showbranches(**args):
     branch = args['ctx'].branch()
     if branch != 'default':
-        branch = encoding.tolocal(branch)
         return showlist('branch', [branch], plural='branches', **args)
 
 def showchildren(**args):
--- a/mercurial/wireproto.py	Wed Nov 24 15:38:52 2010 -0600
+++ b/mercurial/wireproto.py	Wed Nov 24 15:56:32 2010 -0600
@@ -44,14 +44,7 @@
             branchmap = {}
             for branchpart in d.splitlines():
                 branchname, branchheads = branchpart.split(' ', 1)
-                branchname = urllib.unquote(branchname)
-                # Earlier servers (1.3.x) send branch names in (their) local
-                # charset. The best we can do is assume it's identical to our
-                # own local charset, in case it's not utf-8.
-                try:
-                    branchname.decode('utf-8')
-                except UnicodeDecodeError:
-                    branchname = encoding.fromlocal(branchname)
+                branchname = encoding.tolocal(urllib.unquote(branchname))
                 branchheads = decodelist(branchheads)
                 branchmap[branchname] = branchheads
             return branchmap
@@ -162,7 +155,7 @@
     branchmap = repo.branchmap()
     heads = []
     for branch, nodes in branchmap.iteritems():
-        branchname = urllib.quote(branch)
+        branchname = urllib.quote(encoding.fromlocal(branch))
         branchnodes = encodelist(nodes)
         heads.append('%s %s' % (branchname, branchnodes))
     return '\n'.join(heads)
--- a/tests/test-encoding.t	Wed Nov 24 15:38:52 2010 -0600
+++ b/tests/test-encoding.t	Wed Nov 24 15:56:32 2010 -0600
@@ -240,6 +240,4 @@
   abort: decoding near '\xe9': 'ascii' codec can't decode byte 0xe9 in position 0: ordinal not in range(128)! (esc)
   [255]
   $ cp latin-1-tag .hg/branch
-  $ HGENCODING=latin-1 hg ci -m 'should fail'
-  abort: branch name not in UTF-8!
-  [255]
+  $ HGENCODING=latin-1 hg ci -m 'auto-promote legacy name'