changeset 38707:6b5ca1d0aa1e

obsolete: store user name and note in UTF-8 (issue5754) (BC) Before, user names were stored in the local encoding and transferred across repositories, which made it impossible to restore non-ASCII user names on different platforms. This patch makes new markers store their metadata encoded in UTF-8 and decodes it back to the local encoding when displaying. Existing markers are unfixable, so they may result in mojibake. I don't like an API that requires the metadata dict to be UTF-8 encoded, which is a source of bugs, but there's no abstraction layer that could handle the encoding conversion efficiently. So we apply the same rule to obsstore metadata as is already applied to the extras dict.
author Yuya Nishihara <yuya@tcha.org>
date Sun, 15 Jul 2018 18:24:57 +0900
parents 83d965803325
children ff1182d166a2
files mercurial/cmdutil.py mercurial/debugcommands.py mercurial/obsolete.py mercurial/obsutil.py tests/test-obsmarker-template.t
diffstat 5 files changed, 69 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/cmdutil.py	Sun Jul 15 18:22:40 2018 +0900
+++ b/mercurial/cmdutil.py	Sun Jul 15 18:24:57 2018 +0900
@@ -2555,7 +2555,7 @@
         mapping = {old.node(): (newid,)}
         obsmetadata = None
         if opts.get('note'):
-            obsmetadata = {'note': opts['note']}
+            obsmetadata = {'note': encoding.fromlocal(opts['note'])}
         scmutil.cleanupnodes(repo, mapping, 'amend', metadata=obsmetadata,
                              fixphase=True, targetphase=commitphase)
 
--- a/mercurial/debugcommands.py	Sun Jul 15 18:22:40 2018 +0900
+++ b/mercurial/debugcommands.py	Sun Jul 15 18:24:57 2018 +0900
@@ -1619,7 +1619,7 @@
         if opts['rev']:
             raise error.Abort('cannot select revision when creating marker')
         metadata = {}
-        metadata['user'] = opts['user'] or ui.username()
+        metadata['user'] = encoding.fromlocal(opts['user'] or ui.username())
         succs = tuple(parsenodeid(succ) for succ in successors)
         l = repo.lock()
         try:
--- a/mercurial/obsolete.py	Sun Jul 15 18:22:40 2018 +0900
+++ b/mercurial/obsolete.py	Sun Jul 15 18:24:57 2018 +0900
@@ -74,6 +74,7 @@
 
 from .i18n import _
 from . import (
+    encoding,
     error,
     node,
     obsutil,
@@ -526,7 +527,7 @@
     # prec:    nodeid, predecessors changesets
     # succs:   tuple of nodeid, successor changesets (0-N length)
     # flag:    integer, flag field carrying modifier for the markers (see doc)
-    # meta:    binary blob, encoded metadata dictionary
+    # meta:    binary blob in UTF-8, encoded metadata dictionary
     # date:    (float, int) tuple, date of marker creation
     # parents: (tuple of nodeid) or None, parents of predecessors
     #          None is used when no data has been recorded
@@ -950,7 +951,8 @@
     <relations> must be an iterable of (<old>, (<new>, ...)[,{metadata}])
     tuple. `old` and `news` are changectx. metadata is an optional dictionary
     containing metadata for this marker only. It is merged with the global
-    metadata specified through the `metadata` argument of this function,
+    metadata specified through the `metadata` argument of this function.
+    Any string values in metadata must be UTF-8 bytes.
 
     Trying to obsolete a public changeset will raise an exception.
 
@@ -964,11 +966,8 @@
     if metadata is None:
         metadata = {}
     if 'user' not in metadata:
-        develuser = repo.ui.config('devel', 'user.obsmarker')
-        if develuser:
-            metadata['user'] = develuser
-        else:
-            metadata['user'] = repo.ui.username()
+        luser = repo.ui.config('devel', 'user.obsmarker') or repo.ui.username()
+        metadata['user'] = encoding.fromlocal(luser)
 
     # Operation metadata handling
     useoperation = repo.ui.configbool('experimental',
--- a/mercurial/obsutil.py	Sun Jul 15 18:22:40 2018 +0900
+++ b/mercurial/obsutil.py	Sun Jul 15 18:24:57 2018 +0900
@@ -12,6 +12,7 @@
 from .i18n import _
 from . import (
     diffutil,
+    encoding,
     node as nodemod,
     phases,
     util,
@@ -822,7 +823,8 @@
     """ Returns a sorted list of markers users without duplicates
     """
     markersmeta = [dict(m[3]) for m in markers]
-    users = set(meta['user'] for meta in markersmeta if meta.get('user'))
+    users = set(encoding.tolocal(meta['user']) for meta in markersmeta
+                if meta.get('user'))
 
     return sorted(users)
 
--- a/tests/test-obsmarker-template.t	Sun Jul 15 18:22:40 2018 +0900
+++ b/tests/test-obsmarker-template.t	Sun Jul 15 18:24:57 2018 +0900
@@ -2581,3 +2581,61 @@
      date:        Thu Jan 01 00:00:00 1970 +0000
      summary:     ROOT
   
+
+Test metadata encoding (issue5754)
+==================================
+
+  $ hg init $TESTTMP/metadata-encoding
+  $ cd $TESTTMP/metadata-encoding
+  $ cat <<'EOF' >> .hg/hgrc
+  > [extensions]
+  > amend =
+  > EOF
+  $ $PYTHON <<'EOF'
+  > with open('test1', 'wb') as f:
+  >    f.write(b't\xe8st1') and None
+  > with open('test2', 'wb') as f:
+  >    f.write(b't\xe8st2') and None
+  > EOF
+  $ mkcommit ROOT
+  $ HGENCODING=latin-1 HGUSER="`cat test1`" mkcommit A0
+  $ echo 42 >> A0
+  $ hg amend -m "A1" --note "`cat test2`"
+  $ HGENCODING=latin-1 hg amend -m "A2" \
+  > --config devel.user.obsmarker="`cat test2`"
+  $ mkcommit B0
+  $ HGENCODING=latin-1 hg debugobsolete -u "`cat test2`" "`getid 'desc(B0)'`"
+  obsoleted 1 changesets
+
+metadata should be stored in UTF-8, and debugobsolete doesn't decode it to
+local encoding since the command is supposed to show unmodified content:
+
+  $ HGENCODING=latin-1 hg debugobsolete
+  5f66a482f0bb2fcaccfc215554ad5eb9f40b50f5 718c0d00cee1429bdb73064e0d88908c601507a8 0 (Thu Jan 01 00:00:00 1970 +0000) {'ef1': '9', 'note': 't\xc3\xa8st2', 'operation': 'amend', 'user': 't\xc3\xa8st1'}
+  718c0d00cee1429bdb73064e0d88908c601507a8 1132562159b35bb27e1d6b80c80ee94a1659a4da 0 (Thu Jan 01 00:00:00 1970 +0000) {'ef1': '1', 'operation': 'amend', 'user': 't\xc3\xa8st2'}
+  e1724525bc3bec4472d7915a02811b938004a7a2 0 (Thu Jan 01 00:00:00 1970 +0000) {'user': 't\xc3\xa8st2'}
+
+metadata should be converted back to local encoding when displaying:
+
+  $ HGENCODING=latin-1 hg fatelog --hidden
+  @  e1724525bc3b
+  |    Obsfate: pruned by t\xe8st2 (at 1970-01-01 00:00 +0000); (esc)
+  o  1132562159b3
+  |
+  | x  718c0d00cee1
+  |/     Obsfate: rewritten using amend as 3:1132562159b3 by t\xe8st2 (at 1970-01-01 00:00 +0000); (esc)
+  | x  5f66a482f0bb
+  |/     Obsfate: rewritten using amend as 2:718c0d00cee1 by t\xe8st1 (at 1970-01-01 00:00 +0000); (esc)
+  o  ea207398892e
+  
+  $ HGENCODING=utf-8 hg fatelog --hidden
+  @  e1724525bc3b
+  |    Obsfate: pruned by t\xc3\xa8st2 (at 1970-01-01 00:00 +0000); (esc)
+  o  1132562159b3
+  |
+  | x  718c0d00cee1
+  |/     Obsfate: rewritten using amend as 3:1132562159b3 by t\xc3\xa8st2 (at 1970-01-01 00:00 +0000); (esc)
+  | x  5f66a482f0bb
+  |/     Obsfate: rewritten using amend as 2:718c0d00cee1 by t\xc3\xa8st1 (at 1970-01-01 00:00 +0000); (esc)
+  o  ea207398892e
+