obsolete: store user name and note in UTF-8 (
issue5754) (BC)
Before, user names were stored in local encoding and transferred across
repositories, which made it impossible to restore non-ASCII user names on
different platforms. This patch fixes new markers to be encoded in UTF-8
and decoded back to local encoding when displaying. Existing markers are
unfixable so they may result in mojibake.
I don't like the API that requires metadata dict to be UTF-8 encoded, which
is a source of bugs, but there's no abstraction layer to process the encoding
thingy efficiently. So we apply the same rule as extras dict to obsstore
metadata.
--- a/mercurial/cmdutil.py Sun Jul 15 18:22:40 2018 +0900
+++ b/mercurial/cmdutil.py Sun Jul 15 18:24:57 2018 +0900
@@ -2555,7 +2555,7 @@
mapping = {old.node(): (newid,)}
obsmetadata = None
if opts.get('note'):
- obsmetadata = {'note': opts['note']}
+ obsmetadata = {'note': encoding.fromlocal(opts['note'])}
scmutil.cleanupnodes(repo, mapping, 'amend', metadata=obsmetadata,
fixphase=True, targetphase=commitphase)
--- a/mercurial/debugcommands.py Sun Jul 15 18:22:40 2018 +0900
+++ b/mercurial/debugcommands.py Sun Jul 15 18:24:57 2018 +0900
@@ -1619,7 +1619,7 @@
if opts['rev']:
raise error.Abort('cannot select revision when creating marker')
metadata = {}
- metadata['user'] = opts['user'] or ui.username()
+ metadata['user'] = encoding.fromlocal(opts['user'] or ui.username())
succs = tuple(parsenodeid(succ) for succ in successors)
l = repo.lock()
try:
--- a/mercurial/obsolete.py Sun Jul 15 18:22:40 2018 +0900
+++ b/mercurial/obsolete.py Sun Jul 15 18:24:57 2018 +0900
@@ -74,6 +74,7 @@
from .i18n import _
from . import (
+ encoding,
error,
node,
obsutil,
@@ -526,7 +527,7 @@
# prec: nodeid, predecessors changesets
# succs: tuple of nodeid, successor changesets (0-N length)
# flag: integer, flag field carrying modifier for the markers (see doc)
- # meta: binary blob, encoded metadata dictionary
+ # meta: binary blob in UTF-8, encoded metadata dictionary
# date: (float, int) tuple, date of marker creation
# parents: (tuple of nodeid) or None, parents of predecessors
# None is used when no data has been recorded
@@ -950,7 +951,8 @@
<relations> must be an iterable of (<old>, (<new>, ...)[,{metadata}])
tuple. `old` and `news` are changectx. metadata is an optional dictionary
containing metadata for this marker only. It is merged with the global
- metadata specified through the `metadata` argument of this function,
+ metadata specified through the `metadata` argument of this function.
+ Any string values in metadata must be UTF-8 bytes.
Trying to obsolete a public changeset will raise an exception.
@@ -964,11 +966,8 @@
if metadata is None:
metadata = {}
if 'user' not in metadata:
- develuser = repo.ui.config('devel', 'user.obsmarker')
- if develuser:
- metadata['user'] = develuser
- else:
- metadata['user'] = repo.ui.username()
+ luser = repo.ui.config('devel', 'user.obsmarker') or repo.ui.username()
+ metadata['user'] = encoding.fromlocal(luser)
# Operation metadata handling
useoperation = repo.ui.configbool('experimental',
--- a/mercurial/obsutil.py Sun Jul 15 18:22:40 2018 +0900
+++ b/mercurial/obsutil.py Sun Jul 15 18:24:57 2018 +0900
@@ -12,6 +12,7 @@
from .i18n import _
from . import (
diffutil,
+ encoding,
node as nodemod,
phases,
util,
@@ -822,7 +823,8 @@
""" Returns a sorted list of markers users without duplicates
"""
markersmeta = [dict(m[3]) for m in markers]
- users = set(meta['user'] for meta in markersmeta if meta.get('user'))
+ users = set(encoding.tolocal(meta['user']) for meta in markersmeta
+ if meta.get('user'))
return sorted(users)
--- a/tests/test-obsmarker-template.t Sun Jul 15 18:22:40 2018 +0900
+++ b/tests/test-obsmarker-template.t Sun Jul 15 18:24:57 2018 +0900
@@ -2581,3 +2581,61 @@
date: Thu Jan 01 00:00:00 1970 +0000
summary: ROOT
+
+Test metadata encoding (issue5754)
+==================================
+
+ $ hg init $TESTTMP/metadata-encoding
+ $ cd $TESTTMP/metadata-encoding
+ $ cat <<'EOF' >> .hg/hgrc
+ > [extensions]
+ > amend =
+ > EOF
+ $ $PYTHON <<'EOF'
+ > with open('test1', 'wb') as f:
+ > f.write(b't\xe8st1') and None
+ > with open('test2', 'wb') as f:
+ > f.write(b't\xe8st2') and None
+ > EOF
+ $ mkcommit ROOT
+ $ HGENCODING=latin-1 HGUSER="`cat test1`" mkcommit A0
+ $ echo 42 >> A0
+ $ hg amend -m "A1" --note "`cat test2`"
+ $ HGENCODING=latin-1 hg amend -m "A2" \
+ > --config devel.user.obsmarker="`cat test2`"
+ $ mkcommit B0
+ $ HGENCODING=latin-1 hg debugobsolete -u "`cat test2`" "`getid 'desc(B0)'`"
+ obsoleted 1 changesets
+
+metadata should be stored in UTF-8, and debugobsolete doesn't decode it to
+local encoding since the command is supposed to show unmodified content:
+
+ $ HGENCODING=latin-1 hg debugobsolete
+ 5f66a482f0bb2fcaccfc215554ad5eb9f40b50f5 718c0d00cee1429bdb73064e0d88908c601507a8 0 (Thu Jan 01 00:00:00 1970 +0000) {'ef1': '9', 'note': 't\xc3\xa8st2', 'operation': 'amend', 'user': 't\xc3\xa8st1'}
+ 718c0d00cee1429bdb73064e0d88908c601507a8 1132562159b35bb27e1d6b80c80ee94a1659a4da 0 (Thu Jan 01 00:00:00 1970 +0000) {'ef1': '1', 'operation': 'amend', 'user': 't\xc3\xa8st2'}
+ e1724525bc3bec4472d7915a02811b938004a7a2 0 (Thu Jan 01 00:00:00 1970 +0000) {'user': 't\xc3\xa8st2'}
+
+metadata should be converted back to local encoding when displaying:
+
+ $ HGENCODING=latin-1 hg fatelog --hidden
+ @ e1724525bc3b
+ | Obsfate: pruned by t\xe8st2 (at 1970-01-01 00:00 +0000); (esc)
+ o 1132562159b3
+ |
+ | x 718c0d00cee1
+ |/ Obsfate: rewritten using amend as 3:1132562159b3 by t\xe8st2 (at 1970-01-01 00:00 +0000); (esc)
+ | x 5f66a482f0bb
+ |/ Obsfate: rewritten using amend as 2:718c0d00cee1 by t\xe8st1 (at 1970-01-01 00:00 +0000); (esc)
+ o ea207398892e
+
+ $ HGENCODING=utf-8 hg fatelog --hidden
+ @ e1724525bc3b
+ | Obsfate: pruned by t\xc3\xa8st2 (at 1970-01-01 00:00 +0000); (esc)
+ o 1132562159b3
+ |
+ | x 718c0d00cee1
+ |/ Obsfate: rewritten using amend as 3:1132562159b3 by t\xc3\xa8st2 (at 1970-01-01 00:00 +0000); (esc)
+ | x 5f66a482f0bb
+ |/ Obsfate: rewritten using amend as 2:718c0d00cee1 by t\xc3\xa8st1 (at 1970-01-01 00:00 +0000); (esc)
+ o ea207398892e
+