comparison mercurial/changelog.py @ 42116:caa067ee21dc

changelog: extract a _string_unescape() to mirror _string_escape(). We use our own _string_escape() to encode the "extras" field, and then we use codecs.escape_decode() to unescape it. Since the fix for https://bz.mercurial-scm.org/show_bug.cgi?id=3156, there has also been a little workaround for dealing with escaped text that looks like octal numbers. This patch extracts the call to codecs.escape_decode(), along with the fix for octal numbers, and puts it in a new _string_unescape() function. It also updates the test to check for the octal-number case from the aforementioned bug. As you may have suspected, I want to be able to reuse this new function later. Differential Revision: https://phab.mercurial-scm.org/D6184
author Martin von Zweigbergk <martinvonz@google.com>
date Sun, 24 Mar 2019 23:47:01 -0700
parents ae189674bdad
children 0e41f40b01cc
comparison
equal deleted inserted replaced
42115:27475ae67676 42116:caa067ee21dc
33 33
34 def _string_escape(text): 34 def _string_escape(text):
35 """ 35 """
36 >>> from .pycompat import bytechr as chr 36 >>> from .pycompat import bytechr as chr
37 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)} 37 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
38 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)sab%(cr)scd%(bs)s%(nl)s" % d 38 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
39 >>> s 39 >>> s
40 'ab\\ncd\\\\\\\\n\\x00ab\\rcd\\\\\\n' 40 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
41 >>> res = _string_escape(s) 41 >>> res = _string_escape(s)
42 >>> s == stringutil.unescapestr(res) 42 >>> s == _string_unescape(res)
43 True 43 True
44 """ 44 """
45 # subset of the string_escape codec 45 # subset of the string_escape codec
46 text = text.replace('\\', '\\\\').replace('\n', '\\n').replace('\r', '\\r') 46 text = text.replace('\\', '\\\\').replace('\n', '\\n').replace('\r', '\\r')
47 return text.replace('\0', '\\0') 47 return text.replace('\0', '\\0')
48
49 def _string_unescape(text):
50 if '\\0' in text:
51 # fix up \0 without getting into trouble with \\0
52 text = text.replace('\\\\', '\\\\\n')
53 text = text.replace('\\0', '\0')
54 text = text.replace('\n', '')
55 return stringutil.unescapestr(text)
48 56
49 def decodeextra(text): 57 def decodeextra(text):
50 """ 58 """
51 >>> from .pycompat import bytechr as chr 59 >>> from .pycompat import bytechr as chr
52 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'}) 60 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
58 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')] 66 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
59 """ 67 """
60 extra = _defaultextra.copy() 68 extra = _defaultextra.copy()
61 for l in text.split('\0'): 69 for l in text.split('\0'):
62 if l: 70 if l:
63 if '\\0' in l: 71 k, v = _string_unescape(l).split(':', 1)
64 # fix up \0 without getting into trouble with \\0
65 l = l.replace('\\\\', '\\\\\n')
66 l = l.replace('\\0', '\0')
67 l = l.replace('\n', '')
68 k, v = stringutil.unescapestr(l).split(':', 1)
69 extra[k] = v 72 extra[k] = v
70 return extra 73 return extra
71 74
72 def encodeextra(d): 75 def encodeextra(d):
73 # keys must be sorted to produce a deterministic changelog entry 76 # keys must be sorted to produce a deterministic changelog entry