comparison hgext/fsmonitor/__init__.py @ 31846:1064a296a2a7

fsmonitor: match watchman and filesystem encoding. Watchman's path encoding can differ from the filesystem encoding. For example, on Windows, it's always utf-8. Before this patch, on Windows, a mismatch in path comparison between fsmonitor state and osutil.statfiles would yield a clean status for added/modified files. In addition to status reporting wrong results, this leads to files being discarded from changesets while doing history editing operations such as rebase. Benchmark: There is a little overhead at module import: python -m timeit "import hgext.fsmonitor" Windows before patch: 1000000 loops, best of 3: 0.563 usec per loop Windows after patch: 1000000 loops, best of 3: 0.583 usec per loop Linux before patch: 1000000 loops, best of 3: 0.579 usec per loop Linux after patch: 1000000 loops, best of 3: 0.588 usec per loop 10000 calls to _watchmantofsencoding: python -m timeit -s "from hgext.fsmonitor import _watchmantofsencoding, _fixencoding" "fname = '/path/to/file'" "for i in range(10000):" " if _fixencoding: fname = _watchmantofsencoding(fname)" Windows (_fixencoding is True): 100 loops, best of 3: 19.5 msec per loop Linux (_fixencoding is False): 100 loops, best of 3: 3.08 msec per loop
author Olivier Trempe <oliviertrempe@gmail.com>
date Wed, 08 Mar 2017 09:03:42 -0500
parents 6ada1658fc6b
children bf3af0eced44 20bac46f7744
comparison
equal deleted inserted replaced
31845:86246530b8d2 31846:1064a296a2a7
89 # The issues related to nested repos and subrepos are probably not fundamental 89 # The issues related to nested repos and subrepos are probably not fundamental
90 # ones. Patches to fix them are welcome. 90 # ones. Patches to fix them are welcome.
91 91
92 from __future__ import absolute_import 92 from __future__ import absolute_import
93 93
94 import codecs
94 import hashlib 95 import hashlib
95 import os 96 import os
96 import stat 97 import stat
98 import sys
97 99
98 from mercurial.i18n import _ 100 from mercurial.i18n import _
99 from mercurial import ( 101 from mercurial import (
100 context, 102 context,
101 encoding, 103 encoding,
104 error,
102 extensions, 105 extensions,
103 localrepo, 106 localrepo,
104 merge, 107 merge,
105 pathutil, 108 pathutil,
106 pycompat, 109 pycompat,
108 util, 111 util,
109 ) 112 )
110 from mercurial import match as matchmod 113 from mercurial import match as matchmod
111 114
112 from . import ( 115 from . import (
116 pywatchman,
113 state, 117 state,
114 watchmanclient, 118 watchmanclient,
115 ) 119 )
116 120
117 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for 121 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
156 if util.safehasattr(ignore, '_files'): 160 if util.safehasattr(ignore, '_files'):
157 for f in ignore._files: 161 for f in ignore._files:
158 sha1.update(f) 162 sha1.update(f)
159 sha1.update('\0') 163 sha1.update('\0')
160 return sha1.hexdigest() 164 return sha1.hexdigest()
165
166 _watchmanencoding = pywatchman.encoding.get_local_encoding()
167 _fsencoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
168 _fixencoding = codecs.lookup(_watchmanencoding) != codecs.lookup(_fsencoding)
169
170 def _watchmantofsencoding(path):
171 """Fix path to match watchman and local filesystem encoding
172
173 watchman's paths encoding can differ from filesystem encoding. For example,
174 on Windows, it's always utf-8.
175 """
176 try:
177 decoded = path.decode(_watchmanencoding)
178 except UnicodeDecodeError as e:
179 raise error.Abort(str(e), hint='watchman encoding error')
180
181 try:
182 encoded = decoded.encode(_fsencoding, 'strict')
183 except UnicodeEncodeError as e:
184 raise error.Abort(str(e))
185
186 return encoded
161 187
162 def overridewalk(orig, self, match, subrepos, unknown, ignored, full=True): 188 def overridewalk(orig, self, match, subrepos, unknown, ignored, full=True):
163 '''Replacement for dirstate.walk, hooking into Watchman. 189 '''Replacement for dirstate.walk, hooking into Watchman.
164 190
165 Whenever full is False, ignored is False, and the Watchman client is 191 Whenever full is False, ignored is False, and the Watchman client is
301 # as being happens-after the exists=False entries due to the way that 327 # as being happens-after the exists=False entries due to the way that
302 # Watchman tracks files. We use this property to reconcile deletes 328 # Watchman tracks files. We use this property to reconcile deletes
303 # for name case changes. 329 # for name case changes.
304 for entry in result['files']: 330 for entry in result['files']:
305 fname = entry['name'] 331 fname = entry['name']
332 if _fixencoding:
333 fname = _watchmantofsencoding(fname)
306 if switch_slashes: 334 if switch_slashes:
307 fname = fname.replace('\\', '/') 335 fname = fname.replace('\\', '/')
308 if normalize: 336 if normalize:
309 normed = normcase(fname) 337 normed = normcase(fname)
310 fname = normalize(fname, True, True) 338 fname = normalize(fname, True, True)