comparison mercurial/vfs.py @ 31217:0f31830fbfc4

vfs: extract 'vfs' class and related code to a new 'vfs' module (API) The 'scmutil' module is growing large (1500+ lines) and 2/5 of it is related to vfs. We extract the 'vfs'-related code into its own module to get both modules back to a better scale and clearer contents. We keep all the references available in 'scmutil' for now, as many references need to be updated.
author Pierre-Yves David <pierre-yves.david@ens-lyon.org>
date Wed, 01 Mar 2017 11:00:12 +0100
parents mercurial/scmutil.py@6cf2857526c7
children 8908f985570c
comparison
equal deleted inserted replaced
31216:21fa3d3688f3 31217:0f31830fbfc4
1 # vfs.py - Mercurial 'vfs' classes
2 #
3 # Copyright Matt Mackall <mpm@selenic.com>
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
7 from __future__ import absolute_import
8
9 import contextlib
10 import errno
11 import os
12 import shutil
13 import stat
14 import tempfile
15 import threading
16
17 from .i18n import _
18 from . import (
19 error,
20 osutil,
21 pathutil,
22 pycompat,
23 util,
24 )
25
class abstractvfs(object):
    """Abstract base class; cannot be instantiated

    The helpers below are written in terms of two operations this class
    does not define itself: calling the instance (``self(path, mode, ...)``)
    to open a file, and ``self.join(path)`` to turn a vfs-relative path
    into a filesystem path. Subclasses supply both.
    """

    def __init__(self, *args, **kwargs):
        '''Prevent instantiation; don't call this from subclasses.'''
        raise NotImplementedError('attempted instantiating ' + str(type(self)))

    def tryread(self, path):
        '''gracefully return an empty string for missing files'''
        try:
            return self.read(path)
        except IOError as inst:
            # only a missing file is tolerated; anything else propagates
            if inst.errno != errno.ENOENT:
                raise
        return ""

    def tryreadlines(self, path, mode='rb'):
        '''gracefully return an empty array for missing files'''
        try:
            return self.readlines(path, mode=mode)
        except IOError as inst:
            # only a missing file is tolerated; anything else propagates
            if inst.errno != errno.ENOENT:
                raise
        return []

    @util.propertycache
    def open(self):
        '''Open ``path`` file, which is relative to vfs root.

        Newly created directories are marked as "not to be indexed by
        the content indexing service", if ``notindexed`` is specified
        for "write" mode access.
        '''
        # ``vfs.open(...)`` is simply another spelling of ``vfs(...)``
        return self.__call__

    def read(self, path):
        '''return the whole content of ``path``, opened in 'rb' mode'''
        with self(path, 'rb') as fp:
            return fp.read()

    def readlines(self, path, mode='rb'):
        '''return the lines of ``path``, opened with ``mode``'''
        with self(path, mode=mode) as fp:
            return fp.readlines()

    def write(self, path, data, backgroundclose=False):
        '''write ``data`` to ``path``, opened in 'wb' mode'''
        with self(path, 'wb', backgroundclose=backgroundclose) as fp:
            return fp.write(data)

    def writelines(self, path, data, mode='wb', notindexed=False):
        '''write the sequence ``data`` to ``path``, opened with ``mode``'''
        with self(path, mode=mode, notindexed=notindexed) as fp:
            return fp.writelines(data)

    def append(self, path, data):
        '''write ``data`` to ``path``, opened in 'ab' mode'''
        with self(path, 'ab') as fp:
            return fp.write(data)

    def basename(self, path):
        """return base element of a path (as os.path.basename would do)

        This exists to allow handling of strange encoding if needed."""
        return os.path.basename(path)

    def chmod(self, path, mode):
        return os.chmod(self.join(path), mode)

    def dirname(self, path):
        """return dirname element of a path (as os.path.dirname would do)

        This exists to allow handling of strange encoding if needed."""
        return os.path.dirname(path)

    def exists(self, path=None):
        return os.path.exists(self.join(path))

    def fstat(self, fp):
        return util.fstat(fp)

    def isdir(self, path=None):
        return os.path.isdir(self.join(path))

    def isfile(self, path=None):
        return os.path.isfile(self.join(path))

    def islink(self, path=None):
        return os.path.islink(self.join(path))

    def isfileorlink(self, path=None):
        '''return whether path is a regular file or a symlink

        Unlike isfile, this doesn't follow symlinks.'''
        try:
            st = self.lstat(path)
        except OSError:
            # a path that cannot be lstat()ed is neither
            return False
        mode = st.st_mode
        return stat.S_ISREG(mode) or stat.S_ISLNK(mode)

    def reljoin(self, *paths):
        """join various elements of a path together (as os.path.join would do)

        The vfs base is not injected so that path stay relative. This exists
        to allow handling of strange encoding if needed."""
        return os.path.join(*paths)

    def split(self, path):
        """split top-most element of a path (as os.path.split would do)

        This exists to allow handling of strange encoding if needed."""
        return os.path.split(path)

    def lexists(self, path=None):
        return os.path.lexists(self.join(path))

    def lstat(self, path=None):
        return os.lstat(self.join(path))

    def listdir(self, path=None):
        return os.listdir(self.join(path))

    def makedir(self, path=None, notindexed=True):
        return util.makedir(self.join(path), notindexed)

    def makedirs(self, path=None, mode=None):
        return util.makedirs(self.join(path), mode)

    def makelock(self, info, path):
        return util.makelock(info, self.join(path))

    def mkdir(self, path=None):
        return os.mkdir(self.join(path))

    def mkstemp(self, suffix='', prefix='tmp', dir=None, text=False):
        '''create a temporary file as tempfile.mkstemp would do

        The file is created under ``dir`` (relative to the vfs root). The
        returned name is relative as well: ``dir`` joined with the file
        name, or the bare file name when ``dir`` is not given.
        '''
        fd, name = tempfile.mkstemp(suffix=suffix, prefix=prefix,
                                    dir=self.join(dir), text=text)
        # only the file name part is needed; the directory part is rebuilt
        # from the caller's relative ``dir``
        _dname, fname = util.split(name)
        if dir:
            return fd, os.path.join(dir, fname)
        else:
            return fd, fname

    def readdir(self, path=None, stat=None, skip=None):
        return osutil.listdir(self.join(path), stat, skip)

    def readlock(self, path):
        return util.readlock(self.join(path))

    def rename(self, src, dst, checkambig=False):
        """Rename from src to dst

        checkambig argument is used with util.filestat, and is useful
        only if destination file is guarded by any lock
        (e.g. repo.lock or repo.wlock).
        """
        dstpath = self.join(dst)
        # False when checkambig is off, otherwise the pre-rename filestat
        oldstat = checkambig and util.filestat(dstpath)
        if oldstat and oldstat.stat:
            ret = util.rename(self.join(src), dstpath)
            newstat = util.filestat(dstpath)
            if newstat.isambig(oldstat):
                # stat of renamed file is ambiguous to original one
                newstat.avoidambig(dstpath, oldstat)
            return ret
        return util.rename(self.join(src), dstpath)

    def readlink(self, path):
        return os.readlink(self.join(path))

    def removedirs(self, path=None):
        """Remove a leaf directory and all empty intermediate ones
        """
        return util.removedirs(self.join(path))

    def rmtree(self, path=None, ignore_errors=False, forcibly=False):
        """Remove a directory tree recursively

        If ``forcibly``, this tries to remove READ-ONLY files, too.
        """
        if forcibly:
            def onerror(function, path, excinfo):
                # shutil.rmtree reports a failed file removal with
                # os.remove on Python 2 and with os.unlink on Python 3;
                # failures of any other call are re-raised untouched.
                if function not in (os.remove, os.unlink):
                    raise
                # read-only files cannot be unlinked under Windows
                s = os.stat(path)
                if (s.st_mode & stat.S_IWRITE) != 0:
                    # already writable: the failure has another cause
                    raise
                os.chmod(path, stat.S_IMODE(s.st_mode) | stat.S_IWRITE)
                os.remove(path)
        else:
            onerror = None
        return shutil.rmtree(self.join(path),
                             ignore_errors=ignore_errors, onerror=onerror)

    def setflags(self, path, l, x):
        return util.setflags(self.join(path), l, x)

    def stat(self, path=None):
        return os.stat(self.join(path))

    def unlink(self, path=None):
        return util.unlink(self.join(path))

    def unlinkpath(self, path=None, ignoremissing=False):
        return util.unlinkpath(self.join(path), ignoremissing)

    def utime(self, path=None, t=None):
        return os.utime(self.join(path), t)

    def walk(self, path=None, onerror=None):
        """Yield (dirpath, dirs, files) tuple for each directories under path

        ``dirpath`` is relative one from the root of this vfs. This
        uses ``os.sep`` as path separator, even you specify POSIX
        style ``path``.

        "The root of this vfs" is represented as empty ``dirpath``.
        """
        root = os.path.normpath(self.join(None))
        # when dirpath == root, dirpath[prefixlen:] becomes empty
        # because len(dirpath) < prefixlen.
        prefixlen = len(pathutil.normasprefix(root))
        for dirpath, dirs, files in os.walk(self.join(path), onerror=onerror):
            yield (dirpath[prefixlen:], dirs, files)

    @contextlib.contextmanager
    def backgroundclosing(self, ui, expectedcount=-1):
        """Allow files to be closed asynchronously.

        When this context manager is active, ``backgroundclose`` can be passed
        to ``__call__``/``open`` to result in the file possibly being closed
        asynchronously, on a background thread.
        """
        # The closer is stored on ``self.vfs`` when this object has such an
        # attribute, and on ``self`` otherwise.
        vfs = getattr(self, 'vfs', self)
        # This is an arbitrary restriction and could be changed if we ever
        # have a use case.
        if getattr(vfs, '_backgroundfilecloser', None):
            raise error.Abort(
                _('can only have 1 active background file closer'))

        with backgroundfilecloser(ui, expectedcount=expectedcount) as bfc:
            try:
                vfs._backgroundfilecloser = bfc
                yield bfc
            finally:
                # always unregister, even if the body raised
                vfs._backgroundfilecloser = None
269
class vfs(abstractvfs):
    '''Operate files relative to a base directory

    This class is used to hide the details of COW semantics and
    remote file access from higher level code.
    '''
    def __init__(self, base, audit=True, expandpath=False, realpath=False):
        if expandpath:
            base = util.expandpath(base)
        if realpath:
            base = os.path.realpath(base)
        # ``base`` must be set first: the ``mustaudit`` setter reads it
        self.base = base
        self.mustaudit = audit
        self.createmode = None
        self._trustnlink = None

    @property
    def mustaudit(self):
        return self._audit

    @mustaudit.setter
    def mustaudit(self, onoff):
        # switching the flag also swaps the ``audit`` callable between a
        # pathauditor bound to ``base`` and ``util.always``
        self._audit = onoff
        if onoff:
            self.audit = pathutil.pathauditor(self.base)
        else:
            self.audit = util.always

    @util.propertycache
    def _cansymlink(self):
        return util.checklink(self.base)

    @util.propertycache
    def _chmod(self):
        return util.checkexec(self.base)

    def _fixfilemode(self, name):
        '''apply ``createmode`` (masked with 0o666) to ``name``

        Nothing is done when no ``createmode`` is set or ``_chmod`` is
        false.
        '''
        if self.createmode is None or not self._chmod:
            return
        os.chmod(name, self.createmode & 0o666)

    def __call__(self, path, mode="r", text=False, atomictemp=False,
                 notindexed=False, backgroundclose=False, checkambig=False):
        '''Open ``path`` file, which is relative to vfs root.

        Newly created directories are marked as "not to be indexed by
        the content indexing service", if ``notindexed`` is specified
        for "write" mode access.

        If ``backgroundclose`` is passed, the file may be closed asynchronously.
        It can only be used if the ``self.backgroundclosing()`` context manager
        is active. This should only be specified if the following criteria hold:

        1. There is a potential for writing thousands of files. Unless you
           are writing thousands of files, the performance benefits of
           asynchronously closing files is not realized.
        2. Files are opened exactly once for the ``backgroundclosing``
           active duration and are therefore free of race conditions between
           closing a file on a background thread and reopening it. (If the
           file were opened multiple times, there could be unflushed data
           because the original file handle hasn't been flushed/closed yet.)

        ``checkambig`` argument is passed to atomictempfile (valid
        only for writing), and is useful only if target file is
        guarded by any lock (e.g. repo.lock or repo.wlock).

        Raises ``error.Abort`` when ``path`` fails the OS file name check
        (auditing enabled), when ``checkambig`` is used with a read mode,
        or when ``backgroundclose`` is requested without an active
        ``backgroundclosing()`` context manager.
        '''
        if self._audit:
            r = util.checkosfilename(path)
            if r:
                raise error.Abort("%s: %r" % (r, path))
        self.audit(path)
        f = self.join(path)

        if not text and "b" not in mode:
            mode += "b" # for that other OS

        # -1: no link bookkeeping done; 0: the file is (or will be) new
        nlink = -1
        if mode not in ('r', 'rb'):
            dirname, basename = util.split(f)
            # If basename is empty, then the path is malformed because it points
            # to a directory. Let the posixfile() call below raise IOError.
            if basename:
                if atomictemp:
                    util.makedirs(dirname, self.createmode, notindexed)
                    return util.atomictempfile(f, mode, self.createmode,
                                               checkambig=checkambig)
                try:
                    if 'w' in mode:
                        util.unlink(f)
                        nlink = 0
                    else:
                        # nlinks() may behave differently for files on Windows
                        # shares if the file is open.
                        with util.posixfile(f):
                            nlink = util.nlinks(f)
                            if nlink < 1:
                                nlink = 2 # force mktempcopy (issue1922)
                except (OSError, IOError) as e:
                    if e.errno != errno.ENOENT:
                        raise
                    # the file does not exist yet: make sure its
                    # directory does before opening below
                    nlink = 0
                    util.makedirs(dirname, self.createmode, notindexed)
                if nlink > 0:
                    if self._trustnlink is None:
                        self._trustnlink = nlink > 1 or util.checknlink(f)
                    if nlink > 1 or not self._trustnlink:
                        # replace the file with a private copy before
                        # writing to it
                        util.rename(util.mktempcopy(f), f)
        fp = util.posixfile(f, mode)
        if nlink == 0:
            self._fixfilemode(f)

        if checkambig:
            if mode in ('r', 'rb'):
                # do not leak the handle opened above
                fp.close()
                raise error.Abort(_('implementation error: mode %s is not'
                                    ' valid for checkambig=True') % mode)
            fp = checkambigatclosing(fp)

        if backgroundclose:
            # the attribute only exists once backgroundclosing() has been
            # used on this vfs, so it cannot be read unconditionally
            closer = getattr(self, '_backgroundfilecloser', None)
            if not closer:
                # do not leak the handle opened above
                fp.close()
                raise error.Abort(_('backgroundclose can only be used when a '
                                  'backgroundclosing context manager is active')
                                  )

            fp = delayclosedfile(fp, closer)

        return fp

    def symlink(self, src, dst):
        '''make ``dst`` (relative to the vfs root) a symlink to ``src``

        When ``_cansymlink`` is false, ``src`` is written as the content
        of a regular file at ``dst`` instead.
        '''
        self.audit(dst)
        linkname = self.join(dst)
        try:
            os.unlink(linkname)
        except OSError:
            # best effort: failure to remove a previous entry is ignored
            pass

        util.makedirs(os.path.dirname(linkname), self.createmode)

        if self._cansymlink:
            try:
                os.symlink(src, linkname)
            except OSError as err:
                raise OSError(err.errno, _('could not symlink to %r: %s') %
                              (src, err.strerror), linkname)
        else:
            self.write(dst, src)

    def join(self, path, *insidef):
        '''return ``base`` joined with ``path`` and ``insidef``

        A false ``path`` yields ``base`` itself.
        '''
        if path:
            return os.path.join(self.base, path, *insidef)
        else:
            return self.base
421
# 'opener' is a second name bound to the vfs class
opener = vfs
423
class auditvfs(object):
    '''Mixin holding a wrapped vfs in ``self.vfs``

    ``mustaudit`` and ``options`` are not stored here: reads and writes
    of both are forwarded to the wrapped vfs.
    '''
    def __init__(self, vfs):
        self.vfs = vfs

    def _getmustaudit(self):
        return self.vfs.mustaudit

    def _setmustaudit(self, onoff):
        self.vfs.mustaudit = onoff

    mustaudit = property(_getmustaudit, _setmustaudit)

    def _getoptions(self):
        return self.vfs.options

    def _setoptions(self, value):
        self.vfs.options = value

    options = property(_getoptions, _setoptions)
443
class filtervfs(abstractvfs, auditvfs):
    '''Wrapper vfs for filtering filenames with a function.

    Paths go through the filter function before being handed to the
    wrapped vfs.
    '''

    def __init__(self, vfs, filter):
        auditvfs.__init__(self, vfs)
        self._filter = filter

    def __call__(self, path, *args, **kwargs):
        '''open the filtered ``path`` through the wrapped vfs'''
        filtered = self._filter(path)
        return self.vfs(filtered, *args, **kwargs)

    def join(self, path, *insidef):
        '''join the filtered path using the wrapped vfs

        A false ``path`` is handed to the wrapped vfs without filtering.
        '''
        inner = self.vfs
        if not path:
            return inner.join(path)
        # filter the whole relative path, not each element separately
        relpath = inner.reljoin(path, *insidef)
        return inner.join(self._filter(relpath))
459
# 'filteropener' is a second name bound to the filtervfs class
filteropener = filtervfs
461
class readonlyvfs(abstractvfs, auditvfs):
    '''Wrapper vfs preventing any writing.'''

    def __init__(self, vfs):
        auditvfs.__init__(self, vfs)

    def __call__(self, path, mode='r', *args, **kw):
        '''open ``path`` through the wrapped vfs, for reading only

        Any mode other than 'r' or 'rb' aborts.
        '''
        if mode == 'r' or mode == 'rb':
            return self.vfs(path, mode, *args, **kw)
        raise error.Abort(_('this vfs is read only'))

    def join(self, path, *insidef):
        '''delegate path joining to the wrapped vfs'''
        inner = self.vfs
        return inner.join(path, *insidef)
475
class closewrapbase(object):
    """Base class of wrapper, which hooks closing

    Attribute reads, writes and deletions are forwarded to the wrapped
    file object; ``__exit__`` and ``close`` are left for subclasses.

    Do not instantiate outside of the vfs layer.
    """
    def __init__(self, fh):
        # __setattr__ below forwards to the wrapped file, so the wrapper's
        # own slot has to be written through object.__setattr__
        object.__setattr__(self, '_origfh', fh)

    def __getattr__(self, attr):
        wrapped = self._origfh
        return getattr(wrapped, attr)

    def __setattr__(self, attr, value):
        wrapped = self._origfh
        return setattr(wrapped, attr, value)

    def __delattr__(self, attr):
        wrapped = self._origfh
        return delattr(wrapped, attr)

    def __enter__(self):
        # hands back whatever the wrapped file's own __enter__ returns
        wrapped = self._origfh
        return wrapped.__enter__()

    def __exit__(self, exc_type, exc_value, exc_tb):
        message = 'attempted instantiating ' + str(type(self))
        raise NotImplementedError(message)

    def close(self):
        message = 'attempted instantiating ' + str(type(self))
        raise NotImplementedError(message)
501
class delayclosedfile(closewrapbase):
    """Proxy for a file object whose close is delayed.

    The wrapped file is not closed here: it is handed to the ``close()``
    method of the ``closer`` given at construction.

    Do not instantiate outside of the vfs layer.
    """
    def __init__(self, fh, closer):
        super(delayclosedfile, self).__init__(fh)
        # attribute writes are forwarded to the wrapped file, so keep our
        # own state through object.__setattr__
        object.__setattr__(self, '_closer', closer)

    def __exit__(self, exc_type, exc_value, exc_tb):
        scheduler = self._closer
        scheduler.close(self._origfh)

    def close(self):
        scheduler = self._closer
        scheduler.close(self._origfh)
516
class backgroundfilecloser(object):
    """Coordinates background closing of file handles on multiple threads.

    When background closing is disabled by configuration, or the expected
    file count is too small, no thread is started and ``close()`` closes
    files synchronously.
    """
    def __init__(self, ui, expectedcount=-1):
        self._running = False
        self._entered = False
        self._threads = []
        self._threadexception = None

        # Only Windows/NTFS has slow file closing. So only enable by default
        # on that platform. But allow to be enabled elsewhere for testing.
        onbydefault = pycompat.osname == 'nt'
        if not ui.configbool('worker', 'backgroundclose', onbydefault):
            return

        # There is overhead to starting and stopping the background threads.
        # Don't do background processing unless the file count is large enough
        # to justify it.
        threshold = ui.configint('worker', 'backgroundcloseminfilecount',
                                 2048)
        # FUTURE dynamically start background threads after minfilecount closes.
        # (We don't currently have any callers that don't know their file count)
        if 0 < expectedcount < threshold:
            return

        # Windows defaults to a limit of 512 open files. A buffer of 128
        # should give us enough headway.
        queuesize = ui.configint('worker', 'backgroundclosemaxqueue', 384)
        nthreads = ui.configint('worker', 'backgroundclosethreadcount', 4)

        ui.debug('starting %d threads for background file closing\n' %
                 nthreads)

        # the queue and the running flag must exist before any worker starts
        self._queue = util.queue(maxsize=queuesize)
        self._running = True

        for _unused in range(nthreads):
            worker = threading.Thread(target=self._worker,
                                      name='backgroundcloser')
            self._threads.append(worker)
            worker.start()

    def __enter__(self):
        self._entered = True
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        # workers exit once they find the queue empty with this flag cleared
        self._running = False

        # Wait for threads to finish closing so open files don't linger for
        # longer than lifetime of context manager.
        for worker in self._threads:
            worker.join()

    def _worker(self):
        """Main routine for worker thread."""
        while True:
            try:
                fh = self._queue.get(block=True, timeout=0.100)
            except util.empty:
                # nothing queued: stop only once the closer is shut down
                if not self._running:
                    break
                continue
            # Need to catch or the thread will terminate and
            # we could orphan file descriptors.
            try:
                fh.close()
            except Exception as e:
                # Stash so can re-raise from main thread later.
                self._threadexception = e

    def close(self, fh):
        """Schedule a file for closing."""
        if not self._entered:
            raise error.Abort(_('can only call close() when context manager '
                              'active'))

        # If a background thread encountered an exception, raise now so we fail
        # fast. Otherwise we may potentially go on for minutes until the error
        # is acted on.
        pending = self._threadexception
        if pending:
            self._threadexception = None
            raise pending

        if self._running:
            self._queue.put(fh, block=True, timeout=None)
        else:
            # If we're not actively running, close synchronously.
            fh.close()
607
class checkambigatclosing(closewrapbase):
    """Proxy for a file object, to avoid ambiguity of file stat

    See also util.filestat for detail about "ambiguity of file stat".

    This proxy is useful only if the target file is guarded by any
    lock (e.g. repo.lock or repo.wlock)

    Do not instantiate outside of the vfs layer.
    """
    def __init__(self, fh):
        super(checkambigatclosing, self).__init__(fh)
        # remember the filestat taken at wrapping time; stored through
        # object.__setattr__ because attribute writes are forwarded
        object.__setattr__(self, '_oldstat', util.filestat(fh.name))

    def _checkambig(self):
        '''compare the current filestat with the one taken at wrapping time'''
        before = self._oldstat
        if not before.stat:
            # the earlier filestat carries no stat: nothing to compare with
            return
        name = self._origfh.name
        after = util.filestat(name)
        if after.isambig(before):
            # stat of changed file is ambiguous to original one
            after.avoidambig(name, before)

    def __exit__(self, exc_type, exc_value, exc_tb):
        # leave the wrapped file's context first, then check its stat
        wrapped = self._origfh
        wrapped.__exit__(exc_type, exc_value, exc_tb)
        self._checkambig()

    def close(self):
        # close the wrapped file first, then check its stat
        wrapped = self._origfh
        wrapped.close()
        self._checkambig()