comparison mercurial/scmutil.py @ 31217:0f31830fbfc4

vfs: extract 'vfs' class and related code to a new 'vfs' module (API) The 'scmutil' module is growing large (1500+ lines) and 2/5 of it is related to vfs. We extract the 'vfs' related code into its own module to get both modules back to a better scale and clearer contents. We keep all the references available in 'scmutil' for now, as many references need to be updated.
author Pierre-Yves David <pierre-yves.david@ens-lyon.org>
date Wed, 01 Mar 2017 11:00:12 +0100
parents 6cf2857526c7
children 1937671105bc
comparison
equal deleted inserted replaced
31216:21fa3d3688f3 31217:0f31830fbfc4
5 # This software may be used and distributed according to the terms of the 5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version. 6 # GNU General Public License version 2 or any later version.
7 7
8 from __future__ import absolute_import 8 from __future__ import absolute_import
9 9
10 import contextlib
11 import errno 10 import errno
12 import glob 11 import glob
13 import hashlib 12 import hashlib
14 import os 13 import os
15 import re 14 import re
16 import shutil
17 import socket 15 import socket
18 import stat
19 import tempfile
20 import threading
21 16
22 from .i18n import _ 17 from .i18n import _
23 from .node import wdirrev 18 from .node import wdirrev
24 from . import ( 19 from . import (
25 encoding, 20 encoding,
30 phases, 25 phases,
31 pycompat, 26 pycompat,
32 revsetlang, 27 revsetlang,
33 similar, 28 similar,
34 util, 29 util,
30 vfs as vfsmod,
35 ) 31 )
36 32
37 if pycompat.osname == 'nt': 33 if pycompat.osname == 'nt':
38 from . import scmwindows as scmplatform 34 from . import scmwindows as scmplatform
39 else: 35 else:
334 for rev in revs: 330 for rev in revs:
335 s.update('%s;' % rev) 331 s.update('%s;' % rev)
336 key = s.digest() 332 key = s.digest()
337 return key 333 return key
338 334
339 class abstractvfs(object): 335 # compatibility layer since all 'vfs' code moved to 'mercurial.vfs'
340 """Abstract base class; cannot be instantiated""" 336 #
341 337 # It is hard to install a deprecation warning for this since we do not have
342 def __init__(self, *args, **kwargs): 338 # access to a 'ui' object.
343 '''Prevent instantiation; don't call this from subclasses.''' 339 opener = vfs = vfsmod.vfs
344 raise NotImplementedError('attempted instantiating ' + str(type(self))) 340 filteropener = filtervfs = vfsmod.filtervfs
345 341 abstractvfs = vfsmod.abstractvfs
346 def tryread(self, path): 342 readonlyvfs = vfsmod.readonlyvfs
347 '''gracefully return an empty string for missing files''' 343 auditvfs = vfsmod.auditvfs
348 try: 344 checkambigatclosing = vfsmod.checkambigatclosing
349 return self.read(path)
350 except IOError as inst:
351 if inst.errno != errno.ENOENT:
352 raise
353 return ""
354
355 def tryreadlines(self, path, mode='rb'):
356 '''gracefully return an empty array for missing files'''
357 try:
358 return self.readlines(path, mode=mode)
359 except IOError as inst:
360 if inst.errno != errno.ENOENT:
361 raise
362 return []
363
364 @util.propertycache
365 def open(self):
366 '''Open ``path`` file, which is relative to vfs root.
367
368 Newly created directories are marked as "not to be indexed by
369 the content indexing service", if ``notindexed`` is specified
370 for "write" mode access.
371 '''
372 return self.__call__
373
374 def read(self, path):
375 with self(path, 'rb') as fp:
376 return fp.read()
377
378 def readlines(self, path, mode='rb'):
379 with self(path, mode=mode) as fp:
380 return fp.readlines()
381
382 def write(self, path, data, backgroundclose=False):
383 with self(path, 'wb', backgroundclose=backgroundclose) as fp:
384 return fp.write(data)
385
386 def writelines(self, path, data, mode='wb', notindexed=False):
387 with self(path, mode=mode, notindexed=notindexed) as fp:
388 return fp.writelines(data)
389
390 def append(self, path, data):
391 with self(path, 'ab') as fp:
392 return fp.write(data)
393
394 def basename(self, path):
395 """return base element of a path (as os.path.basename would do)
396
397 This exists to allow handling of strange encoding if needed."""
398 return os.path.basename(path)
399
400 def chmod(self, path, mode):
401 return os.chmod(self.join(path), mode)
402
403 def dirname(self, path):
404 """return dirname element of a path (as os.path.dirname would do)
405
406 This exists to allow handling of strange encoding if needed."""
407 return os.path.dirname(path)
408
409 def exists(self, path=None):
410 return os.path.exists(self.join(path))
411
412 def fstat(self, fp):
413 return util.fstat(fp)
414
415 def isdir(self, path=None):
416 return os.path.isdir(self.join(path))
417
418 def isfile(self, path=None):
419 return os.path.isfile(self.join(path))
420
421 def islink(self, path=None):
422 return os.path.islink(self.join(path))
423
424 def isfileorlink(self, path=None):
425 '''return whether path is a regular file or a symlink
426
427 Unlike isfile, this doesn't follow symlinks.'''
428 try:
429 st = self.lstat(path)
430 except OSError:
431 return False
432 mode = st.st_mode
433 return stat.S_ISREG(mode) or stat.S_ISLNK(mode)
434
435 def reljoin(self, *paths):
436 """join various elements of a path together (as os.path.join would do)
437
438 The vfs base is not injected so that path stay relative. This exists
439 to allow handling of strange encoding if needed."""
440 return os.path.join(*paths)
441
442 def split(self, path):
443 """split top-most element of a path (as os.path.split would do)
444
445 This exists to allow handling of strange encoding if needed."""
446 return os.path.split(path)
447
448 def lexists(self, path=None):
449 return os.path.lexists(self.join(path))
450
451 def lstat(self, path=None):
452 return os.lstat(self.join(path))
453
454 def listdir(self, path=None):
455 return os.listdir(self.join(path))
456
457 def makedir(self, path=None, notindexed=True):
458 return util.makedir(self.join(path), notindexed)
459
460 def makedirs(self, path=None, mode=None):
461 return util.makedirs(self.join(path), mode)
462
463 def makelock(self, info, path):
464 return util.makelock(info, self.join(path))
465
466 def mkdir(self, path=None):
467 return os.mkdir(self.join(path))
468
469 def mkstemp(self, suffix='', prefix='tmp', dir=None, text=False):
470 fd, name = tempfile.mkstemp(suffix=suffix, prefix=prefix,
471 dir=self.join(dir), text=text)
472 dname, fname = util.split(name)
473 if dir:
474 return fd, os.path.join(dir, fname)
475 else:
476 return fd, fname
477
478 def readdir(self, path=None, stat=None, skip=None):
479 return osutil.listdir(self.join(path), stat, skip)
480
481 def readlock(self, path):
482 return util.readlock(self.join(path))
483
484 def rename(self, src, dst, checkambig=False):
485 """Rename from src to dst
486
487 checkambig argument is used with util.filestat, and is useful
488 only if destination file is guarded by any lock
489 (e.g. repo.lock or repo.wlock).
490 """
491 dstpath = self.join(dst)
492 oldstat = checkambig and util.filestat(dstpath)
493 if oldstat and oldstat.stat:
494 ret = util.rename(self.join(src), dstpath)
495 newstat = util.filestat(dstpath)
496 if newstat.isambig(oldstat):
497 # stat of renamed file is ambiguous to original one
498 newstat.avoidambig(dstpath, oldstat)
499 return ret
500 return util.rename(self.join(src), dstpath)
501
502 def readlink(self, path):
503 return os.readlink(self.join(path))
504
505 def removedirs(self, path=None):
506 """Remove a leaf directory and all empty intermediate ones
507 """
508 return util.removedirs(self.join(path))
509
510 def rmtree(self, path=None, ignore_errors=False, forcibly=False):
511 """Remove a directory tree recursively
512
513 If ``forcibly``, this tries to remove READ-ONLY files, too.
514 """
515 if forcibly:
516 def onerror(function, path, excinfo):
517 if function is not os.remove:
518 raise
519 # read-only files cannot be unlinked under Windows
520 s = os.stat(path)
521 if (s.st_mode & stat.S_IWRITE) != 0:
522 raise
523 os.chmod(path, stat.S_IMODE(s.st_mode) | stat.S_IWRITE)
524 os.remove(path)
525 else:
526 onerror = None
527 return shutil.rmtree(self.join(path),
528 ignore_errors=ignore_errors, onerror=onerror)
529
530 def setflags(self, path, l, x):
531 return util.setflags(self.join(path), l, x)
532
533 def stat(self, path=None):
534 return os.stat(self.join(path))
535
536 def unlink(self, path=None):
537 return util.unlink(self.join(path))
538
539 def unlinkpath(self, path=None, ignoremissing=False):
540 return util.unlinkpath(self.join(path), ignoremissing)
541
542 def utime(self, path=None, t=None):
543 return os.utime(self.join(path), t)
544
545 def walk(self, path=None, onerror=None):
546 """Yield (dirpath, dirs, files) tuple for each directories under path
547
548 ``dirpath`` is relative one from the root of this vfs. This
549 uses ``os.sep`` as path separator, even you specify POSIX
550 style ``path``.
551
552 "The root of this vfs" is represented as empty ``dirpath``.
553 """
554 root = os.path.normpath(self.join(None))
555 # when dirpath == root, dirpath[prefixlen:] becomes empty
556 # because len(dirpath) < prefixlen.
557 prefixlen = len(pathutil.normasprefix(root))
558 for dirpath, dirs, files in os.walk(self.join(path), onerror=onerror):
559 yield (dirpath[prefixlen:], dirs, files)
560
561 @contextlib.contextmanager
562 def backgroundclosing(self, ui, expectedcount=-1):
563 """Allow files to be closed asynchronously.
564
565 When this context manager is active, ``backgroundclose`` can be passed
566 to ``__call__``/``open`` to result in the file possibly being closed
567 asynchronously, on a background thread.
568 """
569 # This is an arbitrary restriction and could be changed if we ever
570 # have a use case.
571 vfs = getattr(self, 'vfs', self)
572 if getattr(vfs, '_backgroundfilecloser', None):
573 raise error.Abort(
574 _('can only have 1 active background file closer'))
575
576 with backgroundfilecloser(ui, expectedcount=expectedcount) as bfc:
577 try:
578 vfs._backgroundfilecloser = bfc
579 yield bfc
580 finally:
581 vfs._backgroundfilecloser = None
582
583 class vfs(abstractvfs):
584 '''Operate files relative to a base directory
585
586 This class is used to hide the details of COW semantics and
587 remote file access from higher level code.
588 '''
589 def __init__(self, base, audit=True, expandpath=False, realpath=False):
590 if expandpath:
591 base = util.expandpath(base)
592 if realpath:
593 base = os.path.realpath(base)
594 self.base = base
595 self.mustaudit = audit
596 self.createmode = None
597 self._trustnlink = None
598
599 @property
600 def mustaudit(self):
601 return self._audit
602
603 @mustaudit.setter
604 def mustaudit(self, onoff):
605 self._audit = onoff
606 if onoff:
607 self.audit = pathutil.pathauditor(self.base)
608 else:
609 self.audit = util.always
610
611 @util.propertycache
612 def _cansymlink(self):
613 return util.checklink(self.base)
614
615 @util.propertycache
616 def _chmod(self):
617 return util.checkexec(self.base)
618
619 def _fixfilemode(self, name):
620 if self.createmode is None or not self._chmod:
621 return
622 os.chmod(name, self.createmode & 0o666)
623
624 def __call__(self, path, mode="r", text=False, atomictemp=False,
625 notindexed=False, backgroundclose=False, checkambig=False):
626 '''Open ``path`` file, which is relative to vfs root.
627
628 Newly created directories are marked as "not to be indexed by
629 the content indexing service", if ``notindexed`` is specified
630 for "write" mode access.
631
632 If ``backgroundclose`` is passed, the file may be closed asynchronously.
633 It can only be used if the ``self.backgroundclosing()`` context manager
634 is active. This should only be specified if the following criteria hold:
635
636 1. There is a potential for writing thousands of files. Unless you
637 are writing thousands of files, the performance benefits of
638 asynchronously closing files is not realized.
639 2. Files are opened exactly once for the ``backgroundclosing``
640 active duration and are therefore free of race conditions between
641 closing a file on a background thread and reopening it. (If the
642 file were opened multiple times, there could be unflushed data
643 because the original file handle hasn't been flushed/closed yet.)
644
645 ``checkambig`` argument is passed to atomictemplfile (valid
646 only for writing), and is useful only if target file is
647 guarded by any lock (e.g. repo.lock or repo.wlock).
648 '''
649 if self._audit:
650 r = util.checkosfilename(path)
651 if r:
652 raise error.Abort("%s: %r" % (r, path))
653 self.audit(path)
654 f = self.join(path)
655
656 if not text and "b" not in mode:
657 mode += "b" # for that other OS
658
659 nlink = -1
660 if mode not in ('r', 'rb'):
661 dirname, basename = util.split(f)
662 # If basename is empty, then the path is malformed because it points
663 # to a directory. Let the posixfile() call below raise IOError.
664 if basename:
665 if atomictemp:
666 util.makedirs(dirname, self.createmode, notindexed)
667 return util.atomictempfile(f, mode, self.createmode,
668 checkambig=checkambig)
669 try:
670 if 'w' in mode:
671 util.unlink(f)
672 nlink = 0
673 else:
674 # nlinks() may behave differently for files on Windows
675 # shares if the file is open.
676 with util.posixfile(f):
677 nlink = util.nlinks(f)
678 if nlink < 1:
679 nlink = 2 # force mktempcopy (issue1922)
680 except (OSError, IOError) as e:
681 if e.errno != errno.ENOENT:
682 raise
683 nlink = 0
684 util.makedirs(dirname, self.createmode, notindexed)
685 if nlink > 0:
686 if self._trustnlink is None:
687 self._trustnlink = nlink > 1 or util.checknlink(f)
688 if nlink > 1 or not self._trustnlink:
689 util.rename(util.mktempcopy(f), f)
690 fp = util.posixfile(f, mode)
691 if nlink == 0:
692 self._fixfilemode(f)
693
694 if checkambig:
695 if mode in ('r', 'rb'):
696 raise error.Abort(_('implementation error: mode %s is not'
697 ' valid for checkambig=True') % mode)
698 fp = checkambigatclosing(fp)
699
700 if backgroundclose:
701 if not self._backgroundfilecloser:
702 raise error.Abort(_('backgroundclose can only be used when a '
703 'backgroundclosing context manager is active')
704 )
705
706 fp = delayclosedfile(fp, self._backgroundfilecloser)
707
708 return fp
709
710 def symlink(self, src, dst):
711 self.audit(dst)
712 linkname = self.join(dst)
713 try:
714 os.unlink(linkname)
715 except OSError:
716 pass
717
718 util.makedirs(os.path.dirname(linkname), self.createmode)
719
720 if self._cansymlink:
721 try:
722 os.symlink(src, linkname)
723 except OSError as err:
724 raise OSError(err.errno, _('could not symlink to %r: %s') %
725 (src, err.strerror), linkname)
726 else:
727 self.write(dst, src)
728
729 def join(self, path, *insidef):
730 if path:
731 return os.path.join(self.base, path, *insidef)
732 else:
733 return self.base
734
735 opener = vfs
736
737 class auditvfs(object):
738 def __init__(self, vfs):
739 self.vfs = vfs
740
741 @property
742 def mustaudit(self):
743 return self.vfs.mustaudit
744
745 @mustaudit.setter
746 def mustaudit(self, onoff):
747 self.vfs.mustaudit = onoff
748
749 @property
750 def options(self):
751 return self.vfs.options
752
753 @options.setter
754 def options(self, value):
755 self.vfs.options = value
756
757 class filtervfs(abstractvfs, auditvfs):
758 '''Wrapper vfs for filtering filenames with a function.'''
759
760 def __init__(self, vfs, filter):
761 auditvfs.__init__(self, vfs)
762 self._filter = filter
763
764 def __call__(self, path, *args, **kwargs):
765 return self.vfs(self._filter(path), *args, **kwargs)
766
767 def join(self, path, *insidef):
768 if path:
769 return self.vfs.join(self._filter(self.vfs.reljoin(path, *insidef)))
770 else:
771 return self.vfs.join(path)
772
773 filteropener = filtervfs
774
775 class readonlyvfs(abstractvfs, auditvfs):
776 '''Wrapper vfs preventing any writing.'''
777
778 def __init__(self, vfs):
779 auditvfs.__init__(self, vfs)
780
781 def __call__(self, path, mode='r', *args, **kw):
782 if mode not in ('r', 'rb'):
783 raise error.Abort(_('this vfs is read only'))
784 return self.vfs(path, mode, *args, **kw)
785
786 def join(self, path, *insidef):
787 return self.vfs.join(path, *insidef)
788 345
789 def walkrepos(path, followsym=False, seen_dirs=None, recurse=False): 346 def walkrepos(path, followsym=False, seen_dirs=None, recurse=False):
790 '''yield every hg repository under path, always recursively. 347 '''yield every hg repository under path, always recursively.
791 The recurse flag will only control recursion into repo working dirs''' 348 The recurse flag will only control recursion into repo working dirs'''
792 def errhandler(err): 349 def errhandler(err):
1406 def gddeltaconfig(ui): 963 def gddeltaconfig(ui):
1407 """helper function to know if incoming delta should be optimised 964 """helper function to know if incoming delta should be optimised
1408 """ 965 """
1409 # experimental config: format.generaldelta 966 # experimental config: format.generaldelta
1410 return ui.configbool('format', 'generaldelta', False) 967 return ui.configbool('format', 'generaldelta', False)
1411
1412 class closewrapbase(object):
1413 """Base class of wrapper, which hooks closing
1414
1415 Do not instantiate outside of the vfs layer.
1416 """
1417 def __init__(self, fh):
1418 object.__setattr__(self, '_origfh', fh)
1419
1420 def __getattr__(self, attr):
1421 return getattr(self._origfh, attr)
1422
1423 def __setattr__(self, attr, value):
1424 return setattr(self._origfh, attr, value)
1425
1426 def __delattr__(self, attr):
1427 return delattr(self._origfh, attr)
1428
1429 def __enter__(self):
1430 return self._origfh.__enter__()
1431
1432 def __exit__(self, exc_type, exc_value, exc_tb):
1433 raise NotImplementedError('attempted instantiating ' + str(type(self)))
1434
1435 def close(self):
1436 raise NotImplementedError('attempted instantiating ' + str(type(self)))
1437
1438 class delayclosedfile(closewrapbase):
1439 """Proxy for a file object whose close is delayed.
1440
1441 Do not instantiate outside of the vfs layer.
1442 """
1443 def __init__(self, fh, closer):
1444 super(delayclosedfile, self).__init__(fh)
1445 object.__setattr__(self, '_closer', closer)
1446
1447 def __exit__(self, exc_type, exc_value, exc_tb):
1448 self._closer.close(self._origfh)
1449
1450 def close(self):
1451 self._closer.close(self._origfh)
1452
1453 class backgroundfilecloser(object):
1454 """Coordinates background closing of file handles on multiple threads."""
1455 def __init__(self, ui, expectedcount=-1):
1456 self._running = False
1457 self._entered = False
1458 self._threads = []
1459 self._threadexception = None
1460
1461 # Only Windows/NTFS has slow file closing. So only enable by default
1462 # on that platform. But allow to be enabled elsewhere for testing.
1463 defaultenabled = pycompat.osname == 'nt'
1464 enabled = ui.configbool('worker', 'backgroundclose', defaultenabled)
1465
1466 if not enabled:
1467 return
1468
1469 # There is overhead to starting and stopping the background threads.
1470 # Don't do background processing unless the file count is large enough
1471 # to justify it.
1472 minfilecount = ui.configint('worker', 'backgroundcloseminfilecount',
1473 2048)
1474 # FUTURE dynamically start background threads after minfilecount closes.
1475 # (We don't currently have any callers that don't know their file count)
1476 if expectedcount > 0 and expectedcount < minfilecount:
1477 return
1478
1479 # Windows defaults to a limit of 512 open files. A buffer of 128
1480 # should give us enough headway.
1481 maxqueue = ui.configint('worker', 'backgroundclosemaxqueue', 384)
1482 threadcount = ui.configint('worker', 'backgroundclosethreadcount', 4)
1483
1484 ui.debug('starting %d threads for background file closing\n' %
1485 threadcount)
1486
1487 self._queue = util.queue(maxsize=maxqueue)
1488 self._running = True
1489
1490 for i in range(threadcount):
1491 t = threading.Thread(target=self._worker, name='backgroundcloser')
1492 self._threads.append(t)
1493 t.start()
1494
1495 def __enter__(self):
1496 self._entered = True
1497 return self
1498
1499 def __exit__(self, exc_type, exc_value, exc_tb):
1500 self._running = False
1501
1502 # Wait for threads to finish closing so open files don't linger for
1503 # longer than lifetime of context manager.
1504 for t in self._threads:
1505 t.join()
1506
1507 def _worker(self):
1508 """Main routine for worker thread."""
1509 while True:
1510 try:
1511 fh = self._queue.get(block=True, timeout=0.100)
1512 # Need to catch or the thread will terminate and
1513 # we could orphan file descriptors.
1514 try:
1515 fh.close()
1516 except Exception as e:
1517 # Stash so can re-raise from main thread later.
1518 self._threadexception = e
1519 except util.empty:
1520 if not self._running:
1521 break
1522
1523 def close(self, fh):
1524 """Schedule a file for closing."""
1525 if not self._entered:
1526 raise error.Abort(_('can only call close() when context manager '
1527 'active'))
1528
1529 # If a background thread encountered an exception, raise now so we fail
1530 # fast. Otherwise we may potentially go on for minutes until the error
1531 # is acted on.
1532 if self._threadexception:
1533 e = self._threadexception
1534 self._threadexception = None
1535 raise e
1536
1537 # If we're not actively running, close synchronously.
1538 if not self._running:
1539 fh.close()
1540 return
1541
1542 self._queue.put(fh, block=True, timeout=None)
1543
1544 class checkambigatclosing(closewrapbase):
1545 """Proxy for a file object, to avoid ambiguity of file stat
1546
1547 See also util.filestat for detail about "ambiguity of file stat".
1548
1549 This proxy is useful only if the target file is guarded by any
1550 lock (e.g. repo.lock or repo.wlock)
1551
1552 Do not instantiate outside of the vfs layer.
1553 """
1554 def __init__(self, fh):
1555 super(checkambigatclosing, self).__init__(fh)
1556 object.__setattr__(self, '_oldstat', util.filestat(fh.name))
1557
1558 def _checkambig(self):
1559 oldstat = self._oldstat
1560 if oldstat.stat:
1561 newstat = util.filestat(self._origfh.name)
1562 if newstat.isambig(oldstat):
1563 # stat of changed file is ambiguous to original one
1564 newstat.avoidambig(self._origfh.name, oldstat)
1565
1566 def __exit__(self, exc_type, exc_value, exc_tb):
1567 self._origfh.__exit__(exc_type, exc_value, exc_tb)
1568 self._checkambig()
1569
1570 def close(self):
1571 self._origfh.close()
1572 self._checkambig()