mercurial/dirstate.py
author Matt Mackall <mpm@selenic.com>
Fri, 13 Apr 2012 22:55:40 -0500
changeset 16421 859ef739339e
parent 16323 0789d1bbf6c1
child 16472 14a4e17f0817
permissions -rw-r--r--
changegroupsubset: drop repeated len() calls

# dirstate.py - working directory tracking for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
import errno

from node import nullid
from i18n import _
import scmutil, util, ignore, osutil, parsers, encoding
import struct, os, stat, errno
import cStringIO

_format = ">cllll"
propertycache = util.propertycache
filecache = scmutil.filecache

class repocache(filecache):
    """filecache for files in .hg/"""
    def join(self, obj, fname):
        return obj._opener.join(fname)

class rootcache(filecache):
    """filecache for files in the repository root"""
    def join(self, obj, fname):
        return obj._join(fname)

def _finddirs(path):
    pos = path.rfind('/')
    while pos != -1:
        yield path[:pos]
        pos = path.rfind('/', 0, pos)

def _incdirs(dirs, path):
    for base in _finddirs(path):
        if base in dirs:
            dirs[base] += 1
            return
        dirs[base] = 1

def _decdirs(dirs, path):
    for base in _finddirs(path):
        if dirs[base] > 1:
            dirs[base] -= 1
            return
        del dirs[base]

class dirstate(object):

    def __init__(self, opener, ui, root, validate):
        '''Create a new dirstate object.

        opener is an open()-like callable that can be used to open the
        dirstate file; root is the root of the directory tracked by
        the dirstate.
        '''
        self._opener = opener
        self._validate = validate
        self._root = root
        self._rootdir = os.path.join(root, '')
        self._dirty = False
        self._dirtypl = False
        self._lastnormaltime = 0
        self._ui = ui
        self._filecache = {}

    @propertycache
    def _map(self):
        '''Return the dirstate contents as a map from filename to
        (state, mode, size, time).'''
        self._read()
        return self._map

    @propertycache
    def _copymap(self):
        self._read()
        return self._copymap

    @propertycache
    def _foldmap(self):
        f = {}
        for name in self._map:
            f[util.normcase(name)] = name
        for name in self._dirs:
            f[util.normcase(name)] = name
        f['.'] = '.' # prevents useless util.fspath() invocation
        return f

    @repocache('branch')
    def _branch(self):
        try:
            return self._opener.read("branch").strip() or "default"
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            return "default"

    @propertycache
    def _pl(self):
        try:
            fp = self._opener("dirstate")
            st = fp.read(40)
            fp.close()
            l = len(st)
            if l == 40:
                return st[:20], st[20:40]
            elif l > 0 and l < 40:
                raise util.Abort(_('working directory state appears damaged!'))
        except IOError, err:
            if err.errno != errno.ENOENT:
                raise
        return [nullid, nullid]

    @propertycache
    def _dirs(self):
        dirs = {}
        for f, s in self._map.iteritems():
            if s[0] != 'r':
                _incdirs(dirs, f)
        return dirs

    def dirs(self):
        return self._dirs

    @rootcache('.hgignore')
    def _ignore(self):
        files = [self._join('.hgignore')]
        for name, path in self._ui.configitems("ui"):
            if name == 'ignore' or name.startswith('ignore.'):
                files.append(util.expandpath(path))
        return ignore.ignore(self._root, files, self._ui.warn)

    @propertycache
    def _slash(self):
        return self._ui.configbool('ui', 'slash') and os.sep != '/'

    @propertycache
    def _checklink(self):
        return util.checklink(self._root)

    @propertycache
    def _checkexec(self):
        return util.checkexec(self._root)

    @propertycache
    def _checkcase(self):
        return not util.checkcase(self._join('.hg'))

    def _join(self, f):
        # much faster than os.path.join()
        # it's safe because f is always a relative path
        return self._rootdir + f

    def flagfunc(self, buildfallback):
        if self._checklink and self._checkexec:
            def f(x):
                p = self._join(x)
                if os.path.islink(p):
                    return 'l'
                if util.isexec(p):
                    return 'x'
                return ''
            return f

        fallback = buildfallback()
        if self._checklink:
            def f(x):
                if os.path.islink(self._join(x)):
                    return 'l'
                if 'x' in fallback(x):
                    return 'x'
                return ''
            return f
        if self._checkexec:
            def f(x):
                if 'l' in fallback(x):
                    return 'l'
                if util.isexec(self._join(x)):
                    return 'x'
                return ''
            return f
        else:
            return fallback

    def getcwd(self):
        cwd = os.getcwd()
        if cwd == self._root:
            return ''
        # self._root ends with a path separator if self._root is '/' or 'C:\'
        rootsep = self._root
        if not util.endswithsep(rootsep):
            rootsep += os.sep
        if cwd.startswith(rootsep):
            return cwd[len(rootsep):]
        else:
            # we're outside the repo. return an absolute path.
            return cwd

    def pathto(self, f, cwd=None):
        if cwd is None:
            cwd = self.getcwd()
        path = util.pathto(self._root, cwd, f)
        if self._slash:
            return util.normpath(path)
        return path

    def __getitem__(self, key):
        '''Return the current state of key (a filename) in the dirstate.

        States are:
          n  normal
          m  needs merging
          r  marked for removal
          a  marked for addition
          ?  not tracked
        '''
        return self._map.get(key, ("?",))[0]

    def __contains__(self, key):
        return key in self._map

    def __iter__(self):
        for x in sorted(self._map):
            yield x

    def parents(self):
        return [self._validate(p) for p in self._pl]

    def p1(self):
        return self._validate(self._pl[0])

    def p2(self):
        return self._validate(self._pl[1])

    def branch(self):
        return encoding.tolocal(self._branch)

    def setparents(self, p1, p2=nullid):
        self._dirty = self._dirtypl = True
        self._pl = p1, p2

    def setbranch(self, branch):
        if branch in ['tip', '.', 'null']:
            raise util.Abort(_('the name \'%s\' is reserved') % branch)
        self._branch = encoding.fromlocal(branch)
        self._opener.write("branch", self._branch + '\n')

    def _read(self):
        self._map = {}
        self._copymap = {}
        try:
            st = self._opener.read("dirstate")
        except IOError, err:
            if err.errno != errno.ENOENT:
                raise
            return
        if not st:
            return

        p = parsers.parse_dirstate(self._map, self._copymap, st)
        if not self._dirtypl:
            self._pl = p

    def invalidate(self):
        for a in ("_map", "_copymap", "_foldmap", "_branch", "_pl", "_dirs",
                "_ignore"):
            if a in self.__dict__:
                delattr(self, a)
        self._lastnormaltime = 0
        self._dirty = False

    def copy(self, source, dest):
        """Mark dest as a copy of source. Unmark dest if source is None."""
        if source == dest:
            return
        self._dirty = True
        if source is not None:
            self._copymap[dest] = source
        elif dest in self._copymap:
            del self._copymap[dest]

    def copied(self, file):
        return self._copymap.get(file, None)

    def copies(self):
        return self._copymap

    def _droppath(self, f):
        if self[f] not in "?r" and "_dirs" in self.__dict__:
            _decdirs(self._dirs, f)

    def _addpath(self, f, check=False):
        oldstate = self[f]
        if check or oldstate == "r":
            scmutil.checkfilename(f)
            if f in self._dirs:
                raise util.Abort(_('directory %r already in dirstate') % f)
            # shadows
            for d in _finddirs(f):
                if d in self._dirs:
                    break
                if d in self._map and self[d] != 'r':
                    raise util.Abort(
                        _('file %r in dirstate clashes with %r') % (d, f))
        if oldstate in "?r" and "_dirs" in self.__dict__:
            _incdirs(self._dirs, f)

    def normal(self, f):
        '''Mark a file normal and clean.'''
        self._dirty = True
        self._addpath(f)
        s = os.lstat(self._join(f))
        mtime = int(s.st_mtime)
        self._map[f] = ('n', s.st_mode, s.st_size, mtime)
        if f in self._copymap:
            del self._copymap[f]
        if mtime > self._lastnormaltime:
            # Remember the most recent modification timeslot for status(),
            # to make sure we won't miss future size-preserving file content
            # modifications that happen within the same timeslot.
            self._lastnormaltime = mtime

    def normallookup(self, f):
        '''Mark a file normal, but possibly dirty.'''
        if self._pl[1] != nullid and f in self._map:
            # if there is a merge going on and the file was either
            # in state 'm' (-1) or coming from other parent (-2) before
            # being removed, restore that state.
            entry = self._map[f]
            if entry[0] == 'r' and entry[2] in (-1, -2):
                source = self._copymap.get(f)
                if entry[2] == -1:
                    self.merge(f)
                elif entry[2] == -2:
                    self.otherparent(f)
                if source:
                    self.copy(source, f)
                return
            if entry[0] == 'm' or entry[0] == 'n' and entry[2] == -2:
                return
        self._dirty = True
        self._addpath(f)
        self._map[f] = ('n', 0, -1, -1)
        if f in self._copymap:
            del self._copymap[f]

    def otherparent(self, f):
        '''Mark as coming from the other parent, always dirty.'''
        if self._pl[1] == nullid:
            raise util.Abort(_("setting %r to other parent "
                               "only allowed in merges") % f)
        self._dirty = True
        self._addpath(f)
        self._map[f] = ('n', 0, -2, -1)
        if f in self._copymap:
            del self._copymap[f]

    def add(self, f):
        '''Mark a file added.'''
        self._dirty = True
        self._addpath(f, True)
        self._map[f] = ('a', 0, -1, -1)
        if f in self._copymap:
            del self._copymap[f]

    def remove(self, f):
        '''Mark a file removed.'''
        self._dirty = True
        self._droppath(f)
        size = 0
        if self._pl[1] != nullid and f in self._map:
            # backup the previous state
            entry = self._map[f]
            if entry[0] == 'm': # merge
                size = -1
            elif entry[0] == 'n' and entry[2] == -2: # other parent
                size = -2
        self._map[f] = ('r', 0, size, 0)
        if size == 0 and f in self._copymap:
            del self._copymap[f]

    def merge(self, f):
        '''Mark a file merged.'''
        self._dirty = True
        s = os.lstat(self._join(f))
        self._addpath(f)
        self._map[f] = ('m', s.st_mode, s.st_size, int(s.st_mtime))
        if f in self._copymap:
            del self._copymap[f]

    def drop(self, f):
        '''Drop a file from the dirstate'''
        if f in self._map:
            self._dirty = True
            self._droppath(f)
            del self._map[f]

    def _normalize(self, path, isknown):
        normed = util.normcase(path)
        folded = self._foldmap.get(normed, None)
        if folded is None:
            if isknown or not os.path.lexists(os.path.join(self._root, path)):
                folded = path
            else:
                # recursively normalize leading directory components
                # against dirstate
                if '/' in normed:
                    d, f = normed.rsplit('/', 1)
                    d = self._normalize(d, isknown)
                    r = self._root + "/" + d
                    folded = d + "/" + util.fspath(f, r)
                else:
                    folded = util.fspath(normed, self._root)
                self._foldmap[normed] = folded

        return folded

    def normalize(self, path, isknown=False):
        '''
        normalize the case of a pathname when on a casefolding filesystem

        isknown specifies whether the filename came from walking the
        disk, to avoid extra filesystem access

        The normalized case is determined based on the following precedence:

        - version of name already stored in the dirstate
        - version of name stored on disk
        - version provided via command arguments
        '''

        if self._checkcase:
            return self._normalize(path, isknown)
        return path

    def clear(self):
        self._map = {}
        if "_dirs" in self.__dict__:
            delattr(self, "_dirs")
        self._copymap = {}
        self._pl = [nullid, nullid]
        self._lastnormaltime = 0
        self._dirty = True

    def rebuild(self, parent, files):
        self.clear()
        for f in files:
            if 'x' in files.flags(f):
                self._map[f] = ('n', 0777, -1, 0)
            else:
                self._map[f] = ('n', 0666, -1, 0)
        self._pl = (parent, nullid)
        self._dirty = True

    def write(self):
        if not self._dirty:
            return
        st = self._opener("dirstate", "w", atomictemp=True)

        # use the modification time of the newly created temporary file as the
        # filesystem's notion of 'now'
        now = int(util.fstat(st).st_mtime)

        cs = cStringIO.StringIO()
        copymap = self._copymap
        pack = struct.pack
        write = cs.write
        write("".join(self._pl))
        for f, e in self._map.iteritems():
            if e[0] == 'n' and e[3] == now:
                # The file was last modified "simultaneously" with the current
                # write to dirstate (i.e. within the same second for file-
                # systems with a granularity of 1 sec). This commonly happens
                # for at least a couple of files on 'update'.
                # The user could change the file without changing its size
                # within the same second. Invalidate the file's stat data in
                # dirstate, forcing future 'status' calls to compare the
                # contents of the file. This prevents mistakenly treating such
                # files as clean.
                e = (e[0], 0, -1, -1)   # mark entry as 'unset'
                self._map[f] = e

            if f in copymap:
                f = "%s\0%s" % (f, copymap[f])
            e = pack(_format, e[0], e[1], e[2], e[3], len(f))
            write(e)
            write(f)
        st.write(cs.getvalue())
        st.close()
        self._lastnormaltime = 0
        self._dirty = self._dirtypl = False

    def _dirignore(self, f):
        if f == '.':
            return False
        if self._ignore(f):
            return True
        for p in _finddirs(f):
            if self._ignore(p):
                return True
        return False

    def walk(self, match, subrepos, unknown, ignored):
        '''
        Walk recursively through the directory tree, finding all files
        matched by match.

        Return a dict mapping filename to stat-like object (either
        mercurial.osutil.stat instance or return value of os.stat()).
        '''

        def fwarn(f, msg):
            self._ui.warn('%s: %s\n' % (self.pathto(f), msg))
            return False

        def badtype(mode):
            kind = _('unknown')
            if stat.S_ISCHR(mode):
                kind = _('character device')
            elif stat.S_ISBLK(mode):
                kind = _('block device')
            elif stat.S_ISFIFO(mode):
                kind = _('fifo')
            elif stat.S_ISSOCK(mode):
                kind = _('socket')
            elif stat.S_ISDIR(mode):
                kind = _('directory')
            return _('unsupported file type (type is %s)') % kind

        ignore = self._ignore
        dirignore = self._dirignore
        if ignored:
            ignore = util.never
            dirignore = util.never
        elif not unknown:
            # if unknown and ignored are False, skip step 2
            ignore = util.always
            dirignore = util.always

        matchfn = match.matchfn
        badfn = match.bad
        dmap = self._map
        normpath = util.normpath
        listdir = osutil.listdir
        lstat = os.lstat
        getkind = stat.S_IFMT
        dirkind = stat.S_IFDIR
        regkind = stat.S_IFREG
        lnkkind = stat.S_IFLNK
        join = self._join
        work = []
        wadd = work.append

        exact = skipstep3 = False
        if matchfn == match.exact: # match.exact
            exact = True
            dirignore = util.always # skip step 2
        elif match.files() and not match.anypats(): # match.match, no patterns
            skipstep3 = True

        if not exact and self._checkcase:
            normalize = self._normalize
            skipstep3 = False
        else:
            normalize = lambda x, y: x

        files = sorted(match.files())
        subrepos.sort()
        i, j = 0, 0
        while i < len(files) and j < len(subrepos):
            subpath = subrepos[j] + "/"
            if files[i] < subpath:
                i += 1
                continue
            while i < len(files) and files[i].startswith(subpath):
                del files[i]
            j += 1

        if not files or '.' in files:
            files = ['']
        results = dict.fromkeys(subrepos)
        results['.hg'] = None

        # step 1: find all explicit files
        for ff in files:
            nf = normalize(normpath(ff), False)
            if nf in results:
                continue

            try:
                st = lstat(join(nf))
                kind = getkind(st.st_mode)
                if kind == dirkind:
                    skipstep3 = False
                    if nf in dmap:
                        #file deleted on disk but still in dirstate
                        results[nf] = None
                    match.dir(nf)
                    if not dirignore(nf):
                        wadd(nf)
                elif kind == regkind or kind == lnkkind:
                    results[nf] = st
                else:
                    badfn(ff, badtype(kind))
                    if nf in dmap:
                        results[nf] = None
            except OSError, inst:
                if nf in dmap: # does it exactly match a file?
                    results[nf] = None
                else: # does it match a directory?
                    prefix = nf + "/"
                    for fn in dmap:
                        if fn.startswith(prefix):
                            match.dir(nf)
                            skipstep3 = False
                            break
                    else:
                        badfn(ff, inst.strerror)

        # step 2: visit subdirectories
        while work:
            nd = work.pop()
            skip = None
            if nd == '.':
                nd = ''
            else:
                skip = '.hg'
            try:
                entries = listdir(join(nd), stat=True, skip=skip)
            except OSError, inst:
                if inst.errno == errno.EACCES:
                    fwarn(nd, inst.strerror)
                    continue
                raise
            for f, kind, st in entries:
                nf = normalize(nd and (nd + "/" + f) or f, True)
                if nf not in results:
                    if kind == dirkind:
                        if not ignore(nf):
                            match.dir(nf)
                            wadd(nf)
                        if nf in dmap and matchfn(nf):
                            results[nf] = None
                    elif kind == regkind or kind == lnkkind:
                        if nf in dmap:
                            if matchfn(nf):
                                results[nf] = st
                        elif matchfn(nf) and not ignore(nf):
                            results[nf] = st
                    elif nf in dmap and matchfn(nf):
                        results[nf] = None

        # step 3: report unseen items in the dmap hash
        if not skipstep3 and not exact:
            visit = sorted([f for f in dmap if f not in results and matchfn(f)])
            for nf, st in zip(visit, util.statfiles([join(i) for i in visit])):
                if not st is None and not getkind(st.st_mode) in (regkind, lnkkind):
                    st = None
                results[nf] = st
        for s in subrepos:
            del results[s]
        del results['.hg']
        return results

    def status(self, match, subrepos, ignored, clean, unknown):
        '''Determine the status of the working copy relative to the
        dirstate and return a tuple of lists (unsure, modified, added,
        removed, deleted, unknown, ignored, clean), where:

          unsure:
            files that might have been modified since the dirstate was
            written, but need to be read to be sure (size is the same
            but mtime differs)
          modified:
            files that have definitely been modified since the dirstate
            was written (different size or mode)
          added:
            files that have been explicitly added with hg add
          removed:
            files that have been explicitly removed with hg remove
          deleted:
            files that have been deleted through other means ("missing")
          unknown:
            files not in the dirstate that are not ignored
          ignored:
            files not in the dirstate that are ignored
            (by _dirignore())
          clean:
            files that have definitely not been modified since the
            dirstate was written
        '''
        listignored, listclean, listunknown = ignored, clean, unknown
        lookup, modified, added, unknown, ignored = [], [], [], [], []
        removed, deleted, clean = [], [], []

        dmap = self._map
        ladd = lookup.append            # aka "unsure"
        madd = modified.append
        aadd = added.append
        uadd = unknown.append
        iadd = ignored.append
        radd = removed.append
        dadd = deleted.append
        cadd = clean.append

        lnkkind = stat.S_IFLNK

        for fn, st in self.walk(match, subrepos, listunknown,
                                listignored).iteritems():
            if fn not in dmap:
                if (listignored or match.exact(fn)) and self._dirignore(fn):
                    if listignored:
                        iadd(fn)
                elif listunknown:
                    uadd(fn)
                continue

            state, mode, size, time = dmap[fn]

            if not st and state in "nma":
                dadd(fn)
            elif state == 'n':
                # The "mode & lnkkind != lnkkind or self._checklink"
                # lines are an expansion of "islink => checklink"
                # where islink means "is this a link?" and checklink
                # means "can we check links?".
                mtime = int(st.st_mtime)
                if (size >= 0 and
                    (size != st.st_size
                     or ((mode ^ st.st_mode) & 0100 and self._checkexec))
                    and (mode & lnkkind != lnkkind or self._checklink)
                    or size == -2 # other parent
                    or fn in self._copymap):
                    madd(fn)
                elif (mtime != time
                      and (mode & lnkkind != lnkkind or self._checklink)):
                    ladd(fn)
                elif mtime == self._lastnormaltime:
                    # fn may have been changed in the same timeslot without
                    # changing its size. This can happen if we quickly do
                    # multiple commits in a single transaction.
                    # Force lookup, so we don't miss such a racy file change.
                    ladd(fn)
                elif listclean:
                    cadd(fn)
            elif state == 'm':
                madd(fn)
            elif state == 'a':
                aadd(fn)
            elif state == 'r':
                radd(fn)

        return (lookup, modified, added, removed, deleted, unknown, ignored,
                clean)