hgext/remotefilelog/shallowutil.py
changeset 40495 3a333a582d7b
child 40514 6f0b6905ef6f
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/remotefilelog/shallowutil.py	Thu Sep 27 13:03:19 2018 -0400
@@ -0,0 +1,487 @@
+# shallowutil.py -- remotefilelog utilities
+#
+# Copyright 2014 Facebook, Inc.
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+from __future__ import absolute_import
+
+import collections
+import errno
+import hashlib
+import os
+import stat
+import struct
+import tempfile
+
+from mercurial.i18n import _
+from mercurial import (
+    error,
+    pycompat,
+    revlog,
+    util,
+)
+from mercurial.utils import (
+    storageutil,
+    stringutil,
+)
+from . import constants
+
+if not pycompat.iswindows:
+    import grp
+
+def getcachekey(reponame, file, id):
+    pathhash = hashlib.sha1(file).hexdigest()
+    return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
+
+def getlocalkey(file, id):
+    pathhash = hashlib.sha1(file).hexdigest()
+    return os.path.join(pathhash, id)
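+
+# For example, with a hypothetical node id and h = sha1(file).hexdigest(),
+# the two key layouts are:
+#   getcachekey('myrepo', 'foo/b', id) -> 'myrepo/' + h[:2] + '/' + h[2:] + '/' + id
+#   getlocalkey('foo/b', id)           -> h + '/' + id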
+
+def getcachepath(ui, allowempty=False):
+    cachepath = ui.config("remotefilelog", "cachepath")
+    if not cachepath:
+        if allowempty:
+            return None
+        else:
+            raise error.Abort(_("could not find config option "
+                                "remotefilelog.cachepath"))
+    return util.expandpath(cachepath)
+
+def getcachepackpath(repo, category):
+    cachepath = getcachepath(repo.ui)
+    if category != constants.FILEPACK_CATEGORY:
+        return os.path.join(cachepath, repo.name, 'packs', category)
+    else:
+        return os.path.join(cachepath, repo.name, 'packs')
+
+def getlocalpackpath(base, category):
+    return os.path.join(base, 'packs', category)
+
+def createrevlogtext(text, copyfrom=None, copyrev=None):
+    """returns a string that matches the revlog contents in a
+    traditional revlog
+    """
+    meta = {}
+    if copyfrom or text.startswith('\1\n'):
+        if copyfrom:
+            meta['copy'] = copyfrom
+            meta['copyrev'] = copyrev
+        text = storageutil.packmeta(meta, text)
+
+    return text
+
+def parsemeta(text):
+    """parse mercurial filelog metadata"""
+    meta, size = storageutil.parsemeta(text)
+    if text.startswith('\1\n'):
+        s = text.index('\1\n', 2)
+        text = text[s + 2:]
+    return meta or {}, text
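+
+# For example, with hypothetical copy metadata:
+#   parsemeta('\1\ncopy: a\ncopyrev: b\n\1\nbody')
+#   -> ({'copy': 'a', 'copyrev': 'b'}, 'body')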
+
+def sumdicts(*dicts):
+    """Adds all the values of *dicts together into one dictionary. This assumes
+    the values in *dicts are all summable.
+
+    e.g. [{'a': 4, 'b': 2}, {'b': 3, 'c': 1}] -> {'a': 4, 'b': 5, 'c': 1}
+    """
+    result = collections.defaultdict(lambda: 0)
+    for d in dicts:
+        for k, v in d.iteritems():
+            result[k] += v
+    return result
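+
+# For example:
+#   sumdicts({'a': 4, 'b': 2}, {'b': 3, 'c': 1})
+#   -> {'a': 4, 'b': 5, 'c': 1} (as a defaultdict)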
+
+def prefixkeys(d, prefix):
+    """Returns ``d`` with ``prefix`` prepended to all its keys."""
+    result = {}
+    for k, v in d.iteritems():
+        result[prefix + k] = v
+    return result
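+
+# For example (hypothetical key): prefixkeys({'size': 1}, 'pack_')
+# -> {'pack_size': 1}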
+
+def reportpackmetrics(ui, prefix, *stores):
+    dicts = [s.getmetrics() for s in stores]
+    stats = prefixkeys(sumdicts(*dicts), prefix + '_')
+    ui.log(prefix + "_packsizes", "", **stats)
+
+def _parsepackmeta(metabuf):
+    """parse datapack meta, bytes (<metadata-list>) -> dict
+
+    The dict contains raw content - both keys and values are strings.
+    Upper-level business may want to convert some of them to other types like
+    integers, on their own.
+
+    raise ValueError if the data is corrupted
+    """
+    metadict = {}
+    offset = 0
+    buflen = len(metabuf)
+    while buflen - offset >= 3:
+        key = metabuf[offset]
+        offset += 1
+        metalen = struct.unpack_from('!H', metabuf, offset)[0]
+        offset += 2
+        if offset + metalen > buflen:
+            raise ValueError('corrupted metadata: incomplete buffer')
+        value = metabuf[offset:offset + metalen]
+        metadict[key] = value
+        offset += metalen
+    if offset != buflen:
+        raise ValueError('corrupted metadata: redundant data')
+    return metadict
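+
+# For example, a single entry with key 's' and value '123' parses as
+# ('\x00\x03' being the big-endian '!H' length of the value):
+#   _parsepackmeta('s\x00\x03123') -> {'s': '123'}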
+
+def _buildpackmeta(metadict):
+    """reverse of _parsepackmeta, dict -> bytes (<metadata-list>)
+
+    The dict contains raw content - both keys and values are strings.
+    Upper-level business may want to serialize values of other types (like
+    integers) to strings before calling this function.
+
+    raise ProgrammingError when metadata key is illegal, or ValueError if
+    length limit is exceeded
+    """
+    metabuf = ''
+    for k, v in sorted((metadict or {}).iteritems()):
+        if len(k) != 1:
+            raise error.ProgrammingError('packmeta: illegal key: %s' % k)
+        if len(v) > 0xfffe:
+            raise ValueError('metadata value is too long: 0x%x > 0xfffe'
+                             % len(v))
+        metabuf += k
+        metabuf += struct.pack('!H', len(v))
+        metabuf += v
+    # len(metabuf) is guaranteed representable in 4 bytes, because there are
+    # only 256 keys, and for each value, len(value) <= 0xfffe.
+    return metabuf
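+
+# The reverse direction, for the same hypothetical entry:
+#   _buildpackmeta({'s': '123'}) -> 's\x00\x03123'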
+
+_metaitemtypes = {
+    constants.METAKEYFLAG: (int, long),
+    constants.METAKEYSIZE: (int, long),
+}
+
+def buildpackmeta(metadict):
+    """like _buildpackmeta, but typechecks metadict and normalize it.
+
+    This means, METAKEYSIZE and METAKEYSIZE should have integers as values,
+    and METAKEYFLAG will be dropped if its value is 0.
+    """
+    newmeta = {}
+    for k, v in (metadict or {}).iteritems():
+        expectedtype = _metaitemtypes.get(k, (bytes,))
+        if not isinstance(v, expectedtype):
+            raise error.ProgrammingError('packmeta: wrong type of key %s' % k)
+        # normalize int to binary buffer
+        if int in expectedtype:
+            # optimization: remove flag if it's 0 to save space
+            if k == constants.METAKEYFLAG and v == 0:
+                continue
+            v = int2bin(v)
+        newmeta[k] = v
+    return _buildpackmeta(newmeta)
+
+def parsepackmeta(metabuf):
+    """like _parsepackmeta, but convert fields to desired types automatically.
+
+    This means METAKEYFLAG and METAKEYSIZE fields will be converted to
+    integers.
+    """
+    metadict = _parsepackmeta(metabuf)
+    for k, v in metadict.iteritems():
+        if k in _metaitemtypes and int in _metaitemtypes[k]:
+            metadict[k] = bin2int(v)
+    return metadict
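+
+# Round-trip sketch: integer fields survive the encode/decode cycle, e.g.
+#   parsepackmeta(buildpackmeta({constants.METAKEYSIZE: 3}))
+#   -> {constants.METAKEYSIZE: 3}
+# while a zero METAKEYFLAG is dropped by buildpackmeta to save space.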
+
+def int2bin(n):
+    """convert a non-negative integer to raw binary buffer"""
+    buf = bytearray()
+    while n > 0:
+        buf.insert(0, n & 0xff)
+        n >>= 8
+    return bytes(buf)
+
+def bin2int(buf):
+    """the reverse of int2bin, convert a binary buffer to an integer"""
+    x = 0
+    for b in bytearray(buf):
+        x <<= 8
+        x |= b
+    return x
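+
+# For example: int2bin(0x0102) -> '\x01\x02' and bin2int('\x01\x02') -> 258;
+# zero encodes as the empty buffer, so bin2int(int2bin(0)) == 0.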
+
+def parsesizeflags(raw):
+    """given a remotefilelog blob, return (headersize, rawtextsize, flags)
+
+    see remotefilelogserver.createfileblob for the format.
+    raise RuntimeError if the content is ill-formed.
+    """
+    flags = revlog.REVIDX_DEFAULT_FLAGS
+    size = None
+    try:
+        index = raw.index('\0')
+        header = raw[:index]
+        if header.startswith('v'):
+            # v1 and above, header starts with 'v'
+            if header.startswith('v1\n'):
+                for s in header.split('\n'):
+                    if s.startswith(constants.METAKEYSIZE):
+                        size = int(s[len(constants.METAKEYSIZE):])
+                    elif s.startswith(constants.METAKEYFLAG):
+                        flags = int(s[len(constants.METAKEYFLAG):])
+            else:
+                raise RuntimeError('unsupported remotefilelog header: %s'
+                                   % header)
+        else:
+            # v0, str(int(size)) is the header
+            size = int(header)
+    except ValueError:
+        raise RuntimeError("unexpected remotefilelog header: illegal format")
+    if size is None:
+        raise RuntimeError("unexpected remotefilelog header: no size found")
+    return index + 1, size, flags
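+
+# For example, a v0 blob whose raw text is 'foo' parses as:
+#   parsesizeflags('3\0foo') -> (2, 3, revlog.REVIDX_DEFAULT_FLAGS)
+# (the header is '3', so the raw text starts at offset 2 and is 3 bytes)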
+
+def buildfileblobheader(size, flags, version=None):
+    """return the header of a remotefilelog blob.
+
+    see remotefilelogserver.createfileblob for the format.
+    approximately the reverse of parsesizeflags.
+
+    version may be 0, 1, or None (decide automatically).
+    """
+    # choose v0 if flags is empty, otherwise v1
+    if version is None:
+        version = int(bool(flags))
+    if version == 1:
+        header = ('v1\n%s%d\n%s%d'
+                  % (constants.METAKEYSIZE, size,
+                     constants.METAKEYFLAG, flags))
+    elif version == 0:
+        if flags:
+            raise error.ProgrammingError('fileblob v0 does not support flags')
+        header = '%d' % size
+    else:
+        raise error.ProgrammingError('unknown fileblob version %d' % version)
+    return header
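+
+# For example, assuming METAKEYSIZE is 's' and METAKEYFLAG is 'f' as defined
+# in constants.py:
+#   buildfileblobheader(3, 0) -> '3'
+#   buildfileblobheader(3, 1) -> 'v1\ns3\nf1'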
+
+def ancestormap(raw):
+    offset, size, flags = parsesizeflags(raw)
+    start = offset + size
+
+    mapping = {}
+    while start < len(raw):
+        divider = raw.index('\0', start + 80)
+
+        currentnode = raw[start:(start + 20)]
+        p1 = raw[(start + 20):(start + 40)]
+        p2 = raw[(start + 40):(start + 60)]
+        linknode = raw[(start + 60):(start + 80)]
+        copyfrom = raw[(start + 80):divider]
+
+        mapping[currentnode] = (p1, p2, linknode, copyfrom)
+        start = divider + 1
+
+    return mapping
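+
+# The ancestry section walked above is a sequence of entries, each laid out
+# as (sizes inferred from the offsets used by ancestormap):
+#   node (20 bytes) | p1 (20) | p2 (20) | linknode (20) | copyfrom | '\0'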
+
+def readfile(path):
+    f = open(path, 'rb')
+    try:
+        result = f.read()
+
+        # we should never have empty files
+        if not result:
+            os.remove(path)
+            raise IOError("empty file: %s" % path)
+
+        return result
+    finally:
+        f.close()
+
+def unlinkfile(filepath):
+    if pycompat.iswindows:
+        # On Windows, os.unlink cannot delete readonly files
+        os.chmod(filepath, stat.S_IWUSR)
+    os.unlink(filepath)
+
+def renamefile(source, destination):
+    if pycompat.iswindows:
+        # On Windows, os.rename cannot rename readonly files
+        # and cannot overwrite destination if it exists
+        os.chmod(source, stat.S_IWUSR)
+        if os.path.isfile(destination):
+            os.chmod(destination, stat.S_IWUSR)
+            os.unlink(destination)
+
+    os.rename(source, destination)
+
+def writefile(path, content, readonly=False):
+    dirname, filename = os.path.split(path)
+    if not os.path.exists(dirname):
+        try:
+            os.makedirs(dirname)
+        except OSError as ex:
+            if ex.errno != errno.EEXIST:
+                raise
+
+    fd, temp = tempfile.mkstemp(prefix='.%s-' % filename, dir=dirname)
+    os.close(fd)
+
+    try:
+        f = util.posixfile(temp, 'wb')
+        f.write(content)
+        f.close()
+
+        if readonly:
+            mode = 0o444
+        else:
+            # tempfiles are created with 0o600, so we need to manually set the
+            # mode.
+            oldumask = os.umask(0)
+            # there's no way to get the umask without modifying it, so set it
+            # back
+            os.umask(oldumask)
+            # apply the umask to the usual 0o666 default, as open() would
+            mode = 0o666 & ~oldumask
+
+        renamefile(temp, path)
+        os.chmod(path, mode)
+    except Exception:
+        try:
+            unlinkfile(temp)
+        except OSError:
+            pass
+        raise
+
+def sortnodes(nodes, parentfunc):
+    """Topologically sorts the nodes, using the parentfunc to find
+    the parents of nodes."""
+    nodes = set(nodes)
+    childmap = {}
+    parentmap = {}
+    roots = []
+
+    # Build a child and parent map
+    for n in nodes:
+        parents = [p for p in parentfunc(n) if p in nodes]
+        parentmap[n] = set(parents)
+        for p in parents:
+            childmap.setdefault(p, set()).add(n)
+        if not parents:
+            roots.append(n)
+
+    roots.sort()
+    # Process roots, adding children to the queue as they become roots
+    results = []
+    while roots:
+        n = roots.pop(0)
+        results.append(n)
+        if n in childmap:
+            children = childmap[n]
+            for c in children:
+                childparents = parentmap[c]
+                childparents.remove(n)
+                if len(childparents) == 0:
+                    # insert at the beginning, so that child nodes
+                    # are likely to be output immediately after their
+                    # parents.  This gives better compression results.
+                    roots.insert(0, c)
+
+    return results
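+
+# A small sketch with hypothetical node names (real callers pass hashes):
+#   parents = {'a': [], 'b': ['a'], 'c': ['b']}
+#   sortnodes(['c', 'a', 'b'], parents.get) -> ['a', 'b', 'c']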
+
+def readexactly(stream, n):
+    '''read n bytes from stream.read and abort if fewer were available'''
+    s = stream.read(n)
+    if len(s) < n:
+        raise error.Abort(_("stream ended unexpectedly"
+                           " (got %d bytes, expected %d)")
+                          % (len(s), n))
+    return s
+
+def readunpack(stream, fmt):
+    data = readexactly(stream, struct.calcsize(fmt))
+    return struct.unpack(fmt, data)
+
+def readpath(stream):
+    rawlen = readexactly(stream, constants.FILENAMESIZE)
+    pathlen = struct.unpack(constants.FILENAMESTRUCT, rawlen)[0]
+    return readexactly(stream, pathlen)
+
+def readnodelist(stream):
+    rawlen = readexactly(stream, constants.NODECOUNTSIZE)
+    nodecount = struct.unpack(constants.NODECOUNTSTRUCT, rawlen)[0]
+    for i in pycompat.xrange(nodecount):
+        yield readexactly(stream, constants.NODESIZE)
+
+def readpathlist(stream):
+    rawlen = readexactly(stream, constants.PATHCOUNTSIZE)
+    pathcount = struct.unpack(constants.PATHCOUNTSTRUCT, rawlen)[0]
+    for i in pycompat.xrange(pathcount):
+        yield readpath(stream)
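+
+# Wire format sketch (the exact widths come from constants.py): a path is a
+# length prefix followed by the name; node and path lists are a count
+# followed by that many fixed-size nodes or length-prefixed paths.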
+
+def getgid(groupname):
+    try:
+        gid = grp.getgrnam(groupname).gr_gid
+        return gid
+    except KeyError:
+        return None
+
+def setstickygroupdir(path, gid, warn=None):
+    if gid is None:
+        return
+    try:
+        os.chown(path, -1, gid)
+        os.chmod(path, 0o2775)
+    except (IOError, OSError) as ex:
+        if warn:
+            warn(_('unable to chown/chmod on %s: %s\n') % (path, ex))
+
+def mkstickygroupdir(ui, path):
+    """Creates the given directory (if it doesn't exist) and give it a
+    particular group with setgid enabled."""
+    gid = None
+    groupname = ui.config("remotefilelog", "cachegroup")
+    if groupname:
+        gid = getgid(groupname)
+        if gid is None:
+            ui.warn(_('unable to resolve group name: %s\n') % groupname)
+
+    # we use a single stat syscall to check existence and the mode / group bits
+    st = None
+    try:
+        st = os.stat(path)
+    except OSError:
+        pass
+
+    if st:
+        # exists
+        if (st.st_mode & 0o2775) != 0o2775 or st.st_gid != gid:
+            # permission needs to be fixed
+            setstickygroupdir(path, gid, ui.warn)
+        return
+
+    oldumask = os.umask(0o002)
+    try:
+        missingdirs = [path]
+        path = os.path.dirname(path)
+        while path and not os.path.exists(path):
+            missingdirs.append(path)
+            path = os.path.dirname(path)
+
+        for path in reversed(missingdirs):
+            try:
+                os.mkdir(path)
+            except OSError as ex:
+                if ex.errno != errno.EEXIST:
+                    raise
+
+        for path in missingdirs:
+            setstickygroupdir(path, gid, ui.warn)
+    finally:
+        os.umask(oldumask)
+
+def getusername(ui):
+    try:
+        return stringutil.shortuser(ui.username())
+    except Exception:
+        return 'unknown'
+
+def getreponame(ui):
+    reponame = ui.config('paths', 'default')
+    if reponame:
+        return os.path.basename(reponame)
+    return "unknown"