hgext/remotefilelog/shallowutil.py
changeset 40495 3a333a582d7b
child 40514 6f0b6905ef6f
equal deleted inserted replaced
40494:9aeb9e2d28a7 40495:3a333a582d7b
       
     1 # shallowutil.py -- remotefilelog utilities
       
     2 #
       
     3 # Copyright 2014 Facebook, Inc.
       
     4 #
       
     5 # This software may be used and distributed according to the terms of the
       
     6 # GNU General Public License version 2 or any later version.
       
     7 from __future__ import absolute_import
       
     8 
       
     9 import collections
       
    10 import errno
       
    11 import hashlib
       
    12 import os
       
    13 import stat
       
    14 import struct
       
    15 import tempfile
       
    16 
       
    17 from mercurial.i18n import _
       
    18 from mercurial import (
       
    19     error,
       
    20     pycompat,
       
    21     revlog,
       
    22     util,
       
    23 )
       
    24 from mercurial.utils import (
       
    25     storageutil,
       
    26     stringutil,
       
    27 )
       
    28 from . import constants
       
    29 
       
    30 if not pycompat.iswindows:
       
    31     import grp
       
    32 
       
    33 def getcachekey(reponame, file, id):
       
    34     pathhash = hashlib.sha1(file).hexdigest()
       
    35     return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
       
    36 
       
    37 def getlocalkey(file, id):
       
    38     pathhash = hashlib.sha1(file).hexdigest()
       
    39     return os.path.join(pathhash, id)
       
    40 
       
    41 def getcachepath(ui, allowempty=False):
       
    42     cachepath = ui.config("remotefilelog", "cachepath")
       
    43     if not cachepath:
       
    44         if allowempty:
       
    45             return None
       
    46         else:
       
    47             raise error.Abort(_("could not find config option "
       
    48                                 "remotefilelog.cachepath"))
       
    49     return util.expandpath(cachepath)
       
    50 
       
    51 def getcachepackpath(repo, category):
       
    52     cachepath = getcachepath(repo.ui)
       
    53     if category != constants.FILEPACK_CATEGORY:
       
    54         return os.path.join(cachepath, repo.name, 'packs', category)
       
    55     else:
       
    56         return os.path.join(cachepath, repo.name, 'packs')
       
    57 
       
    58 def getlocalpackpath(base, category):
       
    59     return os.path.join(base, 'packs', category)
       
    60 
       
    61 def createrevlogtext(text, copyfrom=None, copyrev=None):
       
    62     """returns a string that matches the revlog contents in a
       
    63     traditional revlog
       
    64     """
       
    65     meta = {}
       
    66     if copyfrom or text.startswith('\1\n'):
       
    67         if copyfrom:
       
    68             meta['copy'] = copyfrom
       
    69             meta['copyrev'] = copyrev
       
    70         text = storageutil.packmeta(meta, text)
       
    71 
       
    72     return text
       
    73 
       
    74 def parsemeta(text):
       
    75     """parse mercurial filelog metadata"""
       
    76     meta, size = storageutil.parsemeta(text)
       
    77     if text.startswith('\1\n'):
       
    78         s = text.index('\1\n', 2)
       
    79         text = text[s + 2:]
       
    80     return meta or {}, text
       
    81 
       
    82 def sumdicts(*dicts):
       
    83     """Adds all the values of *dicts together into one dictionary. This assumes
       
    84     the values in *dicts are all summable.
       
    85 
       
    86     e.g. [{'a': 4', 'b': 2}, {'b': 3, 'c': 1}] -> {'a': 4, 'b': 5, 'c': 1}
       
    87     """
       
    88     result = collections.defaultdict(lambda: 0)
       
    89     for dict in dicts:
       
    90         for k, v in dict.iteritems():
       
    91             result[k] += v
       
    92     return result
       
    93 
       
    94 def prefixkeys(dict, prefix):
       
    95     """Returns ``dict`` with ``prefix`` prepended to all its keys."""
       
    96     result = {}
       
    97     for k, v in dict.iteritems():
       
    98         result[prefix + k] = v
       
    99     return result
       
   100 
       
   101 def reportpackmetrics(ui, prefix, *stores):
       
   102     dicts = [s.getmetrics() for s in stores]
       
   103     dict = prefixkeys(sumdicts(*dicts), prefix + '_')
       
   104     ui.log(prefix + "_packsizes", "", **dict)
       
   105 
       
   106 def _parsepackmeta(metabuf):
       
   107     """parse datapack meta, bytes (<metadata-list>) -> dict
       
   108 
       
   109     The dict contains raw content - both keys and values are strings.
       
   110     Upper-level business may want to convert some of them to other types like
       
   111     integers, on their own.
       
   112 
       
   113     raise ValueError if the data is corrupted
       
   114     """
       
   115     metadict = {}
       
   116     offset = 0
       
   117     buflen = len(metabuf)
       
   118     while buflen - offset >= 3:
       
   119         key = metabuf[offset]
       
   120         offset += 1
       
   121         metalen = struct.unpack_from('!H', metabuf, offset)[0]
       
   122         offset += 2
       
   123         if offset + metalen > buflen:
       
   124             raise ValueError('corrupted metadata: incomplete buffer')
       
   125         value = metabuf[offset:offset + metalen]
       
   126         metadict[key] = value
       
   127         offset += metalen
       
   128     if offset != buflen:
       
   129         raise ValueError('corrupted metadata: redundant data')
       
   130     return metadict
       
   131 
       
   132 def _buildpackmeta(metadict):
       
   133     """reverse of _parsepackmeta, dict -> bytes (<metadata-list>)
       
   134 
       
   135     The dict contains raw content - both keys and values are strings.
       
   136     Upper-level business may want to serialize some of other types (like
       
   137     integers) to strings before calling this function.
       
   138 
       
   139     raise ProgrammingError when metadata key is illegal, or ValueError if
       
   140     length limit is exceeded
       
   141     """
       
   142     metabuf = ''
       
   143     for k, v in sorted((metadict or {}).iteritems()):
       
   144         if len(k) != 1:
       
   145             raise error.ProgrammingError('packmeta: illegal key: %s' % k)
       
   146         if len(v) > 0xfffe:
       
   147             raise ValueError('metadata value is too long: 0x%x > 0xfffe'
       
   148                              % len(v))
       
   149         metabuf += k
       
   150         metabuf += struct.pack('!H', len(v))
       
   151         metabuf += v
       
   152     # len(metabuf) is guaranteed representable in 4 bytes, because there are
       
   153     # only 256 keys, and for each value, len(value) <= 0xfffe.
       
   154     return metabuf
       
   155 
       
   156 _metaitemtypes = {
       
   157     constants.METAKEYFLAG: (int, long),
       
   158     constants.METAKEYSIZE: (int, long),
       
   159 }
       
   160 
       
   161 def buildpackmeta(metadict):
       
   162     """like _buildpackmeta, but typechecks metadict and normalize it.
       
   163 
       
   164     This means, METAKEYSIZE and METAKEYSIZE should have integers as values,
       
   165     and METAKEYFLAG will be dropped if its value is 0.
       
   166     """
       
   167     newmeta = {}
       
   168     for k, v in (metadict or {}).iteritems():
       
   169         expectedtype = _metaitemtypes.get(k, (bytes,))
       
   170         if not isinstance(v, expectedtype):
       
   171             raise error.ProgrammingError('packmeta: wrong type of key %s' % k)
       
   172         # normalize int to binary buffer
       
   173         if int in expectedtype:
       
   174             # optimization: remove flag if it's 0 to save space
       
   175             if k == constants.METAKEYFLAG and v == 0:
       
   176                 continue
       
   177             v = int2bin(v)
       
   178         newmeta[k] = v
       
   179     return _buildpackmeta(newmeta)
       
   180 
       
   181 def parsepackmeta(metabuf):
       
   182     """like _parsepackmeta, but convert fields to desired types automatically.
       
   183 
       
   184     This means, METAKEYFLAG and METAKEYSIZE fields will be converted to
       
   185     integers.
       
   186     """
       
   187     metadict = _parsepackmeta(metabuf)
       
   188     for k, v in metadict.iteritems():
       
   189         if k in _metaitemtypes and int in _metaitemtypes[k]:
       
   190             metadict[k] = bin2int(v)
       
   191     return metadict
       
   192 
       
   193 def int2bin(n):
       
   194     """convert a non-negative integer to raw binary buffer"""
       
   195     buf = bytearray()
       
   196     while n > 0:
       
   197         buf.insert(0, n & 0xff)
       
   198         n >>= 8
       
   199     return bytes(buf)
       
   200 
       
   201 def bin2int(buf):
       
   202     """the reverse of int2bin, convert a binary buffer to an integer"""
       
   203     x = 0
       
   204     for b in bytearray(buf):
       
   205         x <<= 8
       
   206         x |= b
       
   207     return x
       
   208 
       
   209 def parsesizeflags(raw):
       
   210     """given a remotefilelog blob, return (headersize, rawtextsize, flags)
       
   211 
       
   212     see remotefilelogserver.createfileblob for the format.
       
   213     raise RuntimeError if the content is illformed.
       
   214     """
       
   215     flags = revlog.REVIDX_DEFAULT_FLAGS
       
   216     size = None
       
   217     try:
       
   218         index = raw.index('\0')
       
   219         header = raw[:index]
       
   220         if header.startswith('v'):
       
   221             # v1 and above, header starts with 'v'
       
   222             if header.startswith('v1\n'):
       
   223                 for s in header.split('\n'):
       
   224                     if s.startswith(constants.METAKEYSIZE):
       
   225                         size = int(s[len(constants.METAKEYSIZE):])
       
   226                     elif s.startswith(constants.METAKEYFLAG):
       
   227                         flags = int(s[len(constants.METAKEYFLAG):])
       
   228             else:
       
   229                 raise RuntimeError('unsupported remotefilelog header: %s'
       
   230                                    % header)
       
   231         else:
       
   232             # v0, str(int(size)) is the header
       
   233             size = int(header)
       
   234     except ValueError:
       
   235         raise RuntimeError("unexpected remotefilelog header: illegal format")
       
   236     if size is None:
       
   237         raise RuntimeError("unexpected remotefilelog header: no size found")
       
   238     return index + 1, size, flags
       
   239 
       
   240 def buildfileblobheader(size, flags, version=None):
       
   241     """return the header of a remotefilelog blob.
       
   242 
       
   243     see remotefilelogserver.createfileblob for the format.
       
   244     approximately the reverse of parsesizeflags.
       
   245 
       
   246     version could be 0 or 1, or None (auto decide).
       
   247     """
       
   248     # choose v0 if flags is empty, otherwise v1
       
   249     if version is None:
       
   250         version = int(bool(flags))
       
   251     if version == 1:
       
   252         header = ('v1\n%s%d\n%s%d'
       
   253                   % (constants.METAKEYSIZE, size,
       
   254                      constants.METAKEYFLAG, flags))
       
   255     elif version == 0:
       
   256         if flags:
       
   257             raise error.ProgrammingError('fileblob v0 does not support flag')
       
   258         header = '%d' % size
       
   259     else:
       
   260         raise error.ProgrammingError('unknown fileblob version %d' % version)
       
   261     return header
       
   262 
       
   263 def ancestormap(raw):
       
   264     offset, size, flags = parsesizeflags(raw)
       
   265     start = offset + size
       
   266 
       
   267     mapping = {}
       
   268     while start < len(raw):
       
   269         divider = raw.index('\0', start + 80)
       
   270 
       
   271         currentnode = raw[start:(start + 20)]
       
   272         p1 = raw[(start + 20):(start + 40)]
       
   273         p2 = raw[(start + 40):(start + 60)]
       
   274         linknode = raw[(start + 60):(start + 80)]
       
   275         copyfrom = raw[(start + 80):divider]
       
   276 
       
   277         mapping[currentnode] = (p1, p2, linknode, copyfrom)
       
   278         start = divider + 1
       
   279 
       
   280     return mapping
       
   281 
       
   282 def readfile(path):
       
   283     f = open(path, 'rb')
       
   284     try:
       
   285         result = f.read()
       
   286 
       
   287         # we should never have empty files
       
   288         if not result:
       
   289             os.remove(path)
       
   290             raise IOError("empty file: %s" % path)
       
   291 
       
   292         return result
       
   293     finally:
       
   294         f.close()
       
   295 
       
def unlinkfile(filepath):
    """Remove ``filepath`` from disk, working around Windows quirks."""
    if pycompat.iswindows:
        # On Windows, os.unlink cannot delete readonly files
        os.chmod(filepath, stat.S_IWUSR)
    os.unlink(filepath)
       
   301 
       
def renamefile(source, destination):
    """Rename ``source`` to ``destination``, replacing any existing
    destination, working around Windows quirks."""
    if pycompat.iswindows:
        # On Windows, os.rename cannot rename readonly files
        # and cannot overwrite destination if it exists
        os.chmod(source, stat.S_IWUSR)
        if os.path.isfile(destination):
            os.chmod(destination, stat.S_IWUSR)
            os.unlink(destination)

    os.rename(source, destination)
       
   312 
       
   313 def writefile(path, content, readonly=False):
       
   314     dirname, filename = os.path.split(path)
       
   315     if not os.path.exists(dirname):
       
   316         try:
       
   317             os.makedirs(dirname)
       
   318         except OSError as ex:
       
   319             if ex.errno != errno.EEXIST:
       
   320                 raise
       
   321 
       
   322     fd, temp = tempfile.mkstemp(prefix='.%s-' % filename, dir=dirname)
       
   323     os.close(fd)
       
   324 
       
   325     try:
       
   326         f = util.posixfile(temp, 'wb')
       
   327         f.write(content)
       
   328         f.close()
       
   329 
       
   330         if readonly:
       
   331             mode = 0o444
       
   332         else:
       
   333             # tempfiles are created with 0o600, so we need to manually set the
       
   334             # mode.
       
   335             oldumask = os.umask(0)
       
   336             # there's no way to get the umask without modifying it, so set it
       
   337             # back
       
   338             os.umask(oldumask)
       
   339             mode = ~oldumask
       
   340 
       
   341         renamefile(temp, path)
       
   342         os.chmod(path, mode)
       
   343     except Exception:
       
   344         try:
       
   345             unlinkfile(temp)
       
   346         except OSError:
       
   347             pass
       
   348         raise
       
   349 
       
   350 def sortnodes(nodes, parentfunc):
       
   351     """Topologically sorts the nodes, using the parentfunc to find
       
   352     the parents of nodes."""
       
   353     nodes = set(nodes)
       
   354     childmap = {}
       
   355     parentmap = {}
       
   356     roots = []
       
   357 
       
   358     # Build a child and parent map
       
   359     for n in nodes:
       
   360         parents = [p for p in parentfunc(n) if p in nodes]
       
   361         parentmap[n] = set(parents)
       
   362         for p in parents:
       
   363             childmap.setdefault(p, set()).add(n)
       
   364         if not parents:
       
   365             roots.append(n)
       
   366 
       
   367     roots.sort()
       
   368     # Process roots, adding children to the queue as they become roots
       
   369     results = []
       
   370     while roots:
       
   371         n = roots.pop(0)
       
   372         results.append(n)
       
   373         if n in childmap:
       
   374             children = childmap[n]
       
   375             for c in children:
       
   376                 childparents = parentmap[c]
       
   377                 childparents.remove(n)
       
   378                 if len(childparents) == 0:
       
   379                     # insert at the beginning, that way child nodes
       
   380                     # are likely to be output immediately after their
       
   381                     # parents.  This gives better compression results.
       
   382                     roots.insert(0, c)
       
   383 
       
   384     return results
       
   385 
       
   386 def readexactly(stream, n):
       
   387     '''read n bytes from stream.read and abort if less was available'''
       
   388     s = stream.read(n)
       
   389     if len(s) < n:
       
   390         raise error.Abort(_("stream ended unexpectedly"
       
   391                            " (got %d bytes, expected %d)")
       
   392                           % (len(s), n))
       
   393     return s
       
   394 
       
   395 def readunpack(stream, fmt):
       
   396     data = readexactly(stream, struct.calcsize(fmt))
       
   397     return struct.unpack(fmt, data)
       
   398 
       
   399 def readpath(stream):
       
   400     rawlen = readexactly(stream, constants.FILENAMESIZE)
       
   401     pathlen = struct.unpack(constants.FILENAMESTRUCT, rawlen)[0]
       
   402     return readexactly(stream, pathlen)
       
   403 
       
   404 def readnodelist(stream):
       
   405     rawlen = readexactly(stream, constants.NODECOUNTSIZE)
       
   406     nodecount = struct.unpack(constants.NODECOUNTSTRUCT, rawlen)[0]
       
   407     for i in pycompat.xrange(nodecount):
       
   408         yield readexactly(stream, constants.NODESIZE)
       
   409 
       
   410 def readpathlist(stream):
       
   411     rawlen = readexactly(stream, constants.PATHCOUNTSIZE)
       
   412     pathcount = struct.unpack(constants.PATHCOUNTSTRUCT, rawlen)[0]
       
   413     for i in pycompat.xrange(pathcount):
       
   414         yield readpath(stream)
       
   415 
       
   416 def getgid(groupname):
       
   417     try:
       
   418         gid = grp.getgrnam(groupname).gr_gid
       
   419         return gid
       
   420     except KeyError:
       
   421         return None
       
   422 
       
def setstickygroupdir(path, gid, warn=None):
    """Give ``path`` group ``gid`` and setgid + group-writable permissions.

    No-op when ``gid`` is None.  This is best-effort: failures are
    reported through ``warn`` (when provided) instead of being raised.
    """
    if gid is None:
        return
    try:
        os.chown(path, -1, gid)
        os.chmod(path, 0o2775)
    except (IOError, OSError) as ex:
        if warn:
            warn(_('unable to chown/chmod on %s: %s\n') % (path, ex))
       
   432 
       
def mkstickygroupdir(ui, path):
    """Creates the given directory (if it doesn't exist) and give it a
    particular group with setgid enabled."""
    gid = None
    groupname = ui.config("remotefilelog", "cachegroup")
    if groupname:
        gid = getgid(groupname)
        if gid is None:
            ui.warn(_('unable to resolve group name: %s\n') % groupname)

    # we use a single stat syscall to test the existence and mode / group bit
    st = None
    try:
        st = os.stat(path)
    except OSError:
        pass

    if st:
        # exists
        if (st.st_mode & 0o2775) != 0o2775 or st.st_gid != gid:
            # permission needs to be fixed
            setstickygroupdir(path, gid, ui.warn)
        return

    # directory is missing: create all absent ancestors under a umask that
    # keeps the group-write bit, then fix group/mode on each new directory
    oldumask = os.umask(0o002)
    try:
        missingdirs = [path]
        path = os.path.dirname(path)
        while path and not os.path.exists(path):
            missingdirs.append(path)
            path = os.path.dirname(path)

        # create from the shallowest missing ancestor down to the target
        for path in reversed(missingdirs):
            try:
                os.mkdir(path)
            except OSError as ex:
                # a concurrent writer may have created it already
                if ex.errno != errno.EEXIST:
                    raise

        for path in missingdirs:
            setstickygroupdir(path, gid, ui.warn)
    finally:
        # always restore the caller's umask
        os.umask(oldumask)
       
   476 
       
   477 def getusername(ui):
       
   478     try:
       
   479         return stringutil.shortuser(ui.username())
       
   480     except Exception:
       
   481         return 'unknown'
       
   482 
       
   483 def getreponame(ui):
       
   484     reponame = ui.config('paths', 'default')
       
   485     if reponame:
       
   486         return os.path.basename(reponame)
       
   487     return "unknown"