hgext/remotefilelog/debugcommands.py
changeset 40495 3a333a582d7b
child 40507 e2a1584e9e3f
equal deleted inserted replaced
40494:9aeb9e2d28a7 40495:3a333a582d7b
       
     1 # debugcommands.py - debug logic for remotefilelog
       
     2 #
       
     3 # Copyright 2013 Facebook, Inc.
       
     4 #
       
     5 # This software may be used and distributed according to the terms of the
       
     6 # GNU General Public License version 2 or any later version.
       
     7 from __future__ import absolute_import
       
     8 
       
     9 import hashlib
       
    10 import os
       
    11 
       
    12 from mercurial.node import bin, hex, nullid, short
       
    13 from mercurial.i18n import _
       
    14 from mercurial import (
       
    15     error,
       
    16     filelog,
       
    17     revlog,
       
    18 )
       
    19 from . import (
       
    20     constants,
       
    21     datapack,
       
    22     extutil,
       
    23     fileserverclient,
       
    24     historypack,
       
    25     lz4wrapper,
       
    26     repack,
       
    27     shallowrepo,
       
    28     shallowutil,
       
    29 )
       
    30 
       
    31 def debugremotefilelog(ui, path, **opts):
       
    32     decompress = opts.get('decompress')
       
    33 
       
    34     size, firstnode, mapping = parsefileblob(path, decompress)
       
    35 
       
    36     ui.status(_("size: %s bytes\n") % (size))
       
    37     ui.status(_("path: %s \n") % (path))
       
    38     ui.status(_("key: %s \n") % (short(firstnode)))
       
    39     ui.status(_("\n"))
       
    40     ui.status(_("%12s => %12s %13s %13s %12s\n") %
       
    41               ("node", "p1", "p2", "linknode", "copyfrom"))
       
    42 
       
    43     queue = [firstnode]
       
    44     while queue:
       
    45         node = queue.pop(0)
       
    46         p1, p2, linknode, copyfrom = mapping[node]
       
    47         ui.status(_("%s => %s  %s  %s  %s\n") %
       
    48             (short(node), short(p1), short(p2), short(linknode), copyfrom))
       
    49         if p1 != nullid:
       
    50             queue.append(p1)
       
    51         if p2 != nullid:
       
    52             queue.append(p2)
       
    53 
       
    54 def buildtemprevlog(repo, file):
       
    55     # get filename key
       
    56     filekey = hashlib.sha1(file).hexdigest()
       
    57     filedir = os.path.join(repo.path, 'store/data', filekey)
       
    58 
       
    59     # sort all entries based on linkrev
       
    60     fctxs = []
       
    61     for filenode in os.listdir(filedir):
       
    62         if '_old' not in filenode:
       
    63             fctxs.append(repo.filectx(file, fileid=bin(filenode)))
       
    64 
       
    65     fctxs = sorted(fctxs, key=lambda x: x.linkrev())
       
    66 
       
    67     # add to revlog
       
    68     temppath = repo.sjoin('data/temprevlog.i')
       
    69     if os.path.exists(temppath):
       
    70         os.remove(temppath)
       
    71     r = filelog.filelog(repo.svfs, 'temprevlog')
       
    72 
       
    73     class faket(object):
       
    74         def add(self, a, b, c):
       
    75             pass
       
    76     t = faket()
       
    77     for fctx in fctxs:
       
    78         if fctx.node() not in repo:
       
    79             continue
       
    80 
       
    81         p = fctx.filelog().parents(fctx.filenode())
       
    82         meta = {}
       
    83         if fctx.renamed():
       
    84             meta['copy'] = fctx.renamed()[0]
       
    85             meta['copyrev'] = hex(fctx.renamed()[1])
       
    86 
       
    87         r.add(fctx.data(), meta, t, fctx.linkrev(), p[0], p[1])
       
    88 
       
    89     return r
       
    90 
       
    91 def debugindex(orig, ui, repo, file_=None, **opts):
       
    92     """dump the contents of an index file"""
       
    93     if (opts.get('changelog') or
       
    94         opts.get('manifest') or
       
    95         opts.get('dir') or
       
    96         not shallowrepo.requirement in repo.requirements or
       
    97         not repo.shallowmatch(file_)):
       
    98         return orig(ui, repo, file_, **opts)
       
    99 
       
   100     r = buildtemprevlog(repo, file_)
       
   101 
       
   102     # debugindex like normal
       
   103     format = opts.get('format', 0)
       
   104     if format not in (0, 1):
       
   105         raise error.Abort(_("unknown format %d") % format)
       
   106 
       
   107     generaldelta = r.version & revlog.FLAG_GENERALDELTA
       
   108     if generaldelta:
       
   109         basehdr = ' delta'
       
   110     else:
       
   111         basehdr = '  base'
       
   112 
       
   113     if format == 0:
       
   114         ui.write(("   rev    offset  length " + basehdr + " linkrev"
       
   115                   " nodeid       p1           p2\n"))
       
   116     elif format == 1:
       
   117         ui.write(("   rev flag   offset   length"
       
   118                   "     size " + basehdr + "   link     p1     p2"
       
   119                   "       nodeid\n"))
       
   120 
       
   121     for i in r:
       
   122         node = r.node(i)
       
   123         if generaldelta:
       
   124             base = r.deltaparent(i)
       
   125         else:
       
   126             base = r.chainbase(i)
       
   127         if format == 0:
       
   128             try:
       
   129                 pp = r.parents(node)
       
   130             except Exception:
       
   131                 pp = [nullid, nullid]
       
   132             ui.write("% 6d % 9d % 7d % 6d % 7d %s %s %s\n" % (
       
   133                     i, r.start(i), r.length(i), base, r.linkrev(i),
       
   134                     short(node), short(pp[0]), short(pp[1])))
       
   135         elif format == 1:
       
   136             pr = r.parentrevs(i)
       
   137             ui.write("% 6d %04x % 8d % 8d % 8d % 6d % 6d % 6d % 6d %s\n" % (
       
   138                     i, r.flags(i), r.start(i), r.length(i), r.rawsize(i),
       
   139                     base, r.linkrev(i), pr[0], pr[1], short(node)))
       
   140 
       
   141 def debugindexdot(orig, ui, repo, file_):
       
   142     """dump an index DAG as a graphviz dot file"""
       
   143     if not shallowrepo.requirement in repo.requirements:
       
   144         return orig(ui, repo, file_)
       
   145 
       
   146     r = buildtemprevlog(repo, os.path.basename(file_)[:-2])
       
   147 
       
   148     ui.write(("digraph G {\n"))
       
   149     for i in r:
       
   150         node = r.node(i)
       
   151         pp = r.parents(node)
       
   152         ui.write("\t%d -> %d\n" % (r.rev(pp[0]), i))
       
   153         if pp[1] != nullid:
       
   154             ui.write("\t%d -> %d\n" % (r.rev(pp[1]), i))
       
   155     ui.write("}\n")
       
   156 
       
   157 def verifyremotefilelog(ui, path, **opts):
       
   158     decompress = opts.get('decompress')
       
   159 
       
   160     for root, dirs, files in os.walk(path):
       
   161         for file in files:
       
   162             if file == "repos":
       
   163                 continue
       
   164             filepath = os.path.join(root, file)
       
   165             size, firstnode, mapping = parsefileblob(filepath, decompress)
       
   166             for p1, p2, linknode, copyfrom in mapping.itervalues():
       
   167                 if linknode == nullid:
       
   168                     actualpath = os.path.relpath(root, path)
       
   169                     key = fileserverclient.getcachekey("reponame", actualpath,
       
   170                                                        file)
       
   171                     ui.status("%s %s\n" % (key, os.path.relpath(filepath,
       
   172                                                                 path)))
       
   173 
       
   174 def parsefileblob(path, decompress):
       
   175     raw = None
       
   176     f = open(path, "r")
       
   177     try:
       
   178         raw = f.read()
       
   179     finally:
       
   180         f.close()
       
   181 
       
   182     if decompress:
       
   183         raw = lz4wrapper.lz4decompress(raw)
       
   184 
       
   185     offset, size, flags = shallowutil.parsesizeflags(raw)
       
   186     start = offset + size
       
   187 
       
   188     firstnode = None
       
   189 
       
   190     mapping = {}
       
   191     while start < len(raw):
       
   192         divider = raw.index('\0', start + 80)
       
   193 
       
   194         currentnode = raw[start:(start + 20)]
       
   195         if not firstnode:
       
   196             firstnode = currentnode
       
   197 
       
   198         p1 = raw[(start + 20):(start + 40)]
       
   199         p2 = raw[(start + 40):(start + 60)]
       
   200         linknode = raw[(start + 60):(start + 80)]
       
   201         copyfrom = raw[(start + 80):divider]
       
   202 
       
   203         mapping[currentnode] = (p1, p2, linknode, copyfrom)
       
   204         start = divider + 1
       
   205 
       
   206     return size, firstnode, mapping
       
   207 
       
   208 def debugdatapack(ui, *paths, **opts):
       
   209     for path in paths:
       
   210         if '.data' in path:
       
   211             path = path[:path.index('.data')]
       
   212         ui.write("%s:\n" % path)
       
   213         dpack = datapack.datapack(path)
       
   214         node = opts.get('node')
       
   215         if node:
       
   216             deltachain = dpack.getdeltachain('', bin(node))
       
   217             dumpdeltachain(ui, deltachain, **opts)
       
   218             return
       
   219 
       
   220         if opts.get('long'):
       
   221             hashformatter = hex
       
   222             hashlen = 42
       
   223         else:
       
   224             hashformatter = short
       
   225             hashlen = 14
       
   226 
       
   227         lastfilename = None
       
   228         totaldeltasize = 0
       
   229         totalblobsize = 0
       
   230         def printtotals():
       
   231             if lastfilename is not None:
       
   232                 ui.write("\n")
       
   233             if not totaldeltasize or not totalblobsize:
       
   234                 return
       
   235             difference = totalblobsize - totaldeltasize
       
   236             deltastr = "%0.1f%% %s" % (
       
   237                 (100.0 * abs(difference) / totalblobsize),
       
   238                 ("smaller" if difference > 0 else "bigger"))
       
   239 
       
   240             ui.write(("Total:%s%s  %s (%s)\n") % (
       
   241                 "".ljust(2 * hashlen - len("Total:")),
       
   242                 str(totaldeltasize).ljust(12),
       
   243                 str(totalblobsize).ljust(9),
       
   244                 deltastr
       
   245             ))
       
   246 
       
   247         bases = {}
       
   248         nodes = set()
       
   249         failures = 0
       
   250         for filename, node, deltabase, deltalen in dpack.iterentries():
       
   251             bases[node] = deltabase
       
   252             if node in nodes:
       
   253                 ui.write(("Bad entry: %s appears twice\n" % short(node)))
       
   254                 failures += 1
       
   255             nodes.add(node)
       
   256             if filename != lastfilename:
       
   257                 printtotals()
       
   258                 name = '(empty name)' if filename == '' else filename
       
   259                 ui.write("%s:\n" % name)
       
   260                 ui.write("%s%s%s%s\n" % (
       
   261                     "Node".ljust(hashlen),
       
   262                     "Delta Base".ljust(hashlen),
       
   263                     "Delta Length".ljust(14),
       
   264                     "Blob Size".ljust(9)))
       
   265                 lastfilename = filename
       
   266                 totalblobsize = 0
       
   267                 totaldeltasize = 0
       
   268 
       
   269             # Metadata could be missing, in which case it will be an empty dict.
       
   270             meta = dpack.getmeta(filename, node)
       
   271             if constants.METAKEYSIZE in meta:
       
   272                 blobsize = meta[constants.METAKEYSIZE]
       
   273                 totaldeltasize += deltalen
       
   274                 totalblobsize += blobsize
       
   275             else:
       
   276                 blobsize = "(missing)"
       
   277             ui.write("%s  %s  %s%s\n" % (
       
   278                 hashformatter(node),
       
   279                 hashformatter(deltabase),
       
   280                 str(deltalen).ljust(14),
       
   281                 blobsize))
       
   282 
       
   283         if filename is not None:
       
   284             printtotals()
       
   285 
       
   286         failures += _sanitycheck(ui, set(nodes), bases)
       
   287         if failures > 1:
       
   288             ui.warn(("%d failures\n" % failures))
       
   289             return 1
       
   290 
       
   291 def _sanitycheck(ui, nodes, bases):
       
   292     """
       
   293     Does some basic sanity checking on a packfiles with ``nodes`` ``bases`` (a
       
   294     mapping of node->base):
       
   295 
       
   296     - Each deltabase must itself be a node elsewhere in the pack
       
   297     - There must be no cycles
       
   298     """
       
   299     failures = 0
       
   300     for node in nodes:
       
   301         seen = set()
       
   302         current = node
       
   303         deltabase = bases[current]
       
   304 
       
   305         while deltabase != nullid:
       
   306             if deltabase not in nodes:
       
   307                 ui.warn(("Bad entry: %s has an unknown deltabase (%s)\n" %
       
   308                         (short(node), short(deltabase))))
       
   309                 failures += 1
       
   310                 break
       
   311 
       
   312             if deltabase in seen:
       
   313                 ui.warn(("Bad entry: %s has a cycle (at %s)\n" %
       
   314                         (short(node), short(deltabase))))
       
   315                 failures += 1
       
   316                 break
       
   317 
       
   318             current = deltabase
       
   319             seen.add(current)
       
   320             deltabase = bases[current]
       
   321         # Since ``node`` begins a valid chain, reset/memoize its base to nullid
       
   322         # so we don't traverse it again.
       
   323         bases[node] = nullid
       
   324     return failures
       
   325 
       
   326 def dumpdeltachain(ui, deltachain, **opts):
       
   327     hashformatter = hex
       
   328     hashlen = 40
       
   329 
       
   330     lastfilename = None
       
   331     for filename, node, filename, deltabasenode, delta in deltachain:
       
   332         if filename != lastfilename:
       
   333             ui.write("\n%s\n" % filename)
       
   334             lastfilename = filename
       
   335         ui.write("%s  %s  %s  %s\n" % (
       
   336             "Node".ljust(hashlen),
       
   337             "Delta Base".ljust(hashlen),
       
   338             "Delta SHA1".ljust(hashlen),
       
   339             "Delta Length".ljust(6),
       
   340         ))
       
   341 
       
   342         ui.write("%s  %s  %s  %s\n" % (
       
   343             hashformatter(node),
       
   344             hashformatter(deltabasenode),
       
   345             hashlib.sha1(delta).hexdigest(),
       
   346             len(delta)))
       
   347 
       
   348 def debughistorypack(ui, path):
       
   349     if '.hist' in path:
       
   350         path = path[:path.index('.hist')]
       
   351     hpack = historypack.historypack(path)
       
   352 
       
   353     lastfilename = None
       
   354     for entry in hpack.iterentries():
       
   355         filename, node, p1node, p2node, linknode, copyfrom = entry
       
   356         if filename != lastfilename:
       
   357             ui.write("\n%s\n" % filename)
       
   358             ui.write("%s%s%s%s%s\n" % (
       
   359                 "Node".ljust(14),
       
   360                 "P1 Node".ljust(14),
       
   361                 "P2 Node".ljust(14),
       
   362                 "Link Node".ljust(14),
       
   363                 "Copy From"))
       
   364             lastfilename = filename
       
   365         ui.write("%s  %s  %s  %s  %s\n" % (short(node), short(p1node),
       
   366             short(p2node), short(linknode), copyfrom))
       
   367 
       
   368 def debugwaitonrepack(repo):
       
   369     with extutil.flock(repack.repacklockvfs(repo).join('repacklock'), ''):
       
   370         return
       
   371 
       
   372 def debugwaitonprefetch(repo):
       
   373     with repo._lock(repo.svfs, "prefetchlock", True, None,
       
   374                          None, _('prefetching in %s') % repo.origroot):
       
   375         pass