Mercurial > hg
view mercurial/namespaces.py @ 30451:94ca0e13d1fc
perf: add command for measuring revlog chunk operations
Upcoming commits will teach revlogs to leverage the new compression
engine API so that new compression formats can more easily be
leveraged in revlogs. We want to be sure this refactoring doesn't
regress performance. So this commit introduces "perfrevlogchunks" to
explicitly test performance of reading, decompressing, and
recompressing revlog chunks.
Here is output when run on the mozilla-unified repo:
$ hg perfrevlogchunks -c
! read
! wall 0.346603 comb 0.350000 user 0.340000 sys 0.010000 (best of 28)
! read w/ reused fd
! wall 0.337707 comb 0.340000 user 0.320000 sys 0.020000 (best of 30)
! read batch
! wall 0.013206 comb 0.020000 user 0.000000 sys 0.020000 (best of 221)
! read batch w/ reused fd
! wall 0.013259 comb 0.030000 user 0.010000 sys 0.020000 (best of 222)
! chunk
! wall 1.909939 comb 1.910000 user 1.900000 sys 0.010000 (best of 6)
! chunk batch
! wall 1.750677 comb 1.760000 user 1.740000 sys 0.020000 (best of 6)
! compress
! wall 5.668004 comb 5.670000 user 5.670000 sys 0.000000 (best of 3)
$ hg perfrevlogchunks -m
! read
! wall 0.365834 comb 0.370000 user 0.350000 sys 0.020000 (best of 26)
! read w/ reused fd
! wall 0.350160 comb 0.350000 user 0.320000 sys 0.030000 (best of 28)
! read batch
! wall 0.024777 comb 0.020000 user 0.000000 sys 0.020000 (best of 119)
! read batch w/ reused fd
! wall 0.024895 comb 0.030000 user 0.000000 sys 0.030000 (best of 118)
! chunk
! wall 2.514061 comb 2.520000 user 2.480000 sys 0.040000 (best of 4)
! chunk batch
! wall 2.380788 comb 2.380000 user 2.360000 sys 0.020000 (best of 5)
! compress
! wall 9.815297 comb 9.820000 user 9.820000 sys 0.000000 (best of 3)
We already see some interesting data, such as how much slower
non-batched chunk reading is and that zlib compression appears to be
>2x slower than decompression.
I didn't have the data when I wrote this commit message, but I ran this
on Mozilla's NFS-based Mercurial server and the time for reading with a
reused file descriptor was faster. So I think it is worth testing both
with and without file descriptor reuse so we can make informed
decisions about recycling file descriptors.
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Thu, 17 Nov 2016 20:17:51 -0800 |
parents | ca52512ac709 |
children | bd872f64a8ba |
line wrap: on
line source
from __future__ import absolute_import

from .i18n import _
from . import (
    templatekw,
    util,
)


def tolist(val):
    """
    a convenience method to return an empty list instead of None

    Any value other than None is wrapped in a single-element list.
    """
    if val is None:
        return []
    else:
        return [val]


class namespaces(object):
    """provides an interface to register and operate on multiple namespaces.

    Registered namespaces are kept in a util.sortdict keyed by namespace
    name; singlenode() consults them in that mapping's iteration order.
    See the namespace class below for details on the namespace object.
    """

    _names_version = 0

    def __init__(self):
        self._names = util.sortdict()

        # we need current mercurial named objects (bookmarks, tags, and
        # branches) to be initialized somewhere, so that place is here
        #
        # The explicit logfmt strings below pad the label to 13 columns,
        # the same width the fallback in namespace.__init__ produces with
        # ljust(13), so that all labels line up in "hg log" output.
        bmknames = lambda repo: repo._bookmarks.keys()
        bmknamemap = lambda repo, name: tolist(repo._bookmarks.get(name))
        bmknodemap = lambda repo, node: repo.nodebookmarks(node)
        n = namespace("bookmarks", templatename="bookmark",
                      # i18n: column positioning for "hg log"
                      logfmt=_("bookmark:    %s\n"),
                      listnames=bmknames,
                      namemap=bmknamemap, nodemap=bmknodemap)
        self.addnamespace(n)

        tagnames = lambda repo: [t for t, n in repo.tagslist()]
        tagnamemap = lambda repo, name: tolist(repo._tagscache.tags.get(name))
        tagnodemap = lambda repo, node: repo.nodetags(node)
        n = namespace("tags", templatename="tag",
                      # i18n: column positioning for "hg log"
                      logfmt=_("tag:         %s\n"),
                      listnames=tagnames,
                      namemap=tagnamemap, nodemap=tagnodemap,
                      deprecated=set(['tip']))
        self.addnamespace(n)

        bnames = lambda repo: repo.branchmap().keys()
        bnamemap = lambda repo, name: tolist(repo.branchtip(name, True))
        bnodemap = lambda repo, node: [repo[node].branch()]
        n = namespace("branches", templatename="branch",
                      # i18n: column positioning for "hg log"
                      logfmt=_("branch:      %s\n"),
                      listnames=bnames,
                      namemap=bnamemap, nodemap=bnodemap)
        self.addnamespace(n)

    def __getitem__(self, namespace):
        """returns the namespace object registered under the given name

        Raises whatever the underlying mapping raises for a missing key.
        """
        return self._names[namespace]

    def __iter__(self):
        """iterate over the registered namespace names"""
        return self._names.__iter__()

    def iteritems(self):
        """iterate over (name, namespace object) pairs"""
        return self._names.iteritems()

    def addnamespace(self, namespace, order=None):
        """register a namespace

        namespace: the namespace object to be registered; it is stored
                   under its 'name' attribute (in plural form)
        order: optional argument to specify the order of namespaces
               (e.g. 'branches' should be listed before 'bookmarks')

        As a side effect, a template keyword named after the namespace is
        added to templatekw.keywords unless one already exists.
        """
        if order is not None:
            self._names.insert(order, namespace.name, namespace)
        else:
            self._names[namespace.name] = namespace

        # we only generate a template keyword if one does not already exist
        if namespace.name not in templatekw.keywords:
            def generatekw(**args):
                return templatekw.shownames(namespace.name, **args)

            templatekw.keywords[namespace.name] = generatekw

    def singlenode(self, repo, name):
        """
        Return the 'best' node for the given name.

        Namespaces are consulted in the defined precedence order, and the
        first one whose name-to-nodes mapping function returns a nonempty
        list decides the result. If that list holds a single node, it is
        returned; if it holds several, the node with the highest revision
        number is returned.

        Raises a KeyError if there is no such node.
        """
        # only the namespace objects are needed; the key is ignored
        for unused, ns in self._names.iteritems():
            nodes = ns.namemap(repo, name)
            if nodes:
                # return max revision number
                if len(nodes) > 1:
                    cl = repo.changelog
                    maxrev = max(cl.rev(node) for node in nodes)
                    return cl.node(maxrev)
                return nodes[0]
        raise KeyError(_('no such name: %s') % name)


class namespace(object):
    """provides an interface to a namespace

    Namespaces are basically generic many-to-many mapping between some
    (namespaced) names and nodes. The goal here is to control the pollution of
    jamming things into tags or bookmarks (in extension-land) and to simplify
    internal bits of mercurial: log output, tab completion, etc.

    More precisely, we define a mapping of names to nodes, and a mapping from
    nodes to names. Each mapping returns a list.

    Furthermore, each name mapping will be passed a name to lookup which might
    not be in its domain. In this case, each method should return an empty list
    and not raise an error.

    This namespace object will define the properties we need:
      'name': the namespace (plural form)
      'templatename': name to use for templating (usually the singular form
                      of the plural namespace name)
      'listnames': list of all names in the namespace (usually the keys of a
                   dictionary)
      'namemap': function that takes a name and returns a list of nodes
      'nodemap': function that takes a node and returns a list of names
      'deprecated': set of names to be masked for ordinary use
    """

    def __init__(self, name, templatename=None, logname=None, colorname=None,
                 logfmt=None, listnames=None, namemap=None, nodemap=None,
                 deprecated=None):
        """create a namespace

        name: the namespace to be registered (in plural form)
        templatename: the name to use for templating
        logname: the name to use for log output; if not specified templatename
                 is used
        colorname: the name to use for colored log output; if not specified
                   logname is used
        logfmt: the format to use for (i18n-ed) log output; if not specified
                it is composed from logname
        listnames: function to list all names
        namemap: function that inputs a name, output node(s)
        nodemap: function that inputs a node, output name(s)
        deprecated: set of names to be masked for ordinary use
        """
        self.name = name
        self.templatename = templatename
        self.logname = logname
        self.colorname = colorname
        self.logfmt = logfmt
        self.listnames = listnames
        self.namemap = namemap
        self.nodemap = nodemap

        # if logname is not specified, use the template name as backup
        if self.logname is None:
            self.logname = self.templatename

        # if colorname is not specified, just use the logname as a backup
        if self.colorname is None:
            self.colorname = self.logname

        # if logfmt is not specified, compose it from logname as backup
        if self.logfmt is None:
            # i18n: column positioning for "hg log"
            self.logfmt = ("%s:" % self.logname).ljust(13) + "%s\n"

        # give every instance its own set rather than sharing one default
        if deprecated is None:
            self.deprecated = set()
        else:
            self.deprecated = deprecated

    def names(self, repo, node):
        """method that returns a (sorted) list of names in a namespace that
        match a given node"""
        return sorted(self.nodemap(repo, node))

    def nodes(self, repo, name):
        """method that returns a (sorted) list of nodes in a namespace that
        match a given name.

        """
        return sorted(self.namemap(repo, name))