revlog: read/cache chunks in fixed windows of 64 KB
When reading a revlog chunk, instead of reading up to 64 KB ahead of the
request offset and caching that, this change caches a fixed window before
and after the requested data that falls on 64 KB boundaries. This increases
cache hits when reading revlogs backwards.
Running perfmoonwalk on the Mercurial repo (with almost 20,000 changesets) on
Mac OS X with an SSD, before this change:
$ hg perfmoonwalk
! wall 2.307994 comb 2.310000 user 2.120000 sys 0.190000 (best of 5)
(Each run has 10,668 cache hits and 9,304 misses.)
After this change:
$ hg perfmoonwalk
! wall 1.814117 comb 1.810000 user 1.810000 sys 0.000000 (best of 6)
(19,931 cache hits, 62 misses.)
On a busy NFS share, before this change:
$ hg perfmoonwalk
! wall 17.000034 comb 4.100000 user 3.270000 sys 0.830000 (best of 3)
After:
$ hg perfmoonwalk
! wall 1.746115 comb 1.670000 user 1.660000 sys 0.010000 (best of 5)
"Fixer that translates some APIs ignored by the default 2to3 fixers."
# FIXME: This fixer has some ugly hacks. Its main design is based on that of
# fix_imports, from lib2to3. Unfortunately, the fix_imports framework only
# changes module names "without dots", meaning it won't work for some changes
# in the email module/package. Thus this fixer was born. I believe that with a
# bit more thinking, a more generic fixer can be implemented, but I'll leave
# that as future work.
from lib2to3.fixer_util import Name
from lib2to3.fixes import fix_imports
# This maps the old names to the new names. Note that a drawback of the current
# design is that the dictionary keys MUST have EXACTLY one dot (.) in them,
# otherwise things will break. (If you don't need a module hierarchy, you're
# better of just inherit from fix_imports and overriding the MAPPING dict.)
MAPPING = {'email.Utils': 'email.utils',
'email.Errors': 'email.errors',
'email.Header': 'email.header',
'email.Parser': 'email.parser',
'email.Encoders': 'email.encoders',
'email.MIMEText': 'email.mime.text',
'email.MIMEBase': 'email.mime.base',
'email.Generator': 'email.generator',
'email.MIMEMultipart': 'email.mime.multipart',
}
def alternates(members):
return "(" + "|".join(map(repr, members)) + ")"
def build_pattern(mapping=MAPPING):
packages = {}
for key in mapping:
# What we are doing here is the following: with dotted names, we'll
# have something like package_name <trailer '.' module>. Then, we are
# making a dictionary to copy this structure. For example, if
# mapping={'A.B': 'a.b', 'A.C': 'a.c'}, it will generate the dictionary
# {'A': ['b', 'c']} to, then, generate something like "A <trailer '.'
# ('b' | 'c')".
name = key.split('.')
prefix = name[0]
if prefix in packages:
packages[prefix].append(name[1:][0])
else:
packages[prefix] = name[1:]
mod_list = ' | '.join(["'%s' '.' ('%s')" %
(key, "' | '".join(packages[key])) for key in packages])
mod_list = '(' + mod_list + ' )'
yield """name_import=import_name< 'import' module_name=dotted_name< %s > >
""" % mod_list
yield """name_import=import_name< 'import'
multiple_imports=dotted_as_names< any*
module_name=dotted_name< %s >
any* >
>""" % mod_list
packs = ' | '.join(["'%s' trailer<'.' ('%s')>" % (key,
"' | '".join(packages[key])) for key in packages])
yield "power< package=(%s) trailer<'.' any > any* >" % packs
class FixLeftoverImports(fix_imports.FixImports):
# We want to run this fixer after fix_import has run (this shouldn't matter
# for hg, though, as setup3k prefers to run the default fixers first)
mapping = MAPPING
def build_pattern(self):
return "|".join(build_pattern(self.mapping))
def transform(self, node, results):
# Mostly copied from fix_imports.py
import_mod = results.get("module_name")
if import_mod:
try:
mod_name = import_mod.value
except AttributeError:
# XXX: A hack to remove whitespace prefixes and suffixes
mod_name = str(import_mod).strip()
new_name = self.mapping[mod_name]
import_mod.replace(Name(new_name, prefix=import_mod.prefix))
if "name_import" in results:
# If it's not a "from x import x, y" or "import x as y" import,
# marked its usage to be replaced.
self.replace[mod_name] = new_name
if "multiple_imports" in results:
# This is a nasty hack to fix multiple imports on a line (e.g.,
# "import StringIO, urlparse"). The problem is that I can't
# figure out an easy way to make a pattern recognize the keys of
# MAPPING randomly sprinkled in an import statement.
results = self.match(node)
if results:
self.transform(node, results)
else:
# Replace usage of the module.
# Now this is, mostly, a hack
bare_name = results["package"][0]
bare_name_text = ''.join(map(str, results['package'])).strip()
new_name = self.replace.get(bare_name_text)
prefix = results['package'][0].prefix
if new_name:
bare_name.replace(Name(new_name, prefix=prefix))
results["package"][1].replace(Name(''))