revlog: use existing file handle when reading during _addrevision
_addrevision() may need to read from revlogs as part of computing
deltas. Previously, we would flush existing file handles and open
a new, short-lived file handle to perform the reading.
If we have an existing file handle, it seems logical to reuse it
for reading instead of opening a new file handle. This patch
makes that the new behavior.
After this patch, revlog files are only reopened when adding
revisions if the revlog is switched from inline to non-inline.
On Linux when unbundling a bundle of the mozilla-central repo, this
patch has the following impact on system call counts:
Call Before After Delta
write 827,639 673,390 -154,249
open 700,103 684,089 -16,014
read 74,489 74,489 0
fstat 493,924 461,896 -32,028
close 249,131 233,117 -16,014
stat 242,001 242,001 0
lstat 18,676 18,676 0
lseek 20,268 20,268 0
ioctl 14,652 13,173 -1,479
TOTAL 3,180,758 2,930,679 -250,079
It's worth noting that many of the open() calls fail due to missing
files. That's why there are many more open() calls than close().
Despite the significant system call reduction, this change does not
seem to have a significant performance impact on Linux.
On Windows 10 (not a VM, on a SSD), this patch appears to reduce
unbundle time for mozilla-central from ~960s to ~920s. This isn't
as significant as I was hoping. But a decrease it is nonetheless.
Still, Windows unbundle performance is still >2x slower than Linux.
Despite the lack of significant gains, fewer system calls is fewer
system calls. If nothing else, this will narrow the focus of potential
areas to optimize in the future.
# parser.py - simple top-down operator precedence parser for mercurial
#
# Copyright 2010 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
# see http://effbot.org/zone/simple-top-down-parsing.htm and
# http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
# for background
# takes a tokenizer and elements
# tokenizer is an iterator that returns (type, value, pos) tuples
# elements is a mapping of types to binding strength, primary, prefix, infix
# and suffix actions
# an action is a tree node name, a tree label, and an optional match
# __call__(program) parses program into a labeled tree
from __future__ import absolute_import
from .i18n import _
from . import error
class parser(object):
def __init__(self, elements, methods=None):
self._elements = elements
self._methods = methods
self.current = None
def _advance(self):
'advance the tokenizer'
t = self.current
self.current = next(self._iter, None)
return t
def _hasnewterm(self):
'True if next token may start new term'
return any(self._elements[self.current[0]][1:3])
def _match(self, m):
'make sure the tokenizer matches an end condition'
if self.current[0] != m:
raise error.ParseError(_("unexpected token: %s") % self.current[0],
self.current[2])
self._advance()
def _parseoperand(self, bind, m=None):
'gather right-hand-side operand until an end condition or binding met'
if m and self.current[0] == m:
expr = None
else:
expr = self._parse(bind)
if m:
self._match(m)
return expr
def _parse(self, bind=0):
token, value, pos = self._advance()
# handle prefix rules on current token, take as primary if unambiguous
primary, prefix = self._elements[token][1:3]
if primary and not (prefix and self._hasnewterm()):
expr = (primary, value)
elif prefix:
expr = (prefix[0], self._parseoperand(*prefix[1:]))
else:
raise error.ParseError(_("not a prefix: %s") % token, pos)
# gather tokens until we meet a lower binding strength
while bind < self._elements[self.current[0]][0]:
token, value, pos = self._advance()
# handle infix rules, take as suffix if unambiguous
infix, suffix = self._elements[token][3:]
if suffix and not (infix and self._hasnewterm()):
expr = (suffix[0], expr)
elif infix:
expr = (infix[0], expr, self._parseoperand(*infix[1:]))
else:
raise error.ParseError(_("not an infix: %s") % token, pos)
return expr
def parse(self, tokeniter):
'generate a parse tree from tokens'
self._iter = tokeniter
self._advance()
res = self._parse()
token, value, pos = self.current
return res, pos
def eval(self, tree):
'recursively evaluate a parse tree using node methods'
if not isinstance(tree, tuple):
return tree
return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
def __call__(self, tokeniter):
'parse tokens into a parse tree and evaluate if methods given'
t = self.parse(tokeniter)
if self._methods:
return self.eval(t)
return t
def buildargsdict(trees, funcname, keys, keyvaluenode, keynode):
"""Build dict from list containing positional and keyword arguments
Invalid keywords or too many positional arguments are rejected, but
missing arguments are just omitted.
"""
if len(trees) > len(keys):
raise error.ParseError(_("%(func)s takes at most %(nargs)d arguments")
% {'func': funcname, 'nargs': len(keys)})
args = {}
# consume positional arguments
for k, x in zip(keys, trees):
if x[0] == keyvaluenode:
break
args[k] = x
# remainder should be keyword arguments
for x in trees[len(args):]:
if x[0] != keyvaluenode or x[1][0] != keynode:
raise error.ParseError(_("%(func)s got an invalid argument")
% {'func': funcname})
k = x[1][1]
if k not in keys:
raise error.ParseError(_("%(func)s got an unexpected keyword "
"argument '%(key)s'")
% {'func': funcname, 'key': k})
if k in args:
raise error.ParseError(_("%(func)s got multiple values for keyword "
"argument '%(key)s'")
% {'func': funcname, 'key': k})
args[k] = x[2]
return args
def unescapestr(s):
try:
return s.decode("string_escape")
except ValueError as e:
# mangle Python's exception into our format
raise error.ParseError(str(e).lower())
def _prettyformat(tree, leafnodes, level, lines):
if not isinstance(tree, tuple) or tree[0] in leafnodes:
lines.append((level, str(tree)))
else:
lines.append((level, '(%s' % tree[0]))
for s in tree[1:]:
_prettyformat(s, leafnodes, level + 1, lines)
lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')]
def prettyformat(tree, leafnodes):
lines = []
_prettyformat(tree, leafnodes, 0, lines)
output = '\n'.join((' ' * l + s) for l, s in lines)
return output
def simplifyinfixops(tree, targetnodes):
"""Flatten chained infix operations to reduce usage of Python stack
>>> def f(tree):
... print prettyformat(simplifyinfixops(tree, ('or',)), ('symbol',))
>>> f(('or',
... ('or',
... ('symbol', '1'),
... ('symbol', '2')),
... ('symbol', '3')))
(or
('symbol', '1')
('symbol', '2')
('symbol', '3'))
>>> f(('func',
... ('symbol', 'p1'),
... ('or',
... ('or',
... ('func',
... ('symbol', 'sort'),
... ('list',
... ('or',
... ('or',
... ('symbol', '1'),
... ('symbol', '2')),
... ('symbol', '3')),
... ('negate',
... ('symbol', 'rev')))),
... ('and',
... ('symbol', '4'),
... ('group',
... ('or',
... ('or',
... ('symbol', '5'),
... ('symbol', '6')),
... ('symbol', '7'))))),
... ('symbol', '8'))))
(func
('symbol', 'p1')
(or
(func
('symbol', 'sort')
(list
(or
('symbol', '1')
('symbol', '2')
('symbol', '3'))
(negate
('symbol', 'rev'))))
(and
('symbol', '4')
(group
(or
('symbol', '5')
('symbol', '6')
('symbol', '7'))))
('symbol', '8')))
"""
if not isinstance(tree, tuple):
return tree
op = tree[0]
if op not in targetnodes:
return (op,) + tuple(simplifyinfixops(x, targetnodes) for x in tree[1:])
# walk down left nodes taking each right node. no recursion to left nodes
# because infix operators are left-associative, i.e. left tree is deep.
# e.g. '1 + 2 + 3' -> (+ (+ 1 2) 3) -> (+ 1 2 3)
simplified = []
x = tree
while x[0] == op:
l, r = x[1:]
simplified.append(simplifyinfixops(r, targetnodes))
x = l
simplified.append(simplifyinfixops(x, targetnodes))
simplified.append(op)
return tuple(reversed(simplified))