Mercurial > hg
view mercurial/httpconnection.py @ 47343:9f798c1b0d89 stable
cext: fix memory leak in phases computation
Without this, a buffer whose size in bytes is the number of
changesets in the repository is leaked each time the repository is
opened and changeset phases are computed.
Impact: the current code in hgwebdir creates a new `localrepository`
instance for each HTTP request. Since any pull or push is made of several
requests, a team of 100 people can easily produce thousands of such
requests per day.
Being a low-level malloc, this leak can't be seen with the gc module and
tools relying on that, but was spotted by valgrind immediately.
Reproduction
------------
for i in range(cl_args.iterations):
repo = hg.repository(baseui, repo_path)
rev = repo.revs(rev).first()
ctx = repo[rev]
del ctx
del repo
# avoid any pollution by other types of leaks
# (that should be fixed in 5.8)
repoview._filteredrepotypes.clear()
gc.collect()
Measurements
------------
Resident Set Size (RSS), taken on a clone of
mozilla-central for performance analysis (440 000
changesets).
before:
5.8+hg19.5ac0f2a8ba72 1000 iterations: 1606MB
5.8+hg19.5ac0f2a8ba72 10000 iterations: 5723MB
after:
5.8+hg20.e2084d39e145 1000 iterations: 555MB
5.8+hg20.e2084d39e145 10000 iterations: 555MB
(double checked, not a copy/paste error)
(e2084d39e14 is the present changeset, before amendment
of the message to add the measurements)
author | Georges Racinet <georges.racinet@octobus.net> |
---|---|
date | Sun, 06 Jun 2021 01:24:30 +0200 |
parents | ffd3e823a7e5 |
children | 6000f5b25c9b |
line wrap: on
line source
# httpconnection.py - urllib2 handler for new http support
#
# Copyright 2005, 2006, 2007, 2008 Olivia Mackall <olivia@selenic.com>
# Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
# Copyright 2011 Google, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import os

from .i18n import _
from .pycompat import open
from . import (
    pycompat,
    util,
)
from .utils import (
    urlutil,
)

urlerr = util.urlerr
urlreq = util.urlreq

# moved here from url.py to avoid a cycle
class httpsendfile(object):
    """Wrap a file object returned by "open" so it can be sent via HTTP.

    Reads are forwarded to the underlying file while a progress bar
    obtained from ``ui`` is kept up to date.

    The class deliberately does not define ``__len__`` because the
    length might be more than Py_ssize_t can handle; the size in bytes
    is exposed through the ``length`` attribute instead.
    """

    def __init__(self, ui, *args, **kwargs):
        """Open the file described by ``*args``/``**kwargs``.

        ``ui`` is kept to build the progress bar; every other argument
        is passed straight through to ``open``.
        """
        self.ui = ui
        fileobj = open(*args, **kwargs)
        self._data = fileobj
        # Expose the underlying file's bound methods directly.
        self.seek = fileobj.seek
        self.close = fileobj.close
        self.write = fileobj.write
        self.length = os.fstat(fileobj.fileno()).st_size
        self._pos = 0
        self._progress = self._makeprogress()

    def _makeprogress(self):
        """Return the progress bar used while the file is being read."""
        # The total is deliberately twice the real size (in kb): a
        # server that requires authentication makes us send the bundle
        # twice, and we cannot know that before the first attempt. So
        # we lie to the user, and maybe the push succeeds suddenly at
        # 50%.
        doubledkb = self.length // 1024 * 2
        return self.ui.makeprogress(
            _(b'sending'), unit=_(b'kb'), total=doubledkb
        )

    def read(self, *args, **kwargs):
        """Read from the wrapped file and report progress.

        Arguments are forwarded to the underlying ``read``. An empty
        result marks the progress bar as complete; otherwise the bar
        is advanced to the number of kb read so far.
        """
        chunk = self._data.read(*args, **kwargs)
        if chunk:
            self._pos += len(chunk)
            self._progress.update(self._pos // 1024)
        else:
            self._progress.complete()
        return chunk

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()


# moved here from url.py to avoid a cycle
def readauthforuri(ui, uri, user):
    """Return the ``[auth]`` entry that best matches ``uri``.

    ``ui`` supplies the ``[auth]`` configuration items, ``uri`` is the
    URI to match and ``user`` is the username taken from that URI (may
    be empty).

    The result is a ``(group, settings)`` tuple for the entry with the
    longest matching prefix, or ``None`` when no entry matches. When
    ``user`` is set and the selected entry has no username, ``user`` is
    stored in the entry's settings under ``b'username'``.
    """
    uri = pycompat.bytesurl(uri)

    # Collect the "<group>.<setting>" items into one dict per group.
    entries = {}
    for name, value in ui.configitems(b'auth'):
        if name == b'cookiefile':
            continue
        if b'.' not in name:
            ui.warn(_(b"ignoring invalid [auth] key '%s'\n") % name)
            continue
        groupname, field = name.rsplit(b'.', 1)
        settings = entries.setdefault(groupname, {})
        if field in (b'username', b'cert', b'key'):
            value = util.expandpath(value)
        settings[field] = value

    # Pick the entry with the longest matching prefix.
    scheme, hostpath = uri.split(b'://', 1)
    winner = None
    winnerlen = 0
    winneruser = None
    for groupname, settings in pycompat.iteritems(entries):
        # A username given in the URI must either match the entry's
        # username or the entry must leave it unset.
        if user and user != settings.get(b'username', user):
            continue

        rawprefix = settings.get(b'prefix')
        if not rawprefix:
            continue

        # A username embedded in the prefix must match the one from
        # the URI.
        parsed = urlutil.url(rawprefix)
        if parsed.user and parsed.user != user:
            continue

        # The URI passed in has been stripped of credentials, so drop
        # the user from the prefix as well to allow simpler matching.
        parsed.user = None
        pieces = bytes(parsed).split(b'://', 1)
        if len(pieces) > 1:
            # The prefix names its own scheme.
            allowed, target = [pieces[0]], pieces[1]
        else:
            target = pieces[0]
            allowed = (settings.get(b'schemes') or b'https').split()

        if scheme not in allowed:
            continue
        if target != b'*' and not hostpath.startswith(target):
            continue

        # Longer prefixes win; on a tie, an entry that carries a
        # username replaces a current best that has none.
        size = len(target)
        longer = size > winnerlen
        tiebreak = (
            size == winnerlen and not winneruser and b'username' in settings
        )
        if not (longer or tiebreak):
            continue

        winnerlen = size
        winner = groupname, settings
        winneruser = settings.get(b'username')
        if user and not winneruser:
            settings[b'username'] = user
    return winner