# HG changeset patch # User Pulkit Goyal # Date 1542888864 -10800 # Node ID a564870811098d6721a6be5110cd6982b370c32e # Parent 51685c6dcca327e8db3ef1ee42e738966dd3ca6b store: don't read the whole fncache in memory In large repositories with a lot of files, the fncache grows to more than 100 MB and reading that whole thing into memory slows things down. Let's not read the whole thing into memory. This patch changes the fncache loading code to read 1 MB at once. Loading 1 MB at once saves ~1 sec on perffncacheload for our internal repository. I tried various values such as 0.5 MB, 5 MB, 10 MB but best results were produced using 1 MB as the chunksize. On a narrow clone with fncache around 40 MB, this patch saves ~0.04 seconds on average on perffncacheload. To test the code, I have coded an extension in test-fncache.t which sets the chunksize to 1 byte, and the test passes with that. Differential Revision: https://phab.mercurial-scm.org/D5296 diff -r 51685c6dcca3 -r a56487081109 mercurial/store.py --- a/mercurial/store.py Fri Mar 08 10:20:33 2019 -0800 +++ b/mercurial/store.py Thu Nov 22 15:14:24 2018 +0300 @@ -8,6 +8,7 @@ from __future__ import absolute_import import errno +import functools import hashlib import os import stat @@ -23,6 +24,9 @@ ) parsers = policy.importmod(r'parsers') +# how much bytes should be read from fncache in one read +# It is done to prevent loading large fncache files into memory +fncache_chunksize = 10 ** 6 def _matchtrackedpath(path, matcher): """parses a fncache entry and returns whether the entry is tracking a path @@ -463,7 +467,20 @@ # skip nonexistent file self.entries = set() return - self.entries = set(decodedir(fp.read()).splitlines()) + + self.entries = set() + chunk = b'' + for c in iter(functools.partial(fp.read, fncache_chunksize), b''): + chunk += c + try: + p = chunk.rindex(b'\n') + self.entries.update(decodedir(chunk[:p + 1]).splitlines()) + chunk = chunk[p + 1:] + except ValueError: + # substring '\n' not found, maybe the entry is bigger
than the + # chunksize, so let's keep iterating + pass + self._checkentries(fp) fp.close() diff -r 51685c6dcca3 -r a56487081109 tests/test-fncache.t --- a/tests/test-fncache.t Fri Mar 08 10:20:33 2019 -0800 +++ b/tests/test-fncache.t Thu Nov 22 15:14:24 2018 +0300 @@ -1,5 +1,19 @@ #require repofncache +An extension which will set fncache chunksize to 1 byte to make sure that logic +does not break + + $ cat > chunksize.py << EOF > from __future__ import absolute_import > from mercurial import store > store.fncache_chunksize = 1 > EOF + + $ cat >> $HGRCPATH << EOF > [extensions] > chunksize = $TESTTMP/chunksize.py > EOF + Init repo1: $ hg init repo1