Mercurial > hg
view hgext/censor.py @ 51683:5f37c36f36b9
revlog: use mmap by default is pre-population is available
Using mmap has a great impact of memory usage on server, and a good impact on
performance in multiple case. Now that we pre-populate memory mapping by
default, there is case where it using mmap is slower. So we use it by default
(if pre-population is available).
Further work to reduce the performance impact of the pre-population will be done
later.
Some benchmark below (using the same setup as 522b4d729e89):
As for 522b4d729e89 the impact on small repository like Mercurial or Pypy is
tiny, ~1% best. However for large repositories we see some performance
improvement without seeing the performance regression that we could have without
pre-populate.
##### For netbeans
### data-env-vars.name = netbeans-2018-08-01-zstd-sparse-revlog
## benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# benchmark.variants.limit-rev = 1
# benchmark.variants.patch = yes
no-mmap: 0.171579
mmap: 0.166311 (-3.07%, -0.01)
# bin-env-vars.hg.flavor = default
no-mmap: 0.170716
mmap: 0.165218 (-3.22%, -0.01)
# benchmark.variants.patch = no
# benchmark.variants.rev = tip
no-mmap: 0.140862
mmap: 0.137566 (-2.34%, -0.00)
## benchmark.name = hg.command.unbundle
# bin-env-vars.hg.flavor = rust
# benchmark.variants.issue6528 = disabled
# benchmark.variants.reuse-external-delta-parent = yes
# benchmark.variants.revs = any-1-extra-rev
# benchmark.variants.source = unbundle
no-mmap: 0.238038
mmap: 0.239912
no-populate: 0.cbd4c9 (+11.71%, +0.03)
#### For Mozilla
### data-env-vars.name = mozilla-try-2019-02-18-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 1
# benchmark.variants.patch = yes
no-mmap: 0.258440
mmap: 0.237813 (-7.98%, -0.02)
# benchmark.variants.limit-rev = 10
no-mmap: 1.235323
mmap: 1.213578 (-1.76%, -0.02)
## benchmark.name = hg.command.push
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.explicit-rev = none
# benchmark.variants.issue6528 = disabled
# benchmark.variants.protocol = ssh
# benchmark.variants.reuse-external-delta-parent = yes
# benchmark.variants.revs = any-1-extra-rev
no-mmap: 4.790135
mmap: 4.668971 (-2.53%, -0.12)
no-populate: 4.841141 (+1.06%, +0.05)
### data-env-vars.name = mozilla-try-2019-02-18-zstd-sparse-revlog
## benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = default
# benchmark.variants.limit-rev = 1000
# benchmark.variants.rev = tip
no-mmap: 0.206187
mmap: 0.197348 (-4.29%, -0.01)
## benchmark.name = hg.command.push
# bin-env-vars.hg.flavor = default
# benchmark.variants.explicit-rev = none
# benchmark.variants.issue6528 = disabled
# benchmark.variants.protocol = ssh
# benchmark.variants.reuse-external-delta-parent = yes
# benchmark.variants.revs = any-1-extra-rev
no-mmap: 4.768259
mmap: 4.798632
no-populate: 4.953295 (+3.88%, +0.19)
# benchmark.variants.revs = any-100-extra-rev
no-mmap: 4.785946
mmap: 4.903618
no-populate: 5.014963 (+4.79%, +0.23)
## benchmark.name = hg.command.unbundle
# bin-env-vars.hg.flavor = default
# benchmark.variants.issue6528 = disabled
# benchmark.variants.reuse-external-delta-parent = yes
# benchmark.variants.revs = any-1-extra-rev
# benchmark.variants.source = unbundle
no-mmap: 1.400121
mmap: 1.423411
no-populate: 1.585365 (+13.23%, +0.19)
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Mon, 08 Jul 2024 15:48:34 +0200 |
parents | ceeb8fa23cc8 |
children | cd72a88c5599 |
line wrap: on
line source
# Copyright (C) 2015 - Mike Edgar <adgar@google.com> # # This extension enables removal of file content at a given revision, # rewriting the data/metadata of successive revisions to preserve revision log # integrity. """erase file content at a given revision The censor command instructs Mercurial to erase all content of a file at a given revision *without updating the changeset hash.* This allows existing history to remain valid while preventing future clones/pulls from receiving the erased data. Typical uses for censor are due to security or legal requirements, including:: * Passwords, private keys, cryptographic material * Licensed data/code/libraries for which the license has expired * Personally Identifiable Information or other private data Censored nodes can interrupt mercurial's typical operation whenever the excised data needs to be materialized. Some commands, like ``hg cat``/``hg revert``, simply fail when asked to produce censored data. Others, like ``hg verify`` and ``hg update``, must be capable of tolerating censored data to continue to function in a meaningful way. Such commands only tolerate censored file As having a censored version in a checkout is impractical. The current head revisions of the repository are checked. If the revision to be censored is in any of them the command will abort. A few informative commands such as ``hg grep`` will unconditionally ignore censored data and merely report that it was encountered. """ from mercurial.i18n import _ from mercurial.node import short from mercurial import ( error, registrar, scmutil, ) cmdtable = {} command = registrar.command(cmdtable) # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should # be specifying the version(s) of Mercurial they are tested with, or # leave the attribute unspecified. testedwith = b'ships-with-hg-core' @command( b'censor', [ ( b'r', b'rev', [], _(b'censor file from specified revision'), _(b'REV'), ), ( b'', b'check-heads', True, _(b'check that repository heads are not affected'), ), (b't', b'tombstone', b'', _(b'replacement tombstone data'), _(b'TEXT')), ], _(b'-r REV [-t TEXT] [FILE]'), helpcategory=command.CATEGORY_MAINTENANCE, ) def censor(ui, repo, path, rev=(), tombstone=b'', check_heads=True, **opts): with repo.wlock(), repo.lock(): return _docensor( ui, repo, path, rev, tombstone, check_heads=check_heads, **opts, ) def _docensor(ui, repo, path, revs=(), tombstone=b'', check_heads=True, **opts): if not path: raise error.Abort(_(b'must specify file path to censor')) if not revs: raise error.Abort(_(b'must specify revisions to censor')) wctx = repo[None] m = scmutil.match(wctx, (path,)) if m.anypats() or len(m.files()) != 1: raise error.Abort(_(b'can only specify an explicit filename')) path = m.files()[0] flog = repo.file(path) if not len(flog): raise error.Abort(_(b'cannot censor file with no history')) revs = scmutil.revrange(repo, revs) if not revs: raise error.Abort(_(b'no matching revisions')) file_nodes = set() for r in revs: try: ctx = repo[r] file_nodes.add(ctx.filectx(path).filenode()) except error.LookupError: raise error.Abort(_(b'file does not exist at revision %s') % ctx) if check_heads: heads = [] repo_heads = repo.heads() msg = b'checking for the censored content in %d heads\n' msg %= len(repo_heads) ui.status(msg) for headnode in repo_heads: hc = repo[headnode] if path in hc and hc.filenode(path) in file_nodes: heads.append(hc) if heads: headlist = b', '.join([short(c.node()) for c in heads]) raise error.Abort( _(b'cannot censor file in heads (%s)') % headlist, hint=_(b'clean/delete and commit first'), ) msg = b'checking for the censored content in the working directory\n' ui.status(msg) wp = wctx.parents() if ctx.node() in [p.node() for p in wp]: raise error.Abort( _(b'cannot censor working directory'), hint=_(b'clean/delete/update first'), ) msg = b'censoring %d file revisions\n' msg %= len(file_nodes) ui.status(msg) with repo.transaction(b'censor') as tr: flog.censorrevision(tr, file_nodes, tombstone=tombstone)