view tests/test-fileset.t @ 28487:98d98a645e9d

changelog: add class to represent parsed changelog revisions Currently, changelog entries are parsed into their respective components at read time. Many operations are only interested in a subset of fields of a changelog entry. The parsing and storing of all the fields adds avoidable overhead. This patch introduces the "changelogrevision" class. It takes changelog raw text and exposes the parsed results as attributes. The code for parsing changelog entries has been moved into its construction function. changelog.read() has been modified to use the new class internally while maintaining its existing API. Future patches will make revision parsing lazy. We implement the construction function of the new class with __new__ instead of __init__ so we can use a named tuple to represent the empty revision. This saves overhead and complexity of coercing later versions of this class to represent an empty instance. While we are here, we add a method on changelog to obtain an instance of the new type. The overhead of constructing the new class regresses performance of revsets accessing this data: author(mpm) 0.896565 0.929984 desc(bug) 0.887169 0.935642 105% date(2015) 0.878797 0.908094 extra(rebase_source) 0.865446 0.922624 106% author(mpm) or author(greg) 1.801832 1.902112 105% author(mpm) or desc(bug) 1.812438 1.860977 date(2015) or branch(default) 0.968276 1.005824 author(mpm) or desc(bug) or date(2015) or extra(rebase_source) 3.656193 3.743381 Once lazy parsing is implemented, these revsets will all be faster than before. There is no performance change on revsets that do not access this data. There /could/ be a performance regression on operations that perform several changelog reads. However, I can't think of anything outside of revsets and `hg log` (basically the same as a revset) that would be impacted.
author Gregory Szorc <gregory.szorc@gmail.com>
date Sun, 06 Mar 2016 14:28:02 -0800
parents 4eb5496c2bd4
children 4140d49d2efb
line wrap: on
line source

  $ fileset() {
  >   hg debugfileset "$@"
  > }

  $ hg init repo
  $ cd repo
  $ echo a > a1
  $ echo a > a2
  $ echo b > b1
  $ echo b > b2
  $ hg ci -Am addfiles
  adding a1
  adding a2
  adding b1
  adding b2

Test operators and basic patterns

  $ fileset -v a1
  ('symbol', 'a1')
  a1
  $ fileset -v 'a*'
  ('symbol', 'a*')
  a1
  a2
  $ fileset -v '"re:a\d"'
  ('string', 're:a\\d')
  a1
  a2
  $ fileset -v 'a1 or a2'
  (or
    ('symbol', 'a1')
    ('symbol', 'a2'))
  a1
  a2
  $ fileset 'a1 | a2'
  a1
  a2
  $ fileset 'a* and "*1"'
  a1
  $ fileset 'a* & "*1"'
  a1
  $ fileset 'not (r"a*")'
  b1
  b2
  $ fileset '! ("a*")'
  b1
  b2
  $ fileset 'a* - a1'
  a2
  $ fileset 'a_b'
  $ fileset '"\xy"'
  hg: parse error: invalid \x escape
  [255]

Test files status

  $ rm a1
  $ hg rm a2
  $ echo b >> b2
  $ hg cp b1 c1
  $ echo c > c2
  $ echo c > c3
  $ cat > .hgignore <<EOF
  > \.hgignore
  > 2$
  > EOF
  $ fileset 'modified()'
  b2
  $ fileset 'added()'
  c1
  $ fileset 'removed()'
  a2
  $ fileset 'deleted()'
  a1
  $ fileset 'missing()'
  a1
  $ fileset 'unknown()'
  c3
  $ fileset 'ignored()'
  .hgignore
  c2
  $ fileset 'hgignore()'
  a2
  b2
  $ fileset 'clean()'
  b1
  $ fileset 'copied()'
  c1

Test files properties

  >>> file('bin', 'wb').write('\0a')
  $ fileset 'binary()'
  $ fileset 'binary() and unknown()'
  bin
  $ echo '^bin$' >> .hgignore
  $ fileset 'binary() and ignored()'
  bin
  $ hg add bin
  $ fileset 'binary()'
  bin

  $ fileset 'grep("b{1}")'
  b2
  c1
  b1
  $ fileset 'grep("missingparens(")'
  hg: parse error: invalid match pattern: unbalanced parenthesis
  [255]

#if execbit
  $ chmod +x b2
  $ fileset 'exec()'
  b2
#endif

#if symlink
  $ ln -s b2 b2link
  $ fileset 'symlink() and unknown()'
  b2link
  $ hg add b2link
#endif

#if no-windows
  $ echo foo > con.xml
  $ fileset 'not portable()'
  con.xml
  $ hg --config ui.portablefilenames=ignore add con.xml
#endif

  >>> file('1k', 'wb').write(' '*1024)
  >>> file('2k', 'wb').write(' '*2048)
  $ hg add 1k 2k
  $ fileset 'size("bar")'
  hg: parse error: couldn't parse size: bar
  [255]
  $ fileset '(1k, 2k)'
  hg: parse error: can't use a list in this context
  (see hg help "filesets.x or y")
  [255]
  $ fileset 'size(1k)'
  1k
  $ fileset '(1k or 2k) and size("< 2k")'
  1k
  $ fileset '(1k or 2k) and size("<=2k")'
  1k
  2k
  $ fileset '(1k or 2k) and size("> 1k")'
  2k
  $ fileset '(1k or 2k) and size(">=1K")'
  1k
  2k
  $ fileset '(1k or 2k) and size(".5KB - 1.5kB")'
  1k
  $ fileset 'size("1M")'
  $ fileset 'size("1 GB")'

Test merge states

  $ hg ci -m manychanges
  $ hg up -C 0
  * files updated, 0 files merged, * files removed, 0 files unresolved (glob)
  $ echo c >> b2
  $ hg ci -m diverging b2
  created new head
  $ fileset 'resolved()'
  $ fileset 'unresolved()'
  $ hg merge
  merging b2
  warning: conflicts while merging b2! (edit, then use 'hg resolve --mark')
  * files updated, 0 files merged, 1 files removed, 1 files unresolved (glob)
  use 'hg resolve' to retry unresolved file merges or 'hg update -C .' to abandon
  [1]
  $ fileset 'resolved()'
  $ fileset 'unresolved()'
  b2
  $ echo e > b2
  $ hg resolve -m b2
  (no more unresolved files)
  $ fileset 'resolved()'
  b2
  $ fileset 'unresolved()'
  $ hg ci -m merge

Test subrepo predicate

  $ hg init sub
  $ echo a > sub/suba
  $ hg -R sub add sub/suba
  $ hg -R sub ci -m sub
  $ echo 'sub = sub' > .hgsub
  $ hg init sub2
  $ echo b > sub2/b
  $ hg -R sub2 ci -Am sub2
  adding b
  $ echo 'sub2 = sub2' >> .hgsub
  $ fileset 'subrepo()'
  $ hg add .hgsub
  $ fileset 'subrepo()'
  sub
  sub2
  $ fileset 'subrepo("sub")'
  sub
  $ fileset 'subrepo("glob:*")'
  sub
  sub2
  $ hg ci -m subrepo

Test that .hgsubstate is updated as appropriate during a conversion.  The
saverev property is enough to alter the hashes of the subrepo.

  $ hg init ../converted
  $ hg --config extensions.convert= convert --config convert.hg.saverev=True  \
  >      sub ../converted/sub
  initializing destination ../converted/sub repository
  scanning source...
  sorting...
  converting...
  0 sub
  $ hg clone -U sub2 ../converted/sub2
  $ hg --config extensions.convert= convert --config convert.hg.saverev=True  \
  >      . ../converted
  scanning source...
  sorting...
  converting...
  4 addfiles
  3 manychanges
  2 diverging
  1 merge
  0 subrepo
  no ".hgsubstate" updates will be made for "sub2"
  $ hg up -q -R ../converted -r tip
  $ hg --cwd ../converted cat sub/suba sub2/b -r tip
  a
  b
  $ oldnode=`hg log -r tip -T "{node}\n"`
  $ newnode=`hg log -R ../converted -r tip -T "{node}\n"`
  $ [ "$oldnode" != "$newnode" ] || echo "nothing changed"

Test with a revision

  $ hg log -G --template '{rev} {desc}\n'
  @  4 subrepo
  |
  o    3 merge
  |\
  | o  2 diverging
  | |
  o |  1 manychanges
  |/
  o  0 addfiles
  
  $ echo unknown > unknown
  $ fileset -r1 'modified()'
  b2
  $ fileset -r1 'added() and c1'
  c1
  $ fileset -r1 'removed()'
  a2
  $ fileset -r1 'deleted()'
  $ fileset -r1 'unknown()'
  $ fileset -r1 'ignored()'
  $ fileset -r1 'hgignore()'
  b2
  bin
  $ fileset -r1 'binary()'
  bin
  $ fileset -r1 'size(1k)'
  1k
  $ fileset -r3 'resolved()'
  $ fileset -r3 'unresolved()'

#if execbit
  $ fileset -r1 'exec()'
  b2
#endif

#if symlink
  $ fileset -r1 'symlink()'
  b2link
#endif

#if no-windows
  $ fileset -r1 'not portable()'
  con.xml
  $ hg forget 'con.xml'
#endif

  $ fileset -r4 'subrepo("re:su.*")'
  sub
  sub2
  $ fileset -r4 'subrepo("sub")'
  sub
  $ fileset -r4 'b2 or c1'
  b2
  c1

  >>> open('dos', 'wb').write("dos\r\n")
  >>> open('mixed', 'wb').write("dos\r\nunix\n")
  >>> open('mac', 'wb').write("mac\r")
  $ hg add dos mixed mac

(remove a1, to examine safety of 'eol' on removed files)
  $ rm a1

  $ fileset 'eol(dos)'
  dos
  mixed
  $ fileset 'eol(unix)'
  mixed
  .hgsub
  .hgsubstate
  b1
  b2
  c1
  $ fileset 'eol(mac)'
  mac

Test safety of 'encoding' on removed files

#if symlink
  $ fileset 'encoding("ascii")'
  dos
  mac
  mixed
  .hgsub
  .hgsubstate
  1k
  2k
  b1
  b2
  b2link
  bin
  c1
#else
  $ fileset 'encoding("ascii")'
  dos
  mac
  mixed
  .hgsub
  .hgsubstate
  1k
  2k
  b1
  b2
  bin
  c1
#endif

Test detection of unintentional 'matchctx.existing()' invocation

  $ cat > $TESTTMP/existingcaller.py <<EOF
  > from mercurial import registrar
  > 
  > filesetpredicate = registrar.filesetpredicate()
  > @filesetpredicate('existingcaller()', callexisting=False)
  > def existingcaller(mctx, x):
  >     # this 'mctx.existing()' invocation is unintentional
  >     return [f for f in mctx.existing()]
  > EOF

  $ cat >> .hg/hgrc <<EOF
  > [extensions]
  > existingcaller = $TESTTMP/existingcaller.py
  > EOF

  $ fileset 'existingcaller()' 2>&1 | tail -1
  AssertionError: unexpected existing() invocation