contrib/bdiff-torture.py
author Simon Sapin <simon.sapin@octobus.net>
Mon, 13 Sep 2021 18:48:48 +0200
changeset 47965 f9e6f2bb721d
parent 43076 2372284d9457
child 48875 6000f5b25c9b
permissions -rw-r--r--
rhg: Don’t compare ambiguous files one byte at a time Even though the use of `BufReader` reduces the number of syscalls to read the file from disk, `.bytes()` yields a separate `Result` for every byte. Creating those results and dispatching on them is most likely costly. Instead, this commit opts for simplicity by reading the entire file into memory and comparing a single pair of byte strings. Note that memory already needs to contain the entire previous contents of the file, as read from the filelog. So with an extremely large file this doubles memory use but does not make it grow by orders of magnitude. At first I wrote code that still avoids reading the entire file into memory and compares one buffer at a time with `BufReader`. Find this code below for posterity. However its correctness is subtle. I ended up preferring the simplicity of the obviously-correct single comparison. ```rust let mut reader = BufReader::new(fobj); let mut expected = &contents_in_p1[..]; loop { let buf = reader.fill_buf().when_reading_file(&fs_path)?; if buf.is_empty() { // Found EOF return Ok(expected.is_empty()); } else if let Some(rest) = expected.drop_prefix(buf) { // What we read so far matches the expected content, continue reading let buf_len = buf.len(); reader.consume(buf_len); expected = rest } else { // Found different content return Ok(false); } } ``` Differential Revision: https://phab.mercurial-scm.org/D11412
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
29012
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
     1
# Randomized torture test generation for bdiff
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
     2
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
     3
from __future__ import absolute_import, print_function
29209
eccfd6500636 py3: make contrib/bdiff-torture.py conform to our import style
Yuya Nishihara <yuya@tcha.org>
parents: 29012
diff changeset
     4
import random
eccfd6500636 py3: make contrib/bdiff-torture.py conform to our import style
Yuya Nishihara <yuya@tcha.org>
parents: 29012
diff changeset
     5
import sys
eccfd6500636 py3: make contrib/bdiff-torture.py conform to our import style
Yuya Nishihara <yuya@tcha.org>
parents: 29012
diff changeset
     6
29012
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
     7
from mercurial import (
32202
ded48ad55146 bdiff: proxy through mdiff module
Yuya Nishihara <yuya@tcha.org>
parents: 29209
diff changeset
     8
    mdiff,
43009
e05c141511dd contrib: use pycompat.xrange in bdiff-torture.py
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42857
diff changeset
     9
    pycompat,
29012
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    10
)
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    11
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 43009
diff changeset
    12
29012
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    13
def reducetest(a, b):
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    14
    tries = 0
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    15
    reductions = 0
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    16
    print("reducing...")
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    17
    while tries < 1000:
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 43009
diff changeset
    18
        a2 = (
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 43009
diff changeset
    19
            "\n".join(l for l in a.splitlines() if random.randint(0, 100) > 0)
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 43009
diff changeset
    20
            + "\n"
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 43009
diff changeset
    21
        )
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 43009
diff changeset
    22
        b2 = (
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 43009
diff changeset
    23
            "\n".join(l for l in b.splitlines() if random.randint(0, 100) > 0)
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 43009
diff changeset
    24
            + "\n"
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 43009
diff changeset
    25
        )
29012
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    26
        if a2 == a and b2 == b:
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    27
            continue
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    28
        if a2 == b2:
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    29
            continue
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    30
        tries += 1
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    31
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    32
        try:
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    33
            test1(a, b)
41365
876494fd967d cleanup: delete lots of unused local variables
Martin von Zweigbergk <martinvonz@google.com>
parents: 32203
diff changeset
    34
        except Exception:
29012
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    35
            reductions += 1
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    36
            tries = 0
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    37
            a = a2
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    38
            b = b2
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    39
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 43009
diff changeset
    40
    print("reduced:", reductions, len(a) + len(b), repr(a), repr(b))
29012
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    41
    try:
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    42
        test1(a, b)
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    43
    except Exception as inst:
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    44
        print("failed:", inst)
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    45
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    46
    sys.exit(0)
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    47
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 43009
diff changeset
    48
29012
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    49
def test1(a, b):
32202
ded48ad55146 bdiff: proxy through mdiff module
Yuya Nishihara <yuya@tcha.org>
parents: 29209
diff changeset
    50
    d = mdiff.textdiff(a, b)
29012
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    51
    if not d:
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    52
        raise ValueError("empty")
32203
0c73634d0570 mpatch: proxy through mdiff module
Yuya Nishihara <yuya@tcha.org>
parents: 32202
diff changeset
    53
    c = mdiff.patches(a, [d])
29012
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    54
    if c != b:
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    55
        raise ValueError("bad")
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    56
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 43009
diff changeset
    57
29012
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    58
def testwrap(a, b):
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    59
    try:
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    60
        test1(a, b)
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    61
        return
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    62
    except Exception as inst:
42857
3316e59b0105 bdiff-torture: fix pyflakes warning reporting undefined name 'inst'
Pulkit Goyal <pulkit@yandex-team.ru>
parents: 41365
diff changeset
    63
        print("exception:", inst)
29012
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    64
    reducetest(a, b)
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    65
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 43009
diff changeset
    66
29012
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    67
def test(a, b):
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    68
    testwrap(a, b)
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    69
    testwrap(b, a)
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    70
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 43009
diff changeset
    71
29012
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    72
def rndtest(size, noise):
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    73
    a = []
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    74
    src = "                aaaaaaaabbbbccd"
43009
e05c141511dd contrib: use pycompat.xrange in bdiff-torture.py
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42857
diff changeset
    75
    for x in pycompat.xrange(size):
29012
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    76
        a.append(src[random.randint(0, len(src) - 1)])
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    77
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    78
    while True:
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    79
        b = [c for c in a if random.randint(0, 99) > noise]
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    80
        b2 = []
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    81
        for c in b:
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    82
            b2.append(c)
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    83
            while random.randint(0, 99) < noise:
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    84
                b2.append(src[random.randint(0, len(src) - 1)])
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    85
        if b2 != a:
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    86
            break
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    87
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    88
    a = "\n".join(a) + "\n"
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    89
    b = "\n".join(b2) + "\n"
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    90
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    91
    test(a, b)
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    92
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 43009
diff changeset
    93
29012
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    94
maxvol = 10000
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    95
startsize = 2
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    96
while True:
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    97
    size = startsize
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    98
    count = 0
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    99
    while size < maxvol:
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   100
        print(size)
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   101
        volume = 0
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   102
        while volume < maxvol:
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   103
            rndtest(size, 2)
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   104
            volume += size
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   105
            count += 2
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   106
        size *= 2
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   107
    maxvol *= 4
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   108
    startsize *= 4