Mercurial > hg
annotate tests/test-pathencode.py @ 30561:7c0c722d568d
bdiff: early pruning of common prefix before doing expensive computations
It seems quite common that files don't change completely. New lines are often
pretty much appended, and modifications will often only change a small section
of the file which on average will be in the middle.
There can thus be a big win by pruning a common prefix before starting the more
expensive search for longest common substrings.
Worst case, it will scan through a long sequence of similar bytes without
encountering a newline. Splitlines will then have to do the same again ...
twice for each side. If similar lines are found, splitlines will save the
double iteration and hashing of the lines ... plus there will be fewer lines to
find common substrings in.
This change might in some cases make the algorithm pick shorter or less optimal
common substrings. We can't have our cake and eat it too.
This makes hg --time bundle --base null -r 4.0 go from 14.5 to 15 s - a 3%
increase.
On mozilla-unified:
perfbdiff -m 3041e4d59df2
! wall 0.053088 comb 0.060000 user 0.060000 sys 0.000000 (best of 100) to
! wall 0.024618 comb 0.020000 user 0.020000 sys 0.000000 (best of 116)
perfbdiff 0e9928989e9c --alldata --count 10
! wall 0.702075 comb 0.700000 user 0.700000 sys 0.000000 (best of 15) to
! wall 0.579235 comb 0.580000 user 0.580000 sys 0.000000 (best of 18)
author | Mads Kiilerich <madski@unity3d.com> |
---|---|
date | Wed, 16 Nov 2016 19:45:35 +0100 |
parents | 59481bfdb7f3 |
children | 0f200e2310ca |
rev | line source |
---|---|
17934
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
1 # This is a randomized test that generates different pathnames every |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
2 # time it is invoked, and tests the encoding of those pathnames. |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
3 # |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
4 # It uses a simple probabilistic model to generate valid pathnames |
26098 | 5 # that have proven likely to expose bugs and divergent behavior in |
17934
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
6 # different encoding implementations. |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
7 |
28928
59481bfdb7f3
tests: make test-pathencode use absolute_import
Pulkit Goyal <7895pulkit@gmail.com>
parents:
28918
diff
changeset
|
8 from __future__ import absolute_import, print_function |
28918
72f683260f31
tests: make test-pathencode use print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents:
26849
diff
changeset
|
9 |
28928
59481bfdb7f3
tests: make test-pathencode use absolute_import
Pulkit Goyal <7895pulkit@gmail.com>
parents:
28918
diff
changeset
|
10 import binascii |
17935
9c888b945b65
test-pathencode: make a 2.4-safe import of collections
Bryan O'Sullivan <bryano@fb.com>
parents:
17934
diff
changeset
|
11 import collections |
28928
59481bfdb7f3
tests: make test-pathencode use absolute_import
Pulkit Goyal <7895pulkit@gmail.com>
parents:
28918
diff
changeset
|
12 import itertools |
59481bfdb7f3
tests: make test-pathencode use absolute_import
Pulkit Goyal <7895pulkit@gmail.com>
parents:
28918
diff
changeset
|
13 import math |
59481bfdb7f3
tests: make test-pathencode use absolute_import
Pulkit Goyal <7895pulkit@gmail.com>
parents:
28918
diff
changeset
|
14 import os |
59481bfdb7f3
tests: make test-pathencode use absolute_import
Pulkit Goyal <7895pulkit@gmail.com>
parents:
28918
diff
changeset
|
15 import random |
59481bfdb7f3
tests: make test-pathencode use absolute_import
Pulkit Goyal <7895pulkit@gmail.com>
parents:
28918
diff
changeset
|
16 import sys |
59481bfdb7f3
tests: make test-pathencode use absolute_import
Pulkit Goyal <7895pulkit@gmail.com>
parents:
28918
diff
changeset
|
17 import time |
59481bfdb7f3
tests: make test-pathencode use absolute_import
Pulkit Goyal <7895pulkit@gmail.com>
parents:
28918
diff
changeset
|
18 from mercurial import ( |
59481bfdb7f3
tests: make test-pathencode use absolute_import
Pulkit Goyal <7895pulkit@gmail.com>
parents:
28918
diff
changeset
|
19 store, |
59481bfdb7f3
tests: make test-pathencode use absolute_import
Pulkit Goyal <7895pulkit@gmail.com>
parents:
28918
diff
changeset
|
20 ) |
17934
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
21 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
22 validchars = set(map(chr, range(0, 256))) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
23 alphanum = range(ord('A'), ord('Z')) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
24 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
25 for c in '\0/': |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
26 validchars.remove(c) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
27 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
28 winreserved = ('aux con prn nul'.split() + |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
29 ['com%d' % i for i in xrange(1, 10)] + |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
30 ['lpt%d' % i for i in xrange(1, 10)]) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
31 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
32 def casecombinations(names): |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
33 '''Build all case-diddled combinations of names.''' |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
34 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
35 combos = set() |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
36 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
37 for r in names: |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
38 for i in xrange(len(r) + 1): |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
39 for c in itertools.combinations(xrange(len(r)), i): |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
40 d = r |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
41 for j in c: |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
42 d = ''.join((d[:j], d[j].upper(), d[j + 1:])) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
43 combos.add(d) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
44 return sorted(combos) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
45 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
46 def buildprobtable(fp, cmd='hg manifest tip'): |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
47 '''Construct and print a table of probabilities for path name |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
48 components. The numbers are percentages.''' |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
49 |
17935
9c888b945b65
test-pathencode: make a 2.4-safe import of collections
Bryan O'Sullivan <bryano@fb.com>
parents:
17934
diff
changeset
|
50 counts = collections.defaultdict(lambda: 0) |
17934
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
51 for line in os.popen(cmd).read().splitlines(): |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
52 if line[-2:] in ('.i', '.d'): |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
53 line = line[:-2] |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
54 if line.startswith('data/'): |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
55 line = line[5:] |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
56 for c in line: |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
57 counts[c] += 1 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
58 for c in '\r/\n': |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
59 counts.pop(c, None) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
60 t = sum(counts.itervalues()) / 100.0 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
61 fp.write('probtable = (') |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
62 for i, (k, v) in enumerate(sorted(counts.iteritems(), key=lambda x: x[1], |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
63 reverse=True)): |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
64 if (i % 5) == 0: |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
65 fp.write('\n ') |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
66 vt = v / t |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
67 if vt < 0.0005: |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
68 break |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
69 fp.write('(%r, %.03f), ' % (k, vt)) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
70 fp.write('\n )\n') |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
71 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
72 # A table of character frequencies (as percentages), gleaned by |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
73 # looking at filelog names from a real-world, very large repo. |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
74 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
75 probtable = ( |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
76 ('t', 9.828), ('e', 9.042), ('s', 8.011), ('a', 6.801), ('i', 6.618), |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
77 ('g', 5.053), ('r', 5.030), ('o', 4.887), ('p', 4.363), ('n', 4.258), |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
78 ('l', 3.830), ('h', 3.693), ('_', 3.659), ('.', 3.377), ('m', 3.194), |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
79 ('u', 2.364), ('d', 2.296), ('c', 2.163), ('b', 1.739), ('f', 1.625), |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
80 ('6', 0.666), ('j', 0.610), ('y', 0.554), ('x', 0.487), ('w', 0.477), |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
81 ('k', 0.476), ('v', 0.473), ('3', 0.336), ('1', 0.335), ('2', 0.326), |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
82 ('4', 0.310), ('5', 0.305), ('9', 0.302), ('8', 0.300), ('7', 0.299), |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
83 ('q', 0.298), ('0', 0.250), ('z', 0.223), ('-', 0.118), ('C', 0.095), |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
84 ('T', 0.087), ('F', 0.085), ('B', 0.077), ('S', 0.076), ('P', 0.076), |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
85 ('L', 0.059), ('A', 0.058), ('N', 0.051), ('D', 0.049), ('M', 0.046), |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
86 ('E', 0.039), ('I', 0.035), ('R', 0.035), ('G', 0.028), ('U', 0.026), |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
87 ('W', 0.025), ('O', 0.017), ('V', 0.015), ('H', 0.013), ('Q', 0.011), |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
88 ('J', 0.007), ('K', 0.005), ('+', 0.004), ('X', 0.003), ('Y', 0.001), |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
89 ) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
90 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
91 for c, _ in probtable: |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
92 validchars.remove(c) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
93 validchars = list(validchars) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
94 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
95 def pickfrom(rng, table): |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
96 c = 0 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
97 r = rng.random() * sum(i[1] for i in table) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
98 for i, p in table: |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
99 c += p |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
100 if c >= r: |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
101 return i |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
102 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
103 reservedcombos = casecombinations(winreserved) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
104 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
105 # The first component of a name following a slash. |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
106 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
107 firsttable = ( |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
108 (lambda rng: pickfrom(rng, probtable), 90), |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
109 (lambda rng: rng.choice(validchars), 5), |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
110 (lambda rng: rng.choice(reservedcombos), 5), |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
111 ) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
112 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
113 # Components of a name following the first. |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
114 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
115 resttable = firsttable[:-1] |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
116 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
117 # Special suffixes. |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
118 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
119 internalsuffixcombos = casecombinations('.hg .i .d'.split()) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
120 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
121 # The last component of a path, before a slash or at the end of a name. |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
122 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
123 lasttable = resttable + ( |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
124 (lambda rng: '', 95), |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
125 (lambda rng: rng.choice(internalsuffixcombos), 5), |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
126 ) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
127 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
128 def makepart(rng, k): |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
129 '''Construct a part of a pathname, without slashes.''' |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
130 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
131 p = pickfrom(rng, firsttable)(rng) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
132 l = len(p) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
133 ps = [p] |
19319
ec17ddecdf64
test-pathencode: randomize length of each path component
Siddharth Agarwal <sid0@fb.com>
parents:
19318
diff
changeset
|
134 maxl = rng.randint(1, k) |
ec17ddecdf64
test-pathencode: randomize length of each path component
Siddharth Agarwal <sid0@fb.com>
parents:
19318
diff
changeset
|
135 while l < maxl: |
17934
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
136 p = pickfrom(rng, resttable)(rng) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
137 l += len(p) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
138 ps.append(p) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
139 ps.append(pickfrom(rng, lasttable)(rng)) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
140 return ''.join(ps) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
141 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
142 def makepath(rng, j, k): |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
143 '''Construct a complete pathname.''' |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
144 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
145 return ('data/' + '/'.join(makepart(rng, k) for _ in xrange(j)) + |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
146 rng.choice(['.d', '.i'])) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
147 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
148 def genpath(rng, count): |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
149 '''Generate random pathnames with gradually increasing lengths.''' |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
150 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
151 mink, maxk = 1, 4096 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
152 def steps(): |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
153 for i in xrange(count): |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
154 yield mink + int(round(math.sqrt((maxk - mink) * float(i) / count))) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
155 for k in steps(): |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
156 x = rng.randint(1, k) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
157 y = rng.randint(1, k) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
158 yield makepath(rng, x, y) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
159 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
160 def runtests(rng, seed, count): |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
161 nerrs = 0 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
162 for p in genpath(rng, count): |
18435
8c019d2fd7c0
store: switch to C-based hashed path encoding
Bryan O'Sullivan <bryano@fb.com>
parents:
18110
diff
changeset
|
163 h = store._pathencode(p) # uses C implementation, if available |
18094
8ceabb34f1cb
test-pathencode: compare current pathencoding implementations
Adrian Buehlmann <adrian@cadifra.com>
parents:
17947
diff
changeset
|
164 r = store._hybridencode(p, True) # reference implementation in Python |
8ceabb34f1cb
test-pathencode: compare current pathencoding implementations
Adrian Buehlmann <adrian@cadifra.com>
parents:
17947
diff
changeset
|
165 if h != r: |
8ceabb34f1cb
test-pathencode: compare current pathencoding implementations
Adrian Buehlmann <adrian@cadifra.com>
parents:
17947
diff
changeset
|
166 if nerrs == 0: |
28918
72f683260f31
tests: make test-pathencode use print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents:
26849
diff
changeset
|
167 print('seed:', hex(seed)[:-1], file=sys.stderr) |
72f683260f31
tests: make test-pathencode use print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents:
26849
diff
changeset
|
168 print("\np: '%s'" % p.encode("string_escape"), file=sys.stderr) |
72f683260f31
tests: make test-pathencode use print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents:
26849
diff
changeset
|
169 print("h: '%s'" % h.encode("string_escape"), file=sys.stderr) |
72f683260f31
tests: make test-pathencode use print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents:
26849
diff
changeset
|
170 print("r: '%s'" % r.encode("string_escape"), file=sys.stderr) |
18094
8ceabb34f1cb
test-pathencode: compare current pathencoding implementations
Adrian Buehlmann <adrian@cadifra.com>
parents:
17947
diff
changeset
|
171 nerrs += 1 |
17934
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
172 return nerrs |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
173 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
174 def main(): |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
175 import getopt |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
176 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
177 # Empirically observed to take about a second to run |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
178 count = 100 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
179 seed = None |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
180 opts, args = getopt.getopt(sys.argv[1:], 'c:s:', |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
181 ['build', 'count=', 'seed=']) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
182 for o, a in opts: |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
183 if o in ('-c', '--count'): |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
184 count = int(a) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
185 elif o in ('-s', '--seed'): |
18110
acfc6fab1361
test-pathencode: accept --seed parameter in hex as well
Adrian Buehlmann <adrian@cadifra.com>
parents:
18094
diff
changeset
|
186 seed = long(a, base=0) # accepts base 10 or 16 strings |
17934
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
187 elif o == '--build': |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
188 buildprobtable(sys.stdout, |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
189 'find .hg/store/data -type f && ' |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
190 'cat .hg/store/fncache 2>/dev/null') |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
191 sys.exit(0) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
192 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
193 if seed is None: |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
194 try: |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
195 seed = long(binascii.hexlify(os.urandom(16)), 16) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
196 except AttributeError: |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
197 seed = long(time.time() * 1000) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
198 |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
199 rng = random.Random(seed) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
200 if runtests(rng, seed, count): |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
201 sys.exit(1) |
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
202 |
17947
f945caa5e963
test-pathencode: more aggressively check for python < 2.6
Bryan O'Sullivan <bryano@fb.com>
parents:
17935
diff
changeset
|
203 if __name__ == '__main__': |
17934
736f1c09f321
tests: add a randomized test for pathencode
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
204 main() |