--- a/tests/test-pathencode.py Sat Oct 05 10:29:34 2019 -0400
+++ b/tests/test-pathencode.py Sun Oct 06 09:45:02 2019 -0400
@@ -31,9 +31,12 @@
for c in (b'\0', b'/'):
validchars.remove(c)
-winreserved = (b'aux con prn nul'.split() +
- [b'com%d' % i for i in xrange(1, 10)] +
- [b'lpt%d' % i for i in xrange(1, 10)])
+winreserved = (
+ b'aux con prn nul'.split()
+ + [b'com%d' % i for i in xrange(1, 10)]
+ + [b'lpt%d' % i for i in xrange(1, 10)]
+)
+
def casecombinations(names):
'''Build all case-diddled combinations of names.'''
@@ -45,10 +48,11 @@
for c in itertools.combinations(xrange(len(r)), i):
d = r
for j in c:
- d = b''.join((d[:j], d[j:j + 1].upper(), d[j + 1:]))
+ d = b''.join((d[:j], d[j : j + 1].upper(), d[j + 1 :]))
combos.add(d)
return sorted(combos)
+
def buildprobtable(fp, cmd='hg manifest tip'):
'''Construct and print a table of probabilities for path name
components. The numbers are percentages.'''
@@ -65,8 +69,9 @@
counts.pop(c, None)
t = sum(counts.itervalues()) / 100.0
fp.write('probtable = (')
- for i, (k, v) in enumerate(sorted(counts.items(), key=lambda x: x[1],
- reverse=True)):
+ for i, (k, v) in enumerate(
+ sorted(counts.items(), key=lambda x: x[1], reverse=True)
+ ):
if (i % 5) == 0:
fp.write('\n ')
vt = v / t
@@ -75,29 +80,83 @@
fp.write('(%r, %.03f), ' % (k, vt))
fp.write('\n )\n')
+
# A table of character frequencies (as percentages), gleaned by
# looking at filelog names from a real-world, very large repo.
probtable = (
- (b't', 9.828), (b'e', 9.042), (b's', 8.011), (b'a', 6.801), (b'i', 6.618),
- (b'g', 5.053), (b'r', 5.030), (b'o', 4.887), (b'p', 4.363), (b'n', 4.258),
- (b'l', 3.830), (b'h', 3.693), (b'_', 3.659), (b'.', 3.377), (b'm', 3.194),
- (b'u', 2.364), (b'd', 2.296), (b'c', 2.163), (b'b', 1.739), (b'f', 1.625),
- (b'6', 0.666), (b'j', 0.610), (b'y', 0.554), (b'x', 0.487), (b'w', 0.477),
- (b'k', 0.476), (b'v', 0.473), (b'3', 0.336), (b'1', 0.335), (b'2', 0.326),
- (b'4', 0.310), (b'5', 0.305), (b'9', 0.302), (b'8', 0.300), (b'7', 0.299),
- (b'q', 0.298), (b'0', 0.250), (b'z', 0.223), (b'-', 0.118), (b'C', 0.095),
- (b'T', 0.087), (b'F', 0.085), (b'B', 0.077), (b'S', 0.076), (b'P', 0.076),
- (b'L', 0.059), (b'A', 0.058), (b'N', 0.051), (b'D', 0.049), (b'M', 0.046),
- (b'E', 0.039), (b'I', 0.035), (b'R', 0.035), (b'G', 0.028), (b'U', 0.026),
- (b'W', 0.025), (b'O', 0.017), (b'V', 0.015), (b'H', 0.013), (b'Q', 0.011),
- (b'J', 0.007), (b'K', 0.005), (b'+', 0.004), (b'X', 0.003), (b'Y', 0.001),
- )
+ (b't', 9.828),
+ (b'e', 9.042),
+ (b's', 8.011),
+ (b'a', 6.801),
+ (b'i', 6.618),
+ (b'g', 5.053),
+ (b'r', 5.030),
+ (b'o', 4.887),
+ (b'p', 4.363),
+ (b'n', 4.258),
+ (b'l', 3.830),
+ (b'h', 3.693),
+ (b'_', 3.659),
+ (b'.', 3.377),
+ (b'm', 3.194),
+ (b'u', 2.364),
+ (b'd', 2.296),
+ (b'c', 2.163),
+ (b'b', 1.739),
+ (b'f', 1.625),
+ (b'6', 0.666),
+ (b'j', 0.610),
+ (b'y', 0.554),
+ (b'x', 0.487),
+ (b'w', 0.477),
+ (b'k', 0.476),
+ (b'v', 0.473),
+ (b'3', 0.336),
+ (b'1', 0.335),
+ (b'2', 0.326),
+ (b'4', 0.310),
+ (b'5', 0.305),
+ (b'9', 0.302),
+ (b'8', 0.300),
+ (b'7', 0.299),
+ (b'q', 0.298),
+ (b'0', 0.250),
+ (b'z', 0.223),
+ (b'-', 0.118),
+ (b'C', 0.095),
+ (b'T', 0.087),
+ (b'F', 0.085),
+ (b'B', 0.077),
+ (b'S', 0.076),
+ (b'P', 0.076),
+ (b'L', 0.059),
+ (b'A', 0.058),
+ (b'N', 0.051),
+ (b'D', 0.049),
+ (b'M', 0.046),
+ (b'E', 0.039),
+ (b'I', 0.035),
+ (b'R', 0.035),
+ (b'G', 0.028),
+ (b'U', 0.026),
+ (b'W', 0.025),
+ (b'O', 0.017),
+ (b'V', 0.015),
+ (b'H', 0.013),
+ (b'Q', 0.011),
+ (b'J', 0.007),
+ (b'K', 0.005),
+ (b'+', 0.004),
+ (b'X', 0.003),
+ (b'Y', 0.001),
+)
for c, _ in probtable:
validchars.remove(c)
validchars = list(validchars)
+
def pickfrom(rng, table):
c = 0
r = rng.random() * sum(i[1] for i in table)
@@ -106,6 +165,7 @@
if c >= r:
return i
+
reservedcombos = casecombinations(winreserved)
# The first component of a name following a slash.
@@ -114,7 +174,7 @@
(lambda rng: pickfrom(rng, probtable), 90),
(lambda rng: rng.choice(validchars), 5),
(lambda rng: rng.choice(reservedcombos), 5),
- )
+)
# Components of a name following the first.
@@ -129,7 +189,8 @@
lasttable = resttable + (
(lambda rng: b'', 95),
(lambda rng: rng.choice(internalsuffixcombos), 5),
- )
+)
+
def makepart(rng, k):
'''Construct a part of a pathname, without slashes.'''
@@ -145,29 +206,37 @@
ps.append(pickfrom(rng, lasttable)(rng))
return b''.join(ps)
+
def makepath(rng, j, k):
'''Construct a complete pathname.'''
- return (b'data/' + b'/'.join(makepart(rng, k) for _ in xrange(j)) +
- rng.choice([b'.d', b'.i']))
+ return (
+ b'data/'
+ + b'/'.join(makepart(rng, k) for _ in xrange(j))
+ + rng.choice([b'.d', b'.i'])
+ )
+
def genpath(rng, count):
'''Generate random pathnames with gradually increasing lengths.'''
mink, maxk = 1, 4096
+
def steps():
for i in xrange(count):
yield mink + int(round(math.sqrt((maxk - mink) * float(i) / count)))
+
for k in steps():
x = rng.randint(1, k)
y = rng.randint(1, k)
yield makepath(rng, x, y)
+
def runtests(rng, seed, count):
nerrs = 0
for p in genpath(rng, count):
- h = store._pathencode(p) # uses C implementation, if available
- r = store._hybridencode(p, True) # reference implementation in Python
+ h = store._pathencode(p) # uses C implementation, if available
+ r = store._hybridencode(p, True) # reference implementation in Python
if h != r:
if nerrs == 0:
print('seed:', hex(seed)[:-1], file=sys.stderr)
@@ -177,23 +246,27 @@
nerrs += 1
return nerrs
+
def main():
import getopt
# Empirically observed to take about a second to run
count = 100
seed = None
- opts, args = getopt.getopt(sys.argv[1:], 'c:s:',
- ['build', 'count=', 'seed='])
+ opts, args = getopt.getopt(
+ sys.argv[1:], 'c:s:', ['build', 'count=', 'seed=']
+ )
for o, a in opts:
if o in ('-c', '--count'):
count = int(a)
elif o in ('-s', '--seed'):
- seed = int(a, base=0) # accepts base 10 or 16 strings
+ seed = int(a, base=0) # accepts base 10 or 16 strings
elif o == '--build':
- buildprobtable(sys.stdout,
- 'find .hg/store/data -type f && '
- 'cat .hg/store/fncache 2>/dev/null')
+ buildprobtable(
+ sys.stdout,
+ 'find .hg/store/data -type f && '
+ 'cat .hg/store/fncache 2>/dev/null',
+ )
sys.exit(0)
if seed is None:
@@ -206,5 +279,6 @@
if runtests(rng, seed, count):
sys.exit(1)
+
if __name__ == '__main__':
main()