Make compression more intelligent:
- we don't attempt to compress things under 44 bytes (empirical)
- we check whether larger objects actually compress
- we tag objects to indicate their compression
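  - NUL tag: the data is stored uncompressed and itself starts with NUL
  - x tag: the data is gzipped, and gzipped data itself starts with x (handy)
  - u tag: the data is stored uncompressed behind a leading u; drop the u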
#!/usr/bin/env python
import sys, os, sha, base64, re
from mercurial import hg
# Mercurial handles: a ui object and the repository opened with it.
# NOTE(review): presumably this opens the repository in the current
# directory, given the relative ".hg/..." paths used later -- confirm.
ui = hg.ui()
repo = hg.repository(ui=ui)

# File names seen in the changelog, stored as dict keys; the values are
# never read, only the keys are used.
known = dict()
def encodepath(path):
s = sha.sha(path).digest()
s = base64.encodestring(s)[:-3]
s = re.sub("\+", "%", s)
s = re.sub("/", "_", s)
return s
def _copyfile(src, dst):
    """Copy the whole contents of src to dst.

    Both files are opened in binary mode so the bytes are copied verbatim
    on every platform, and both are closed even if the copy fails.
    """
    fin = open(src, "rb")
    try:
        fout = open(dst, "wb")
        try:
            fout.write(fin.read())
        finally:
            fout.close()
    finally:
        fin.close()


# Record every file name listed in any changelog entry.
for i in range(repo.changelog.count()):
    n = repo.changelog.node(i)
    changes = repo.changelog.read(n)
    # Field 3 of a changelog entry is iterated as the list of file names.
    for f in changes[3]:
        known[f] = 1

# Tolerate a data-new directory that already exists, but let any other
# failure surface instead of being silently swallowed.
try:
    os.mkdir(".hg/data-new")
except OSError:
    if not os.path.isdir(".hg/data-new"):
        raise

files = list(known)
files.sort()

for f in files:
    # Old layout: hashed name, index stored as <hash>i and data as <hash>.
    pb = ".hg/data/" + encodepath(f)
    # New layout: the file's own path, with .i and .d suffixes.
    pn = ".hg/data-new/" + f
    print(f)

    # Create the destination directory up front rather than treating any
    # failed write as "directory missing", which used to mask real errors.
    parent = os.path.dirname(pn)
    if not os.path.isdir(parent):
        os.makedirs(parent)

    # we actually copy the files to get nice disk layout
    _copyfile(pb + "i", pn + ".i")
    _copyfile(pb, pn + ".d")

# Swap the new store into place, keeping the old one as data-old.
os.rename(".hg/data", ".hg/data-old")
os.rename(".hg/data-new", ".hg/data")