changelog: specify checkambig=True to revlog.__init__, to avoid ambiguity
If steps below occurs at "the same time in sec", all of mtime, ctime
and size are same between (1) and (3).
1. append data to 00changelog.i (and close transaction)
2. discard appended data by truncation (strip or rollback)
3. append same size but different data to 00changelog.i again
Therefore, cache validation doesn't work after (3) as expected.
To avoid such file stat ambiguity around truncation, this patch
specifies checkambig=True to revlog.__init__(). This makes revlog
write changes out with checkambig=True.
Even though changes of 00changelog.i themselves are written out at
changelog._finalize(), this checkambig=True is needed, because
revlog.checkinlinesize(), which is invoked at the end of
changelog._finalize(), might replace already changed 00changelog.i by
converted one.
Even after this patch, avoiding file stat ambiguity of 00changelog.i
around truncation isn't yet completed, because truncation side isn't
aware of this issue.
This is a part of ExactCacheValidationPlan.
https://www.mercurial-scm.org/wiki/ExactCacheValidationPlan
# bdiff.py - Python implementation of bdiff.c
#
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import
import array
import difflib
import re
import struct
from . import policy
policynocffi = policy.policynocffi
modulepolicy = policy.policy
def splitnewlines(text):
'''like str.splitlines, but only split on newlines.'''
lines = [l + '\n' for l in text.split('\n')]
if lines:
if lines[-1] == '\n':
lines.pop()
else:
lines[-1] = lines[-1][:-1]
return lines
def _normalizeblocks(a, b, blocks):
prev = None
r = []
for curr in blocks:
if prev is None:
prev = curr
continue
shift = 0
a1, b1, l1 = prev
a1end = a1 + l1
b1end = b1 + l1
a2, b2, l2 = curr
a2end = a2 + l2
b2end = b2 + l2
if a1end == a2:
while (a1end + shift < a2end and
a[a1end + shift] == b[b1end + shift]):
shift += 1
elif b1end == b2:
while (b1end + shift < b2end and
a[a1end + shift] == b[b1end + shift]):
shift += 1
r.append((a1, b1, l1 + shift))
prev = a2 + shift, b2 + shift, l2 - shift
r.append(prev)
return r
def _tostring(c):
if type(c) is array.array:
# this copy overhead isn't ideal
return c.tostring()
return str(c)
def bdiff(a, b):
a = _tostring(a).splitlines(True)
b = _tostring(b).splitlines(True)
if not a:
s = "".join(b)
return s and (struct.pack(">lll", 0, 0, len(s)) + s)
bin = []
p = [0]
for i in a: p.append(p[-1] + len(i))
d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
d = _normalizeblocks(a, b, d)
la = 0
lb = 0
for am, bm, size in d:
s = "".join(b[lb:bm])
if am > la or s:
bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
la = am + size
lb = bm + size
return "".join(bin)
def blocks(a, b):
an = splitnewlines(a)
bn = splitnewlines(b)
d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
d = _normalizeblocks(an, bn, d)
return [(i, i + n, j, j + n) for (i, j, n) in d]
def fixws(text, allws):
if allws:
text = re.sub('[ \t\r]+', '', text)
else:
text = re.sub('[ \t\r]+', ' ', text)
text = text.replace(' \n', '\n')
return text
if modulepolicy not in policynocffi:
try:
from _bdiff_cffi import ffi, lib
except ImportError:
if modulepolicy == 'cffi': # strict cffi import
raise
else:
def blocks(sa, sb):
a = ffi.new("struct bdiff_line**")
b = ffi.new("struct bdiff_line**")
ac = ffi.new("char[]", sa)
bc = ffi.new("char[]", sb)
l = ffi.new("struct bdiff_hunk*")
try:
an = lib.bdiff_splitlines(ac, len(sa), a)
bn = lib.bdiff_splitlines(bc, len(sb), b)
if not a[0] or not b[0]:
raise MemoryError
count = lib.bdiff_diff(a[0], an, b[0], bn, l)
if count < 0:
raise MemoryError
rl = [None] * count
h = l.next
i = 0
while h:
rl[i] = (h.a1, h.a2, h.b1, h.b2)
h = h.next
i += 1
finally:
lib.free(a[0])
lib.free(b[0])
lib.bdiff_freehunks(l.next)
return rl
def bdiff(sa, sb):
a = ffi.new("struct bdiff_line**")
b = ffi.new("struct bdiff_line**")
ac = ffi.new("char[]", sa)
bc = ffi.new("char[]", sb)
l = ffi.new("struct bdiff_hunk*")
try:
an = lib.bdiff_splitlines(ac, len(sa), a)
bn = lib.bdiff_splitlines(bc, len(sb), b)
if not a[0] or not b[0]:
raise MemoryError
count = lib.bdiff_diff(a[0], an, b[0], bn, l)
if count < 0:
raise MemoryError
rl = []
h = l.next
la = lb = 0
while h:
if h.a1 != la or h.b1 != lb:
lgt = (b[0] + h.b1).l - (b[0] + lb).l
rl.append(struct.pack(">lll", (a[0] + la).l - a[0].l,
(a[0] + h.a1).l - a[0].l, lgt))
rl.append(str(ffi.buffer((b[0] + lb).l, lgt)))
la = h.a2
lb = h.b2
h = h.next
finally:
lib.free(a[0])
lib.free(b[0])
lib.bdiff_freehunks(l.next)
return "".join(rl)