Mercurial > hg
view mercurial/pure/bdiff.py @ 39814:d059cb669632
wireprotov2: allow multiple fields to follow revision maps
The *data wire protocol commands emit a series of CBOR values.
Because revision/delta data may be large, their data is emitted
outside the map as a top-level bytestring value.
Before this commit, we'd emit a single optional bytestring
value after the revision descriptor map. This got the job done.
But it was limiting in that we could only send a single field.
And, it required the consumer to know that the presence of a
key in the map implied the existence of a following bytestring
value.
This commit changes the encoding strategy so top-level bytestring
values in the stream are explicitly denoted in a "fieldsfollowing"
key. This key contains an array defining what fields that follow
and the expected size of each field.
By defining things this way, we can easily send N bytestring
values without any ambiguity about their order. In addition,
clients only need to know how to parse ``fieldsfollowing`` to
know if extra values are present.
Because this breaks backwards compatibility, we've bumped the version
number of the wire protocol version 2 API endpoint.
Differential Revision: https://phab.mercurial-scm.org/D4620
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Thu, 20 Sep 2018 12:57:23 -0700 |
parents | 29dd37a418aa |
children | 8b7973d40a01 |
line wrap: on
line source
# bdiff.py - Python implementation of bdiff.c # # Copyright 2009 Matt Mackall <mpm@selenic.com> and others # # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. from __future__ import absolute_import import difflib import re import struct def splitnewlines(text): '''like str.splitlines, but only split on newlines.''' lines = [l + '\n' for l in text.split('\n')] if lines: if lines[-1] == '\n': lines.pop() else: lines[-1] = lines[-1][:-1] return lines def _normalizeblocks(a, b, blocks): prev = None r = [] for curr in blocks: if prev is None: prev = curr continue shift = 0 a1, b1, l1 = prev a1end = a1 + l1 b1end = b1 + l1 a2, b2, l2 = curr a2end = a2 + l2 b2end = b2 + l2 if a1end == a2: while (a1end + shift < a2end and a[a1end + shift] == b[b1end + shift]): shift += 1 elif b1end == b2: while (b1end + shift < b2end and a[a1end + shift] == b[b1end + shift]): shift += 1 r.append((a1, b1, l1 + shift)) prev = a2 + shift, b2 + shift, l2 - shift r.append(prev) return r def bdiff(a, b): a = bytes(a).splitlines(True) b = bytes(b).splitlines(True) if not a: s = "".join(b) return s and (struct.pack(">lll", 0, 0, len(s)) + s) bin = [] p = [0] for i in a: p.append(p[-1] + len(i)) d = difflib.SequenceMatcher(None, a, b).get_matching_blocks() d = _normalizeblocks(a, b, d) la = 0 lb = 0 for am, bm, size in d: s = "".join(b[lb:bm]) if am > la or s: bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s) la = am + size lb = bm + size return "".join(bin) def blocks(a, b): an = splitnewlines(a) bn = splitnewlines(b) d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks() d = _normalizeblocks(an, bn, d) return [(i, i + n, j, j + n) for (i, j, n) in d] def fixws(text, allws): if allws: text = re.sub('[ \t\r]+', '', text) else: text = re.sub('[ \t\r]+', ' ', text) text = text.replace(' \n', '\n') return text def splitnewlines(text): '''like str.splitlines, but only split on newlines.''' lines = [l + '\n' for l in text.split('\n')] if lines: if lines[-1] == '\n': lines.pop() else: lines[-1] = lines[-1][:-1] return lines