author | Gregory Szorc <gregory.szorc@gmail.com> |
Mon, 24 Sep 2018 14:54:28 -0700 | |
changeset 39880 | 1b65fb4d43d6 |
parent 39879 | d269ddbf54f0 |
child 39881 | d63153611ed5 |
permissions | -rw-r--r-- |
39877
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1 |
# storageutil.py - Storage functionality agnostic of backend implementation. |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
2 |
# |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
3 |
# Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com> |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
4 |
# |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
5 |
# This software may be used and distributed according to the terms of the |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
6 |
# GNU General Public License version 2 or any later version. |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
7 |
|
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
8 |
from __future__ import absolute_import |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
9 |
|
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
10 |
import hashlib |
39878
3e896b51aa5d
storageutil: move metadata parsing and packing from revlog (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39877
diff
changeset
|
11 |
import re |
39877
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
12 |
|
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
13 |
from ..node import ( |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
14 |
nullid, |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
15 |
) |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
16 |
|
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
17 |
_nullhash = hashlib.sha1(nullid) |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
18 |
|
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
19 |
def hashrevisionsha1(text, p1, p2): |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
20 |
"""Compute the SHA-1 for revision data and its parents. |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
21 |
|
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
22 |
This hash combines both the current file contents and its history |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
23 |
in a manner that makes it easy to distinguish nodes with the same |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
24 |
content in the revision graph. |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
25 |
""" |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
26 |
# As of now, if one of the parent node is null, p2 is null |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
27 |
if p2 == nullid: |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
28 |
# deep copy of a hash is faster than creating one |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
29 |
s = _nullhash.copy() |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
30 |
s.update(p1) |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
31 |
else: |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
32 |
# none of the parent nodes are nullid |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
33 |
if p1 < p2: |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
34 |
a = p1 |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
35 |
b = p2 |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
36 |
else: |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
37 |
a = p2 |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
38 |
b = p1 |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
39 |
s = hashlib.sha1(a) |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
40 |
s.update(b) |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
41 |
s.update(text) |
f8eb71f9e3bd
storageutil: new module for storage primitives (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
42 |
return s.digest() |
39878
3e896b51aa5d
storageutil: move metadata parsing and packing from revlog (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39877
diff
changeset
|
43 |
|
3e896b51aa5d
storageutil: move metadata parsing and packing from revlog (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39877
diff
changeset
|
44 |
METADATA_RE = re.compile(b'\x01\n') |
3e896b51aa5d
storageutil: move metadata parsing and packing from revlog (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39877
diff
changeset
|
45 |
|
3e896b51aa5d
storageutil: move metadata parsing and packing from revlog (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39877
diff
changeset
|
46 |
def parsemeta(text): |
3e896b51aa5d
storageutil: move metadata parsing and packing from revlog (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39877
diff
changeset
|
47 |
"""Parse metadata header from revision data. |
3e896b51aa5d
storageutil: move metadata parsing and packing from revlog (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39877
diff
changeset
|
48 |
|
3e896b51aa5d
storageutil: move metadata parsing and packing from revlog (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39877
diff
changeset
|
49 |
Returns a 2-tuple of (metadata, offset), where both can be None if there |
3e896b51aa5d
storageutil: move metadata parsing and packing from revlog (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39877
diff
changeset
|
50 |
is no metadata. |
3e896b51aa5d
storageutil: move metadata parsing and packing from revlog (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39877
diff
changeset
|
51 |
""" |
3e896b51aa5d
storageutil: move metadata parsing and packing from revlog (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39877
diff
changeset
|
52 |
# text can be buffer, so we can't use .startswith or .index |
3e896b51aa5d
storageutil: move metadata parsing and packing from revlog (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39877
diff
changeset
|
53 |
if text[:2] != b'\x01\n': |
3e896b51aa5d
storageutil: move metadata parsing and packing from revlog (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39877
diff
changeset
|
54 |
return None, None |
3e896b51aa5d
storageutil: move metadata parsing and packing from revlog (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39877
diff
changeset
|
55 |
s = METADATA_RE.search(text, 2).start() |
3e896b51aa5d
storageutil: move metadata parsing and packing from revlog (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39877
diff
changeset
|
56 |
mtext = text[2:s] |
3e896b51aa5d
storageutil: move metadata parsing and packing from revlog (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39877
diff
changeset
|
57 |
meta = {} |
3e896b51aa5d
storageutil: move metadata parsing and packing from revlog (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39877
diff
changeset
|
58 |
for l in mtext.splitlines(): |
3e896b51aa5d
storageutil: move metadata parsing and packing from revlog (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39877
diff
changeset
|
59 |
k, v = l.split(b': ', 1) |
3e896b51aa5d
storageutil: move metadata parsing and packing from revlog (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39877
diff
changeset
|
60 |
meta[k] = v |
3e896b51aa5d
storageutil: move metadata parsing and packing from revlog (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39877
diff
changeset
|
61 |
return meta, s + 2 |
3e896b51aa5d
storageutil: move metadata parsing and packing from revlog (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39877
diff
changeset
|
62 |
|
3e896b51aa5d
storageutil: move metadata parsing and packing from revlog (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39877
diff
changeset
|
63 |
def packmeta(meta, text): |
3e896b51aa5d
storageutil: move metadata parsing and packing from revlog (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39877
diff
changeset
|
64 |
"""Add metadata to fulltext to produce revision text.""" |
3e896b51aa5d
storageutil: move metadata parsing and packing from revlog (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39877
diff
changeset
|
65 |
keys = sorted(meta) |
3e896b51aa5d
storageutil: move metadata parsing and packing from revlog (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39877
diff
changeset
|
66 |
metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys) |
3e896b51aa5d
storageutil: move metadata parsing and packing from revlog (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39877
diff
changeset
|
67 |
return b'\x01\n%s\x01\n%s' % (metatext, text) |
39879
d269ddbf54f0
storageutil: move _censoredtext() from revlog
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39878
diff
changeset
|
68 |
|
d269ddbf54f0
storageutil: move _censoredtext() from revlog
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39878
diff
changeset
|
69 |
def iscensoredtext(text): |
d269ddbf54f0
storageutil: move _censoredtext() from revlog
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39878
diff
changeset
|
70 |
meta = parsemeta(text)[0] |
d269ddbf54f0
storageutil: move _censoredtext() from revlog
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39878
diff
changeset
|
71 |
return meta and b'censored' in meta |
39880
1b65fb4d43d6
storageutil: new function for extracting metadata-less content from text
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39879
diff
changeset
|
72 |
|
1b65fb4d43d6
storageutil: new function for extracting metadata-less content from text
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39879
diff
changeset
|
73 |
def filtermetadata(text): |
1b65fb4d43d6
storageutil: new function for extracting metadata-less content from text
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39879
diff
changeset
|
74 |
"""Extract just the revision data from source text. |
1b65fb4d43d6
storageutil: new function for extracting metadata-less content from text
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39879
diff
changeset
|
75 |
|
1b65fb4d43d6
storageutil: new function for extracting metadata-less content from text
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39879
diff
changeset
|
76 |
Returns ``text`` unless it has a metadata header, in which case we return |
1b65fb4d43d6
storageutil: new function for extracting metadata-less content from text
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39879
diff
changeset
|
77 |
a new buffer without hte metadata. |
1b65fb4d43d6
storageutil: new function for extracting metadata-less content from text
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39879
diff
changeset
|
78 |
""" |
1b65fb4d43d6
storageutil: new function for extracting metadata-less content from text
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39879
diff
changeset
|
79 |
if not text.startswith(b'\x01\n'): |
1b65fb4d43d6
storageutil: new function for extracting metadata-less content from text
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39879
diff
changeset
|
80 |
return text |
1b65fb4d43d6
storageutil: new function for extracting metadata-less content from text
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39879
diff
changeset
|
81 |
|
1b65fb4d43d6
storageutil: new function for extracting metadata-less content from text
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39879
diff
changeset
|
82 |
offset = text.index(b'\x01\n', 2) |
1b65fb4d43d6
storageutil: new function for extracting metadata-less content from text
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39879
diff
changeset
|
83 |
return text[offset + 2:] |