Mercurial > hg
comparison hgext/censor.py @ 24347:1bcfecbbf569
censor: add censor command to hgext with basic client-side tests
The censor command is a core extension which can replace the contents of a
historical file revision with a censor "tombstone" which can be exchanged
with older clients in place of the real revision data. The command rewrites
the filelog by copying revision-by-revision.
Care must be taken to expand the fulltext of the children of the censored
revision before copying them to the new filelog; they might be stored as
deltas against the uncensored revision, and those deltas will be invalidated.
For more background on the censorship feature design, see:
http://mercurial.selenic.com/wiki/CensorPlan
author | Mike Edgar <adgar@google.com> |
---|---|
date | Sun, 15 Mar 2015 21:52:17 -0400 |
parents | |
children | 5e111acc1170 |
comparison
equal
deleted
inserted
replaced
24346:31edcea517c1 | 24347:1bcfecbbf569 |
---|---|
1 # Copyright (C) 2015 - Mike Edgar <adgar@google.com> | |
2 # | |
3 # This extension enables removal of file content at a given revision, | |
4 # rewriting the data/metadata of successive revisions to preserve revision log | |
5 # integrity. | |
6 | |
7 """erase file content at a given revision | |
8 | |
9 The censor command instructs Mercurial to erase all content of a file at a given | |
10 revision *without updating the changeset hash.* This allows existing history to | |
11 remain valid while preventing future clones/pulls from receiving the erased | |
12 data. | |
13 | |
14 Typical uses for censor are due to security or legal requirements, including:: | |
15 | |
16 * Passwords, private keys, cryptographic material | |
17 * Licensed data/code/libraries for which the license has expired | |
18 * Personally Identifiable Information or other private data | |
19 | |
20 Censored file revisions are listed in a tracked file called .hgcensored stored | |
21 in the repository root. The censor command adds an entry to the .hgcensored file | |
22 in the working directory and commits it (much like ``hg tag`` and .hgtags). The | |
23 censored file data is then replaced with a pointer to the new commit, enabling | |
24 verification. | |
25 | |
26 Censored nodes can interrupt mercurial's typical operation whenever the excised | |
27 data needs to be materialized. Some commands, like ``hg cat``/``hg revert``, | |
28 simply fail when asked to produce censored data. Others, like ``hg verify`` and | |
29 ``hg update``, must be capable of tolerating censored data to continue to | |
30 function in a meaningful way. Such commands only tolerate censored file | |
31 revisions if they are allowed by the policy specified by the "censor.allow" | |
32 config option. | |
33 """ | |
34 | |
35 from mercurial.node import short | |
36 from mercurial import cmdutil, error, filelog, revlog, scmutil, util | |
37 from mercurial.i18n import _ | |
38 | |
# Command table for this extension. It is handed to cmdutil.command() so that
# the resulting ``command`` decorator (applied to ``censor`` below) can
# register commands in it.
cmdtable = {}
command = cmdutil.command(cmdtable)
# NOTE(review): presumably marks the extension as tested with the Mercurial
# release it ships with -- confirm against the extension-writing guidelines.
testedwith = 'internal'
42 | |
# Command entry point: replace the stored data of FILE at revision REV with a
# censor tombstone.
#
# The whole operation runs with both the working-directory lock and the store
# lock held (wlock acquired first): the checks below read the working
# directory parents and the repository heads, and the rewrite replaces the
# filelog's index/data files, so neither may change underneath us.
@command('censor',
    [('r', 'rev', '', _('censor file from specified revision'), _('REV')),
     ('t', 'tombstone', '', _('replacement tombstone data'), _('TEXT'))],
    _('-r REV [-t TEXT] [FILE]'))
def censor(ui, repo, path, rev='', tombstone='', **opts):
    wlock = lock = None
    try:
        wlock = repo.wlock()
        lock = repo.lock()
        return _docensor(ui, repo, path, rev, tombstone, **opts)
    finally:
        # Release in reverse acquisition order; either may still be None if
        # acquiring it failed.
        if lock is not None:
            lock.release()
        if wlock is not None:
            wlock.release()

def _docensor(ui, repo, path, rev='', tombstone='', **opts):
    """Validate the request and rewrite the filelog of ``path``.

    The caller is expected to hold the repository locks.

    Arguments:
      path: the file whose history is rewritten.
      rev: revision identifier selecting the changeset whose version of
        ``path`` is censored.
      tombstone: text stored (as "censored" metadata) in place of the data.

    Raises util.Abort when:
      * ``path`` or ``rev`` is empty,
      * the file has no history, or does not exist at ``rev``,
      * the file revision is still present in a repository head,
      * ``rev`` is a parent of the working directory,
      * the filelog is not in the REVLOGNG format,
      * the packed tombstone is longer than the data it replaces.
    """
    if not path:
        raise util.Abort(_('must specify file path to censor'))
    if not rev:
        raise util.Abort(_('must specify revision to censor'))

    flog = repo.file(path)
    if not len(flog):
        raise util.Abort(_('cannot censor file with no history'))

    rev = scmutil.revsingle(repo, rev, rev).rev()
    try:
        ctx = repo[rev]
    except KeyError:
        raise util.Abort(_('invalid revision identifier %s') % rev)

    try:
        fctx = ctx.filectx(path)
    except error.LookupError:
        raise util.Abort(_('file does not exist at revision %s') % rev)

    # Refuse to censor a file revision that any head still carries.
    fnode = fctx.filenode()
    headctxs = [repo[c] for c in repo.heads()]
    heads = [c for c in headctxs if path in c and c.filenode(path) == fnode]
    if heads:
        headlist = ', '.join([short(c.node()) for c in heads])
        raise util.Abort(_('cannot censor file in heads (%s)') % headlist,
            hint=_('clean/delete and commit first'))

    # Refuse to censor a changeset the working directory is based on.
    wctx = repo[None]
    wp = wctx.parents()
    if ctx.node() in [p.node() for p in wp]:
        raise util.Abort(_('cannot censor working directory'),
            hint=_('clean/delete/update first'))

    # The low 16 bits of the version field hold the format number.
    flogv = flog.version & 0xFFFF
    if flogv != revlog.REVLOGNG:
        raise util.Abort(
            _('censor does not support revlog version %d') % (flogv,))

    tombstone = filelog.packmeta({"censored": tombstone}, "")

    # Filelog revision number of the entry being censored.
    crev = fctx.filerev()

    # The tombstone is padded up to the original raw size further down, so it
    # must not be longer than the data it replaces.
    if len(tombstone) > flog.rawsize(crev):
        raise util.Abort(_(
            'censor tombstone must be no longer than censored data'))

    # Using two files instead of one makes it easy to rewrite entry-by-entry
    idxread = repo.svfs(flog.indexfile, 'r')
    idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True)
    if flog.version & revlog.REVLOGNGINLINEDATA:
        # Inline revlog: index entries and data share a single file.
        dataread, datawrite = idxread, idxwrite
    else:
        dataread = repo.svfs(flog.datafile, 'r')
        datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True)

    # Copy all revlog data up to the entry to be censored.
    rio = revlog.revlogio()
    offset = flog.start(crev)

    for chunk in util.filechunkiter(idxread, limit=crev * rio.size):
        idxwrite.write(chunk)
    for chunk in util.filechunkiter(dataread, limit=offset):
        datawrite.write(chunk)

    def rewriteindex(r, newoffs, newdata=None):
        """Rewrite the index entry with a new data offset and optional new data.

        The newdata argument, if given, is a tuple of three positive integers:
        (new compressed, new uncompressed, added flag bits).
        """
        offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r]
        flags = revlog.gettype(offlags)
        if newdata:
            comp, uncomp, nflags = newdata
            flags |= nflags
        offlags = revlog.offset_type(newoffs, flags)
        # NOTE(review): the rewritten entry records ``r`` itself in the slot
        # unpacked above as ``base``, including for entries whose data is
        # copied verbatim (newdata is None). Confirm such entries are never
        # stored as deltas; otherwise the original ``base`` is lost here.
        e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
        idxwrite.write(rio.packentry(e, None, flog.version, r))
        # Skip over the original index entry in the input.
        idxread.seek(rio.size, 1)

    def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS):
        """Write the given full text to the filelog with the given data offset.

        Returns:
            The integer number of data bytes written, for tracking data offsets.
        """
        flag, compdata = flog.compress(data)
        newcomp = len(flag) + len(compdata)
        rewriteindex(r, offs, (newcomp, len(data), nflags))
        datawrite.write(flag)
        datawrite.write(compdata)
        # Skip over the original stored data for this revision in the input.
        dataread.seek(flog.length(r), 1)
        return newcomp

    # Rewrite censored revlog entry with (padded) tombstone data.
    pad = ' ' * (flog.rawsize(crev) - len(tombstone))
    offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED)

    # Rewrite all following filelog revisions fixing up offsets and deltas.
    for srev in xrange(crev + 1, len(flog)):
        if crev in flog.parentrevs(srev):
            # Immediate children of censored node must be re-added as fulltext.
            try:
                revdata = flog.revision(srev)
            except error.CensoredNodeError as e:
                # The child is itself censored: carry its tombstone over.
                revdata = e.tombstone
            dlen = rewrite(srev, offset, revdata)
        else:
            # Copy any other revision data verbatim after fixing up the offset.
            rewriteindex(srev, offset)
            dlen = flog.length(srev)
            for chunk in util.filechunkiter(dataread, limit=dlen):
                datawrite.write(chunk)
        offset += dlen

    # The output files were opened with atomictemp=True; they are closed only
    # after the complete rewrite succeeded.
    idxread.close()
    idxwrite.close()
    if dataread is not idxread:
        dataread.close()
        datawrite.close()