Mercurial > hg
comparison hgext/git/gitlog.py @ 44477:ad718271a9eb
git: skeleton of a new extension to _directly_ operate on git repos
This is based in part on work I did years ago in hgit, but it's mostly
new code since I'm using pygit2 instead of dulwich and the hg storage
interfaces have improved. Some cleanup of old hgit code by Pulkit,
which I greatly appreciate.
test-git-interop.t does not cover a whole lot of cases, but it
passes. It includes status, diff, making a new commit, and `hg annotate`
working on the git repository.
This is _not_ (yet) production quality code: this is an
experiment. Known technical debt lurking in this implementation:
* Writing bookmarks just totally ignores transactions.
* The way progress is threaded down into the gitstore is awful.
* Ideally we'd find a way to incrementally reindex DAGs. I'm not sure
how to do that efficiently, so we might need a "known only fast-forwards"
mode on the DAG indexer for use on `hg commit` and friends.
* We don't even _try_ to do anything reasonable for `hg pull` or `hg push`.
* Mercurial needs an interface for the changelog type.
Tests currently require git 2.24 as far as I'm aware: `git status` has
some changed output that I didn't try and handle in a compatible way.
This patch has produced some interesting cleanups, most recently on
the manifest type. I expect continuing down this road will produce
other meritorious cleanups throughout our code.
Differential Revision: https://phab.mercurial-scm.org/D6734
author | Augie Fackler <augie@google.com> |
---|---|
date | Tue, 11 Feb 2020 00:44:59 -0500 |
parents | |
children | 6d953b3fc2bd |
comparison
equal
deleted
inserted
replaced
44470:a08bbdf839ae | 44477:ad718271a9eb |
---|---|
1 from __future__ import absolute_import | |
2 | |
3 import pygit2 | |
4 | |
5 from mercurial.i18n import _ | |
6 | |
7 from mercurial import ( | |
8 ancestor, | |
9 changelog as hgchangelog, | |
10 dagop, | |
11 encoding, | |
12 error, | |
13 manifest, | |
14 node as nodemod, | |
15 pycompat, | |
16 ) | |
17 from mercurial.interfaces import ( | |
18 repository, | |
19 util as interfaceutil, | |
20 ) | |
21 from mercurial.utils import stringutil | |
22 from . import ( | |
23 gitutil, | |
24 index, | |
25 manifest as gitmanifest, | |
26 ) | |
27 | |
28 | |
class baselog(object):  # revlog.revlog):
    """Common implementations between changelog and manifestlog.

    Holds the git repository object and the database connection whose
    ``changelog`` table maps revision numbers to git-style node ids.
    """

    def __init__(self, gr, db):
        """Remember the git repository ``gr`` and index database ``db``."""
        self.gitrepo = gr
        self._db = db

    def __len__(self):
        """Return the number of rows in the ``changelog`` table."""
        return int(
            self._db.execute('SELECT COUNT(*) FROM changelog').fetchone()[0]
        )

    def rev(self, n):
        """Return the revision number recorded for node ``n``.

        The null node maps to -1. Raises ``error.LookupError`` when the
        node is not present in the ``changelog`` table.
        """
        if n == nodemod.nullid:
            return -1
        t = self._db.execute(
            'SELECT rev FROM changelog WHERE node = ?', (gitutil.togitnode(n),)
        ).fetchone()
        if t is None:
            # The message used to carry a '%d' placeholder that was never
            # formatted; use the same plain text as node() below.
            raise error.LookupError(n, b'00changelog.i', _(b'no node'))
        return t[0]

    def node(self, r):
        """Return the binary node recorded for revision number ``r``.

        The null revision maps to the null node. Raises
        ``error.LookupError`` when the revision is not in the table.
        """
        if r == nodemod.nullrev:
            return nodemod.nullid
        t = self._db.execute(
            'SELECT node FROM changelog WHERE rev = ?', (r,)
        ).fetchone()
        if t is None:
            raise error.LookupError(r, b'00changelog.i', _(b'no node'))
        return nodemod.bin(t[0])

    def hasnode(self, n):
        """Return True if node ``n`` has a row in the ``changelog`` table."""
        # Convert to the git-style node first, exactly as rev() does; the
        # raw binary node never compares equal to the stored value.
        t = self._db.execute(
            'SELECT node FROM changelog WHERE node = ?',
            (gitutil.togitnode(n),),
        ).fetchone()
        return t is not None
66 | |
67 | |
class baselogindex(object):
    """Minimal revlog-index lookalike built on top of a log object.

    ``log`` must provide ``rev``, ``node``, ``parentrevs`` and ``__len__``.
    """

    def __init__(self, log):
        self._log = log

    def has_node(self, n):
        """Return True if ``n`` resolves to a real (non-null) revision."""
        try:
            return self._log.rev(n) != -1
        except error.LookupError:
            # rev() reports unknown nodes by raising rather than by
            # returning -1; a membership predicate should answer False.
            return False

    def __len__(self):
        return len(self._log)

    def __getitem__(self, idx):
        """Return a revlog-style index tuple for revision ``idx``."""
        p1rev, p2rev = self._log.parentrevs(idx)
        # TODO: it's messy that the index leaks so far out of the
        # storage layer that we have to implement things like reading
        # this raw tuple, which exposes revlog internals.
        return (
            # Pretend offset is just the index, since we don't really care.
            idx,
            # Same with lengths
            idx,  # length
            idx,  # rawsize
            -1,  # delta base
            idx,  # linkrev TODO is this right?
            p1rev,
            p2rev,
            self._log.node(idx),
        )
95 | |
96 | |
97 # TODO: an interface for the changelog type? | |
class changelog(baselog):
    """Changelog implementation backed by a git repository and its index."""

    def __contains__(self, rev):
        """Return True if ``rev`` can be resolved to a node."""
        try:
            self.node(rev)
            return True
        except error.LookupError:
            return False

    @property
    def filteredrevs(self):
        # TODO: we should probably add a refs/hg/ namespace for hidden
        # heads etc, but that's an idea for later.
        return set()

    @property
    def index(self):
        return baselogindex(self)

    @property
    def nodemap(self):
        """Return a dict mapping every binary node to its revision number."""
        r = {
            nodemod.bin(v[0]): v[1]
            for v in self._db.execute('SELECT node, rev FROM changelog')
        }
        r[nodemod.nullid] = nodemod.nullrev
        return r

    def tip(self):
        """Return the node with the highest rev, or nullid if empty."""
        t = self._db.execute(
            'SELECT node FROM changelog ORDER BY rev DESC LIMIT 1'
        ).fetchone()
        if t:
            return nodemod.bin(t[0])
        return nodemod.nullid

    def revs(self, start=0, stop=None):
        """Yield revision numbers from ``start`` to ``stop`` inclusive.

        When ``stop`` is None the range extends to the tip revision.
        """
        if stop is None:
            # tip() returns a node, but the query bounds are revision
            # numbers, so translate it first.
            stop = self.rev(self.tip())
        t = self._db.execute(
            'SELECT rev FROM changelog '
            'WHERE rev >= ? AND rev <= ? '
            'ORDER BY REV ASC',
            (start, stop),
        )
        return (int(r[0]) for r in t)

    def _partialmatch(self, id):
        """Resolve the hex prefix ``id`` to a binary node, or None.

        Raises ``error.WdirUnsupported`` for a working-directory prefix and
        ``error.AmbiguousPrefixLookupError`` when several nodes match.
        """
        if nodemod.wdirhex.startswith(id):
            raise error.WdirUnsupported
        candidates = [
            nodemod.bin(x[0])
            for x in self._db.execute(
                'SELECT node FROM changelog WHERE node LIKE ?',
                # Bind a native string, matching what shortest() does.
                (pycompat.sysstr(id + b'%'),),
            )
        ]
        if nodemod.nullhex.startswith(id):
            candidates.append(nodemod.nullid)
        if len(candidates) > 1:
            raise error.AmbiguousPrefixLookupError(
                id, b'00changelog.i', _(b'ambiguous identifier')
            )
        if candidates:
            return candidates[0]
        return None

    def flags(self, rev):
        return 0

    def shortest(self, node, minlength=1):
        """Return the shortest hex prefix of ``node`` matching one row.

        Prefixes are tried from ``minlength`` characters upwards; the full
        hex node is returned if no prefix is unique.
        """
        nodehex = nodemod.hex(node)
        for attempt in pycompat.xrange(minlength, len(nodehex) + 1):
            candidate = nodehex[:attempt]
            matches = int(
                self._db.execute(
                    'SELECT COUNT(*) FROM changelog WHERE node LIKE ?',
                    # Search for the candidate prefix, not the full hash;
                    # the full hash would make every first attempt "unique".
                    (pycompat.sysstr(candidate + b'%'),),
                ).fetchone()[0]
            )
            if matches == 1:
                return candidate
        return nodehex

    def headrevs(self, revs=None):
        """Return sorted head revisions, optionally restricted to ``revs``."""
        realheads = [
            int(x[0])
            for x in self._db.execute(
                'SELECT rev FROM changelog '
                'INNER JOIN heads ON changelog.node = heads.node'
            )
        ]
        if revs:
            headset = set(realheads)
            return sorted([r for r in revs if r in headset])
        return sorted(realheads)

    def changelogrevision(self, nodeorrev):
        """Build a ``_changelogrevision`` for a node or revision number."""
        # Ensure we have a node id
        if isinstance(nodeorrev, int):
            n = self.node(nodeorrev)
        else:
            n = nodeorrev
        # handle looking up nullid
        if n == nodemod.nullid:
            return hgchangelog._changelogrevision(extra={})
        hn = gitutil.togitnode(n)
        # We've got a real commit!
        files = [
            r[0]
            for r in self._db.execute(
                'SELECT filename FROM changedfiles '
                'WHERE node = ? and filenode != ?',
                (hn, gitutil.nullgit),
            )
        ]
        filesremoved = [
            r[0]
            for r in self._db.execute(
                'SELECT filename FROM changedfiles '
                'WHERE node = ? and filenode = ?',
                # Use the same null marker as the "files" query above so
                # the two lists partition the changed files.
                (hn, gitutil.nullgit),
            )
        ]
        c = self.gitrepo[hn]
        return hgchangelog._changelogrevision(
            manifest=n,  # pretend manifest the same as the commit node
            user=b'%s <%s>'
            % (c.author.name.encode('utf8'), c.author.email.encode('utf8')),
            date=(c.author.time, -c.author.offset * 60),
            files=files,
            # TODO filesadded in the index
            filesremoved=filesremoved,
            description=c.message.encode('utf8'),
            # TODO do we want to handle extra? how?
            extra={b'branch': b'default'},
        )

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Return a lazy ancestor set for ``revs``.

        Raises ``IndexError`` if any requested rev is beyond the tip.
        """
        revs = list(revs)
        tip = self.rev(self.tip())
        for r in revs:
            if r > tip:
                raise IndexError(b'Invalid rev %r' % r)
        return ancestor.lazyancestors(
            self.parentrevs, revs, stoprev=stoprev, inclusive=inclusive
        )

    # Cleanup opportunity: this is *identical* to the revlog.py version
    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def reachableroots(self, minroot, heads, roots, includepath=False):
        return dagop._reachablerootspure(
            self.parentrevs, minroot, roots, heads, includepath
        )

    # Cleanup opportunity: this is *identical* to the revlog.py version
    def isancestor(self, a, b):
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    # Cleanup opportunity: this is *identical* to the revlog.py version
    def isancestorrev(self, a, b):
        if a == nodemod.nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def parentrevs(self, rev):
        """Return ``(p1rev, p2rev)`` for ``rev``; missing parents are nullrev.

        Aborts on commits with more than two parents.
        """
        n = self.node(rev)
        hn = gitutil.togitnode(n)
        c = self.gitrepo[hn]
        p1 = p2 = nodemod.nullrev
        if c.parents:
            p1 = self.rev(c.parents[0].id.raw)
            if len(c.parents) > 2:
                raise error.Abort(b'TODO octopus merge handling')
            if len(c.parents) == 2:
                # The second parent, not a repeat of the first.
                p2 = self.rev(c.parents[1].id.raw)
        return p1, p2

    # Private method is used at least by the tags code.
    _uncheckedparentrevs = parentrevs

    def commonancestorsheads(self, a, b):
        # TODO the revlog version of this has a C path, so we probably
        # need to optimize this...
        a, b = self.rev(a), self.rev(b)
        return [
            self.node(n)
            for n in ancestor.commonancestorsheads(self.parentrevs, a, b)
        ]

    def branchinfo(self, rev):
        """Git doesn't do named branches, so just put everything on default."""
        return b'default', False

    def delayupdate(self, tr):
        # TODO: I think we can elide this because we're just dropping
        # an object in the git repo?
        pass

    def add(
        self,
        manifest,
        files,
        desc,
        transaction,
        p1,
        p2,
        user,
        date=None,
        extra=None,
        p1copies=None,
        p2copies=None,
        filesadded=None,
        filesremoved=None,
    ):
        """Create a git commit and return its raw (binary) object id.

        ``date`` is required despite its default and must be a
        ``(timestamp, tz)`` pair. The new commit is pinned by an internal
        reference and the index database is rebuilt before returning.
        """
        parents = []
        hp1, hp2 = gitutil.togitnode(p1), gitutil.togitnode(p2)
        if p1 != nodemod.nullid:
            parents.append(hp1)
        if p2 and p2 != nodemod.nullid:
            parents.append(hp2)
        assert date is not None
        timestamp, tz = date
        sig = pygit2.Signature(
            encoding.unifromlocal(stringutil.person(user)),
            encoding.unifromlocal(stringutil.email(user)),
            timestamp,
            -(tz // 60),
        )
        oid = self.gitrepo.create_commit(
            None, sig, sig, desc, gitutil.togitnode(manifest), parents
        )
        # Set up an internal reference to force the commit into the
        # changelog. Hypothetically, we could even use this refs/hg/
        # namespace to allow for anonymous heads on git repos, which
        # would be neat.
        self.gitrepo.references.create(
            'refs/hg/internal/latest-commit', oid, force=True
        )
        # Reindex now to pick up changes. We omit the progress
        # callback because this will be very quick.
        index._index_repo(self.gitrepo, self._db)
        return oid.raw
345 | |
346 | |
class manifestlog(baselog):
    """Manifest lookups for git commits, addressed by commit node."""

    def __getitem__(self, node):
        """Return the root manifest context for ``node``."""
        return self.get(b'', node)

    def get(self, relpath, node):
        """Return the manifest context for directory ``relpath`` at ``node``.

        An empty ``relpath`` selects the commit's root tree.
        """
        if node == nodemod.nullid:
            # TODO: this should almost certainly be a memgittreemanifestctx
            return manifest.memtreemanifestctx(self, relpath)
        tree = self.gitrepo[gitutil.togitnode(node)].tree
        # Walk down one path component at a time; nothing to walk for the
        # root.
        components = relpath.split(b'/') if relpath else []
        for component in components:
            entry = tree[component]
            tree = self.gitrepo[entry.id]
        return gitmanifest.gittreemanifestctx(self.gitrepo, tree)
363 | |
364 | |
@interfaceutil.implementer(repository.ifilestorage)
class filelog(baselog):
    """File storage for a single path, backed by git blobs and the index."""

    def __init__(self, gr, db, path):
        super(filelog, self).__init__(gr, db)
        assert isinstance(path, bytes)
        self.path = path

    def read(self, node):
        """Return the blob data for ``node`` (empty bytes for nullid)."""
        if node == nodemod.nullid:
            return b''
        return self.gitrepo[gitutil.togitnode(node)].data

    def lookup(self, node):
        """Return the binary node for a 20-byte or 40-char hex ``node``.

        Raises ``error.LookupError`` if the object is not in the git
        repository. Revision-number lookups are not implemented.
        """
        if len(node) not in (20, 40):
            node = int(node)
        if isinstance(node, int):
            assert False, b'todo revnums for nodes'
        if len(node) == 40:
            node = nodemod.bin(node)
        hnode = gitutil.togitnode(node)
        if hnode in self.gitrepo:
            return node
        # LookupError takes the node first and the store name second, as in
        # baselog.rev()/node().
        raise error.LookupError(node, self.path, _(b'no match found'))

    def cmp(self, node, text):
        """Returns True if text is different than content at `node`."""
        return self.read(node) != text

    def add(self, text, meta, transaction, link, p1=None, p2=None):
        """Store ``text`` as a git blob and return its raw object id."""
        assert not meta  # Should we even try to handle this?
        return self.gitrepo.create_blob(text).raw

    def __iter__(self):
        """Yield changelog revs where this path has a non-null filenode."""
        for clrev in self._db.execute(
            '''
SELECT rev FROM changelog
INNER JOIN changedfiles ON changelog.node = changedfiles.node
WHERE changedfiles.filename = ? AND changedfiles.filenode != ?
''',
            (pycompat.fsdecode(self.path), gitutil.nullgit),
        ):
            yield clrev[0]

    def linkrev(self, fr):
        return fr

    def rev(self, node):
        """Return the changelog rev that recorded ``node`` for this path.

        Raises ``error.LookupError`` if no such row exists.
        """
        row = self._db.execute(
            '''
SELECT rev FROM changelog
INNER JOIN changedfiles ON changelog.node = changedfiles.node
WHERE changedfiles.filename = ? AND changedfiles.filenode = ?''',
            (pycompat.fsdecode(self.path), gitutil.togitnode(node)),
        ).fetchone()
        if row is None:
            raise error.LookupError(node, self.path, _(b'no such node'))
        return int(row[0])

    def node(self, rev):
        """Return the binary filenode recorded for this path at ``rev``.

        Raises ``IndexError`` if the index has no entry for that pair.
        """
        maybe = self._db.execute(
            '''SELECT filenode FROM changedfiles
INNER JOIN changelog ON changelog.node = changedfiles.node
WHERE changelog.rev = ? AND filename = ?
''',
            (rev, pycompat.fsdecode(self.path)),
        ).fetchone()
        if maybe is None:
            raise IndexError('gitlog %r out of range %d' % (self.path, rev))
        return nodemod.bin(maybe[0])

    def parents(self, node):
        """Return the binary parent filenodes of ``node`` for this path.

        If the index row has an unset parent column, the filelog data is
        filled in first and the lookup is retried. Raises
        ``error.LookupError`` if the index has no row for ``node``.
        """
        gn = gitutil.togitnode(node)
        gp = pycompat.fsdecode(self.path)
        row = self._db.execute(
            '''SELECT p1filenode, p2filenode FROM changedfiles
WHERE filenode = ? AND filename = ?
''',
            (gn, gp),
        ).fetchone()
        if row is None:
            # No index row at all: report it explicitly instead of failing
            # with a TypeError while iterating None.
            raise error.LookupError(node, self.path, _(b'no such node'))
        ps = []
        for p in row:
            if p is None:
                commit = self._db.execute(
                    "SELECT node FROM changedfiles "
                    "WHERE filenode = ? AND filename = ?",
                    (gn, gp),
                ).fetchone()[0]
                # This filelog is missing some data. Build the
                # filelog, then recurse (which will always find data).
                # NOTE(review): this assumes the node column comes back as
                # bytes on Python 3 -- confirm against the index schema.
                if pycompat.ispy3:
                    commit = commit.decode('ascii')
                index.fill_in_filelog(self.gitrepo, self._db, commit, gp, gn)
                return self.parents(node)
            else:
                ps.append(nodemod.bin(p))
        return ps

    def renamed(self, node):
        # TODO: renames/copies
        return False