comparison hgext/git/gitlog.py @ 44477:ad718271a9eb

git: skeleton of a new extension to _directly_ operate on git repos. This is based in part on work I did years ago in hgit, but it's mostly new code since I'm using pygit2 instead of dulwich and the hg storage interfaces have improved. Some cleanup of old hgit code by Pulkit, which I greatly appreciate. test-git-interop.t does not cover a whole lot of cases, but it passes. It includes status, diff, making a new commit, and `hg annotate` working on the git repository. This is _not_ (yet) production quality code: this is an experiment. Known technical debt lurking in this implementation: * Writing bookmarks just totally ignores transactions. * The way progress is threaded down into the gitstore is awful. * Ideally we'd find a way to incrementally reindex DAGs. I'm not sure how to do that efficiently, so we might need a "known only fast-forwards" mode on the DAG indexer for use on `hg commit` and friends. * We don't even _try_ to do anything reasonable for `hg pull` or `hg push`. * Mercurial needs an interface for the changelog type. Tests currently require git 2.24 as far as I'm aware: `git status` has some changed output that I didn't try and handle in a compatible way. This patch has produced some interesting cleanups, most recently on the manifest type. I expect continuing down this road will produce other meritorious cleanups throughout our code. Differential Revision: https://phab.mercurial-scm.org/D6734
author Augie Fackler <augie@google.com>
date Tue, 11 Feb 2020 00:44:59 -0500
parents
children 6d953b3fc2bd
comparison
equal deleted inserted replaced
44470:a08bbdf839ae 44477:ad718271a9eb
1 from __future__ import absolute_import
2
3 import pygit2
4
5 from mercurial.i18n import _
6
7 from mercurial import (
8 ancestor,
9 changelog as hgchangelog,
10 dagop,
11 encoding,
12 error,
13 manifest,
14 node as nodemod,
15 pycompat,
16 )
17 from mercurial.interfaces import (
18 repository,
19 util as interfaceutil,
20 )
21 from mercurial.utils import stringutil
22 from . import (
23 gitutil,
24 index,
25 manifest as gitmanifest,
26 )
27
28
class baselog(object):  # revlog.revlog):
    """Common implementations between changelog and manifestlog.

    All lookups go through the ``changelog`` table of the sqlite
    database handed to the constructor. That table maps a revision
    number (``rev``) to a node stored in hex form (``node()`` runs the
    stored value through ``nodemod.bin`` before returning it).
    """

    def __init__(self, gr, db):
        """Remember the git repository ``gr`` and index database ``db``."""
        self.gitrepo = gr
        self._db = db

    def __len__(self):
        """Return the number of rows in the ``changelog`` table."""
        row = self._db.execute('SELECT COUNT(*) FROM changelog').fetchone()
        return int(row[0])

    def rev(self, n):
        """Return the revision number recorded for binary node ``n``.

        The null node maps to -1 without touching the database.

        Raises error.LookupError when the node is not indexed.
        """
        if n == nodemod.nullid:
            return -1
        t = self._db.execute(
            'SELECT rev FROM changelog WHERE node = ?', (gitutil.togitnode(n),)
        ).fetchone()
        if t is None:
            # The message used to carry a stray, never-formatted "%d".
            raise error.LookupError(n, b'00changelog.i', _(b'no node'))
        return t[0]

    def node(self, r):
        """Return the binary node recorded for revision number ``r``.

        ``nodemod.nullrev`` maps to ``nodemod.nullid``.

        Raises error.LookupError when the revision is not indexed.
        """
        if r == nodemod.nullrev:
            return nodemod.nullid
        t = self._db.execute(
            'SELECT node FROM changelog WHERE rev = ?', (r,)
        ).fetchone()
        if t is None:
            raise error.LookupError(r, b'00changelog.i', _(b'no node'))
        return nodemod.bin(t[0])

    def hasnode(self, n):
        """Return True if binary node ``n`` has a row in ``changelog``."""
        # Convert the node the same way rev() does before binding it;
        # the raw node never compares equal to the stored form.
        t = self._db.execute(
            'SELECT node FROM changelog WHERE node = ?',
            (gitutil.togitnode(n),),
        ).fetchone()
        return t is not None
66
67
class baselogindex(object):
    """Stand-in for a revlog index, answered by a wrapped log object.

    The wrapped log must provide ``rev()``, ``node()``, ``parentrevs()``
    and ``__len__()``.
    """

    def __init__(self, log):
        self._log = log

    def has_node(self, n):
        """Return True if ``n`` resolves to a revision other than -1.

        The logs in this module raise error.LookupError for nodes they
        do not know; a membership test should answer False in that case
        rather than propagate the exception.
        """
        try:
            return self._log.rev(n) != -1
        except error.LookupError:
            return False

    def __len__(self):
        return len(self._log)

    def __getitem__(self, idx):
        """Return a revlog-index-shaped tuple for revision ``idx``.

        Only the parent revisions and the node come from the log; the
        remaining slots are placeholders.
        """
        p1rev, p2rev = self._log.parentrevs(idx)
        # TODO: it's messy that the index leaks so far out of the
        # storage layer that we have to implement things like reading
        # this raw tuple, which exposes revlog internals.
        return (
            # Pretend offset is just the index, since we don't really care.
            idx,
            # Same with lengths
            idx,  # length
            idx,  # rawsize
            -1,  # delta base
            idx,  # linkrev TODO is this right?
            p1rev,
            p2rev,
            self._log.node(idx),
        )
95
96
97 # TODO: an interface for the changelog type?
class changelog(baselog):
    """Changelog backed by the sqlite index and the pygit2 repository.

    Revision numbers and nodes come from the ``changelog`` table;
    commit metadata and parents are read from ``self.gitrepo``.
    """

    def __contains__(self, rev):
        """Return True if revision number ``rev`` can be resolved."""
        try:
            self.node(rev)
        except error.LookupError:
            return False
        return True

    @property
    def filteredrevs(self):
        # TODO: we should probably add a refs/hg/ namespace for hidden
        # heads etc, but that's an idea for later.
        return set()

    @property
    def index(self):
        """Return a fresh baselogindex wrapping this changelog."""
        return baselogindex(self)

    @property
    def nodemap(self):
        """Return a dict of binary node -> rev, including the null node.

        The mapping is rebuilt from the database on every access.
        """
        r = {
            nodemod.bin(v[0]): v[1]
            for v in self._db.execute('SELECT node, rev FROM changelog')
        }
        r[nodemod.nullid] = nodemod.nullrev
        return r

    def tip(self):
        """Return the node with the highest rev, or nullid when empty."""
        t = self._db.execute(
            'SELECT node FROM changelog ORDER BY rev DESC LIMIT 1'
        ).fetchone()
        if t:
            return nodemod.bin(t[0])
        return nodemod.nullid

    def revs(self, start=0, stop=None):
        """Yield revision numbers from ``start`` to ``stop``, inclusive.

        When ``stop`` is None the range runs up to the tip revision.
        """
        if stop is None:
            # tip() returns a node; the query compares revision numbers,
            # so translate it (an empty changelog yields -1 -> no rows).
            stop = self.rev(self.tip())
        t = self._db.execute(
            'SELECT rev FROM changelog '
            'WHERE rev >= ? AND rev <= ? '
            'ORDER BY REV ASC',
            (start, stop),
        )
        return (int(r[0]) for r in t)

    def _partialmatch(self, id):
        """Resolve hex prefix ``id`` to a binary node.

        Returns None when nothing matches.

        Raises error.WdirUnsupported if the prefix matches the working
        directory pseudo-node, and error.AmbiguousPrefixLookupError if
        more than one node (counting the null node) matches.
        """
        if nodemod.wdirhex.startswith(id):
            raise error.WdirUnsupported
        candidates = [
            nodemod.bin(x[0])
            for x in self._db.execute(
                'SELECT node FROM changelog WHERE node LIKE ?',
                # Bind the pattern the same way shortest() does.
                (pycompat.sysstr(id + b'%'),),
            )
        ]
        if nodemod.nullhex.startswith(id):
            candidates.append(nodemod.nullid)
        if len(candidates) > 1:
            raise error.AmbiguousPrefixLookupError(
                id, b'00changelog.i', _(b'ambiguous identifier')
            )
        if candidates:
            return candidates[0]
        return None

    def flags(self, rev):
        return 0

    def shortest(self, node, minlength=1):
        """Return the shortest unique hex prefix of ``node``.

        Prefixes of at least ``minlength`` characters are tried in
        increasing length; the full hex is returned if none is unique.
        """
        nodehex = nodemod.hex(node)
        for attempt in pycompat.xrange(minlength, len(nodehex) + 1):
            candidate = nodehex[:attempt]
            # Count matches for the *candidate* prefix; matching on the
            # full hex would make every first attempt look unique.
            matches = int(
                self._db.execute(
                    'SELECT COUNT(*) FROM changelog WHERE node LIKE ?',
                    (pycompat.sysstr(candidate + b'%'),),
                ).fetchone()[0]
            )
            if matches == 1:
                return candidate
        return nodehex

    def headrevs(self, revs=None):
        """Return the sorted revs of nodes listed in the ``heads`` table.

        When ``revs`` is given (and non-empty) only its members that are
        heads are returned.
        """
        realheads = [
            int(x[0])
            for x in self._db.execute(
                'SELECT rev FROM changelog '
                'INNER JOIN heads ON changelog.node = heads.node'
            )
        ]
        if revs:
            headset = set(realheads)
            return sorted([r for r in revs if r in headset])
        return sorted(realheads)

    def changelogrevision(self, nodeorrev):
        """Build a ``_changelogrevision`` for a node or revision number."""
        # Ensure we have a node id
        if isinstance(nodeorrev, int):
            n = self.node(nodeorrev)
        else:
            n = nodeorrev
        # handle looking up nullid
        if n == nodemod.nullid:
            return hgchangelog._changelogrevision(extra={})
        hn = gitutil.togitnode(n)
        # We've got a real commit!
        files = [
            r[0]
            for r in self._db.execute(
                'SELECT filename FROM changedfiles '
                'WHERE node = ? and filenode != ?',
                (hn, gitutil.nullgit),
            )
        ]
        filesremoved = [
            r[0]
            for r in self._db.execute(
                'SELECT filename FROM changedfiles '
                'WHERE node = ? and filenode = ?',
                # Use the same null marker as the query above so both
                # queries partition the rows consistently.
                (hn, gitutil.nullgit),
            )
        ]
        c = self.gitrepo[hn]
        return hgchangelog._changelogrevision(
            manifest=n,  # pretend manifest the same as the commit node
            user=b'%s <%s>'
            % (c.author.name.encode('utf8'), c.author.email.encode('utf8')),
            date=(c.author.time, -c.author.offset * 60),
            files=files,
            # TODO filesadded in the index
            filesremoved=filesremoved,
            description=c.message.encode('utf8'),
            # TODO do we want to handle extra? how?
            extra={b'branch': b'default'},
        )

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Return a lazy ancestor set for ``revs``.

        Raises IndexError if any requested rev is beyond the tip.
        """
        revs = list(revs)
        tip = self.rev(self.tip())
        for r in revs:
            if r > tip:
                raise IndexError(b'Invalid rev %r' % r)
        return ancestor.lazyancestors(
            self.parentrevs, revs, stoprev=stoprev, inclusive=inclusive
        )

    # Cleanup opportunity: this is *identical* to the revlog.py version
    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def reachableroots(self, minroot, heads, roots, includepath=False):
        return dagop._reachablerootspure(
            self.parentrevs, minroot, roots, heads, includepath
        )

    # Cleanup opportunity: this is *identical* to the revlog.py version
    def isancestor(self, a, b):
        """Return True if node ``a`` is an ancestor of node ``b``."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    # Cleanup opportunity: this is *identical* to the revlog.py version
    def isancestorrev(self, a, b):
        """Return True if rev ``a`` is an ancestor of (or equal to) ``b``."""
        if a == nodemod.nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def parentrevs(self, rev):
        """Return ``(p1, p2)`` revision numbers for ``rev``.

        Missing parents are reported as ``nodemod.nullrev``.

        Raises error.Abort for commits with more than two parents.
        """
        n = self.node(rev)
        hn = gitutil.togitnode(n)
        c = self.gitrepo[hn]
        p1 = p2 = nodemod.nullrev
        if c.parents:
            p1 = self.rev(c.parents[0].id.raw)
            if len(c.parents) > 2:
                raise error.Abort(b'TODO octopus merge handling')
            if len(c.parents) == 2:
                # The second parent, not the first one again.
                p2 = self.rev(c.parents[1].id.raw)
        return p1, p2

    # Private method is used at least by the tags code.
    _uncheckedparentrevs = parentrevs

    def commonancestorsheads(self, a, b):
        """Return the nodes of the common-ancestor heads of ``a``, ``b``."""
        # TODO the revlog version of this has a C path, so we probably
        # need to optimize this...
        a, b = self.rev(a), self.rev(b)
        return [
            self.node(n)
            for n in ancestor.commonancestorsheads(self.parentrevs, a, b)
        ]

    def branchinfo(self, rev):
        """Git doesn't do named branches, so just put everything on default."""
        return b'default', False

    def delayupdate(self, tr):
        # TODO: I think we can elide this because we're just dropping
        # an object in the git repo?
        pass

    def add(
        self,
        manifest,
        files,
        desc,
        transaction,
        p1,
        p2,
        user,
        date=None,
        extra=None,
        p1copies=None,
        p2copies=None,
        filesadded=None,
        filesremoved=None,
    ):
        """Create a git commit and return its binary id.

        ``manifest`` is used as the tree of the new commit, ``p1``/``p2``
        become its parents (null or unset parents are skipped) and
        ``user``/``date`` form both the author and committer signature.
        ``date`` is required despite its default. The remaining
        arguments are accepted but not used here.
        """
        parents = []
        # Only convert parents that are actually recorded; p2 may be
        # unset, so it must not be converted before it is checked.
        if p1 != nodemod.nullid:
            parents.append(gitutil.togitnode(p1))
        if p2 and p2 != nodemod.nullid:
            parents.append(gitutil.togitnode(p2))
        assert date is not None
        timestamp, tz = date
        sig = pygit2.Signature(
            encoding.unifromlocal(stringutil.person(user)),
            encoding.unifromlocal(stringutil.email(user)),
            timestamp,
            -(tz // 60),
        )
        oid = self.gitrepo.create_commit(
            None, sig, sig, desc, gitutil.togitnode(manifest), parents
        )
        # Set up an internal reference to force the commit into the
        # changelog. Hypothetically, we could even use this refs/hg/
        # namespace to allow for anonymous heads on git repos, which
        # would be neat.
        self.gitrepo.references.create(
            'refs/hg/internal/latest-commit', oid, force=True
        )
        # Reindex now to pick up changes. We omit the progress
        # callback because this will be very quick.
        index._index_repo(self.gitrepo, self._db)
        return oid.raw
345
346
class manifestlog(baselog):
    """Manifest access where a commit node doubles as the manifest node."""

    def __getitem__(self, node):
        """Return the root manifest context for ``node``."""
        return self.get(b'', node)

    def get(self, relpath, node):
        """Return the manifest context for directory ``relpath`` at ``node``.

        ``relpath`` is a ``/``-separated bytes path; an empty path
        selects the root tree of the commit.
        """
        if node == nodemod.nullid:
            # TODO: this should almost certainly be a memgittreemanifestctx
            return manifest.memtreemanifestctx(self, relpath)
        tree = self.gitrepo[gitutil.togitnode(node)].tree
        if relpath:
            # Descend one path component at a time from the root tree.
            for component in relpath.split(b'/'):
                entry = tree[component]
                tree = self.gitrepo[entry.id]
        return gitmanifest.gittreemanifestctx(self.gitrepo, tree)
363
364
@interfaceutil.implementer(repository.ifilestorage)
class filelog(baselog):
    """File storage for a single path, backed by git blobs.

    File nodes are blob ids; history comes from the ``changedfiles``
    table joined against ``changelog``.
    """

    def __init__(self, gr, db, path):
        super(filelog, self).__init__(gr, db)
        assert isinstance(path, bytes)
        self.path = path

    def read(self, node):
        """Return the blob data for ``node`` (empty for the null node)."""
        if node == nodemod.nullid:
            return b''
        return self.gitrepo[gitutil.togitnode(node)].data

    def lookup(self, node):
        """Return the binary node for a 20-byte or 40-char hex ``node``.

        Raises error.LookupError if the object is not in the git repo.
        Revision-number lookups are not implemented yet.
        """
        if len(node) not in (20, 40):
            node = int(node)
        if isinstance(node, int):
            assert False, b'todo revnums for nodes'
        if len(node) == 40:
            node = nodemod.bin(node)
        hnode = gitutil.togitnode(node)
        if hnode in self.gitrepo:
            return node
        # Same argument order as baselog: the node first, then the name
        # of the store it was looked up in.
        raise error.LookupError(node, self.path, _(b'no match found'))

    def cmp(self, node, text):
        """Returns True if text is different than content at `node`."""
        return self.read(node) != text

    def add(self, text, meta, transaction, link, p1=None, p2=None):
        """Store ``text`` as a git blob and return the raw blob id."""
        assert not meta  # Should we even try to handle this?
        return self.gitrepo.create_blob(text).raw

    def __iter__(self):
        """Yield changelog revs that record a non-null node for this path."""
        for clrev in self._db.execute(
            '''
SELECT rev FROM changelog
INNER JOIN changedfiles ON changelog.node = changedfiles.node
WHERE changedfiles.filename = ? AND changedfiles.filenode != ?
        ''',
            (pycompat.fsdecode(self.path), gitutil.nullgit),
        ):
            yield clrev[0]

    def linkrev(self, fr):
        # File revisions are numbered by the changelog rev that
        # introduced them, so the link is the identity.
        return fr

    def rev(self, node):
        """Return the changelog rev that recorded file node ``node``.

        Raises error.LookupError if no such row exists for this path.
        """
        row = self._db.execute(
            '''
SELECT rev FROM changelog
INNER JOIN changedfiles ON changelog.node = changedfiles.node
WHERE changedfiles.filename = ? AND changedfiles.filenode = ?''',
            (pycompat.fsdecode(self.path), gitutil.togitnode(node)),
        ).fetchone()
        if row is None:
            # Node first, then the store name, as in baselog.rev().
            raise error.LookupError(node, self.path, _(b'no such node'))
        return int(row[0])

    def node(self, rev):
        """Return the file node recorded for this path at ``rev``.

        Raises IndexError if the changeset did not touch this path.
        """
        maybe = self._db.execute(
            '''SELECT filenode FROM changedfiles
INNER JOIN changelog ON changelog.node = changedfiles.node
WHERE changelog.rev = ? AND filename = ?
''',
            (rev, pycompat.fsdecode(self.path)),
        ).fetchone()
        if maybe is None:
            raise IndexError('gitlog %r out of range %d' % (self.path, rev))
        return nodemod.bin(maybe[0])

    def parents(self, node):
        """Return the parent file nodes of ``node`` as a list.

        Parent information is filled in on demand: when a parent column
        is still NULL the filelog data is computed via
        ``index.fill_in_filelog`` and the lookup is retried.
        """
        gn = gitutil.togitnode(node)
        gp = pycompat.fsdecode(self.path)
        ps = []
        # fetchone() yields one row; iterate its two parent columns.
        for p in self._db.execute(
            '''SELECT p1filenode, p2filenode FROM changedfiles
WHERE filenode = ? AND filename = ?
''',
            (gn, gp),
        ).fetchone():
            if p is None:
                commit = self._db.execute(
                    "SELECT node FROM changedfiles "
                    "WHERE filenode = ? AND filename = ?",
                    (gn, gp),
                ).fetchone()[0]
                # This filelog is missing some data. Build the
                # filelog, then recurse (which will always find data).
                # NOTE(review): assumes the database returns bytes for
                # this column on Python 3 -- confirm the text_factory.
                if pycompat.ispy3:
                    commit = commit.decode('ascii')
                index.fill_in_filelog(self.gitrepo, self._db, commit, gp, gn)
                return self.parents(node)
            else:
                ps.append(nodemod.bin(p))
        return ps

    def renamed(self, node):
        # TODO: renames/copies
        return False