Mercurial > hg
changeset 1083:30974cf73435
Add some docstrings to revlog.py
author | mpm@selenic.com |
---|---|
date | Sat, 27 Aug 2005 01:43:48 -0700 |
parents | ce96e316278a |
children | 069b4311a81b |
files | mercurial/revlog.py |
diffstat | 1 files changed, 94 insertions(+), 19 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/revlog.py Sat Aug 27 01:13:28 2005 -0700 +++ b/mercurial/revlog.py Sat Aug 27 01:43:48 2005 -0700 @@ -1,12 +1,14 @@ -# revlog.py - storage back-end for mercurial -# -# This provides efficient delta storage with O(1) retrieve and append -# and O(changes) merge between branches -# -# Copyright 2005 Matt Mackall <mpm@selenic.com> -# -# This software may be used and distributed according to the terms -# of the GNU General Public License, incorporated herein by reference. +""" +revlog.py - storage back-end for mercurial + +This provides efficient delta storage with O(1) retrieve and append +and O(changes) merge between branches + +Copyright 2005 Matt Mackall <mpm@selenic.com> + +This software may be used and distributed according to the terms +of the GNU General Public License, incorporated herein by reference. +""" import zlib, struct, sha, binascii, heapq from mercurial import mdiff @@ -16,6 +18,7 @@ def short(node): return hex(node[:6]) def compress(text): + """ generate a possibly-compressed representation of text """ if not text: return text if len(text) < 44: if text[0] == '\0': return text @@ -27,6 +30,7 @@ return bin def decompress(bin): + """ decompress the given input """ if not bin: return bin t = bin[0] if t == '\0': return bin @@ -35,6 +39,12 @@ raise RevlogError("unknown compression type %s" % t) def hash(text, p1, p2): + """generate a hash from the given text and its parent hashes + + This hash combines both the current file contents and its history + in a manner that makes it easy to distinguish nodes with the same + content in the revision graph. + """ l = [p1, p2] l.sort() s = sha.new(l[0]) @@ -46,6 +56,15 @@ indexformat = ">4l20s20s20s" class lazyparser: + """ + this class avoids the need to parse the entirety of large indices + + By default we parse and load 1000 entries at a time. + + If no position is specified, we load the whole index, and replace + the lazy objects in revlog with the underlying objects for + efficiency in cases where we look at most of the nodes. + """ def __init__(self, data, revlog): self.data = data self.s = struct.calcsize(indexformat) @@ -76,6 +95,7 @@ i += 1 class lazyindex: + """a lazy version of the index array""" def __init__(self, parser): self.p = parser def __len__(self): @@ -89,6 +109,7 @@ self.p.index.append(e) class lazymap: + """a lazy version of the node map""" def __init__(self, parser): self.p = parser def load(self, key): @@ -123,7 +144,37 @@ class RevlogError(Exception): pass class revlog: + """ + the underlying revision storage object + + A revlog consists of two parts, an index and the revision data. + + The index is a file with a fixed record size containing + information on each revision, includings its nodeid (hash), the + nodeids of its parents, the position and offset of its data within + the data file, and the revision it's based on. Finally, each entry + contains a linkrev entry that can serve as a pointer to external + data. + + The revision data itself is a linear collection of data chunks. + Each chunk represents a revision and is usually represented as a + delta against the previous chunk. To bound lookup time, runs of + deltas are limited to about 2 times the length of the original + version data. This makes retrieval of a version proportional to + its size, or O(1) relative to the number of revisions. + + Both pieces of the revlog are written to in an append-only + fashion, which means we never need to rewrite a file to insert or + remove data, and can use some simple techniques to avoid the need + for locking while reading. + """ def __init__(self, opener, indexfile, datafile): + """ + create a revlog object + + opener is a function that abstracts the file opening operation + and can be used to implement COW semantics or the like. + """ self.indexfile = indexfile self.datafile = datafile self.opener = opener @@ -193,12 +244,13 @@ return reachable def heads(self, stop=None): + """return the list of all nodes that have no children""" p = {} h = [] stoprev = 0 if stop and stop in self.nodemap: stoprev = self.rev(stop) - + for r in range(self.count() - 1, -1, -1): n = self.node(r) if n not in p: @@ -212,6 +264,7 @@ return h def children(self, node): + """find the children of a given node""" c = [] p = self.rev(node) for r in range(p + 1, self.count()): @@ -225,6 +278,7 @@ return c def lookup(self, id): + """locate a node based on revision number or subset of hex nodeid""" try: rev = int(id) if str(rev) != id: raise ValueError @@ -243,12 +297,15 @@ return None def diff(self, a, b): + """return a delta between two revisions""" return mdiff.textdiff(a, b) def patches(self, t, pl): + """apply a list of patches to a string""" return mdiff.patches(t, pl) def delta(self, node): + """return or calculate a delta between a node and its predecessor""" r = self.rev(node) b = self.base(r) if r == b: @@ -261,15 +318,18 @@ return decompress(data) def revision(self, node): + """return an uncompressed revision of a given""" if node == nullid: return "" if self.cache and self.cache[0] == node: return self.cache[2] + # look up what we need to read text = None rev = self.rev(node) start, length, base, link, p1, p2, node = self.index[rev] end = start + length if base != rev: start = self.start(base) + # do we have useful data cached? if self.cache and self.cache[1] >= base and self.cache[1] < rev: base = self.cache[1] start = self.start(base + 1) @@ -300,6 +360,14 @@ return text def addrevision(self, text, transaction, link, p1=None, p2=None, d=None): + """add a revision to the log + + text - the revision data to add + transaction - the transaction object used for rollback + link - the linkrev data to add + p1, p2 - the parent nodeids of the revision + d - an optional precomputed delta + """ if text is None: text = "" if p1 is None: p1 = self.tip() if p2 is None: p2 = nullid @@ -349,6 +417,7 @@ return node def ancestor(self, a, b): + """calculate the least common ancestor of nodes a and b""" # calculate the distance of every node from root dist = {nullid: 0} for i in xrange(self.count()): @@ -387,12 +456,14 @@ lx = x.next() def group(self, linkmap): - # given a list of changeset revs, return a set of deltas and - # metadata corresponding to nodes. the first delta is - # parent(nodes[0]) -> nodes[0] the receiver is guaranteed to - # have this parent as it has all history before these - # changesets. parent is parent[0] + """calculate a delta group + Given a list of changeset revs, return a set of deltas and + metadata corresponding to nodes. the first delta is + parent(nodes[0]) -> nodes[0] the receiver is guaranteed to + have this parent as it has all history before these + changesets. parent is parent[0] + """ revs = [] needed = {} @@ -498,11 +569,15 @@ yield struct.pack(">l", 0) def addgroup(self, revs, linkmapper, transaction, unique=0): - # given a set of deltas, add them to the revision log. the - # first delta is against its parent, which should be in our - # log, the rest are against the previous delta. + """ + add a delta group - # track the base of the current delta log + given a set of deltas, add them to the revision log. the + first delta is against its parent, which should be in our + log, the rest are against the previous delta. + """ + + #track the base of the current delta log r = self.count() t = r - 1 node = nullid