comparison mercurial/revlog.py @ 27070:7860366b46c9

revlog: improve documentation There are a lot of functions and variables doing similar things. Document the role and functionality of each to make it easier to grok.
author Gregory Szorc <gregory.szorc@gmail.com>
date Sun, 22 Nov 2015 16:23:20 -0800
parents dfab6edb98e3
children 5ebc4a192550
comparison
equal deleted inserted replaced
27069:4e554a7df1e9 27070:7860366b46c9
203 and can be used to implement COW semantics or the like. 203 and can be used to implement COW semantics or the like.
204 """ 204 """
205 self.indexfile = indexfile 205 self.indexfile = indexfile
206 self.datafile = indexfile[:-2] + ".d" 206 self.datafile = indexfile[:-2] + ".d"
207 self.opener = opener 207 self.opener = opener
208 # 3-tuple of (node, rev, text) for a raw revision.
208 self._cache = None 209 self._cache = None
210 # 2-tuple of (rev, baserev) defining the base revision the delta chain
211 # begins at for a revision.
209 self._basecache = None 212 self._basecache = None
213 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
210 self._chunkcache = (0, '') 214 self._chunkcache = (0, '')
215 # How much data to read and cache into the raw revlog data cache.
211 self._chunkcachesize = 65536 216 self._chunkcachesize = 65536
212 self._maxchainlen = None 217 self._maxchainlen = None
213 self._aggressivemergedeltas = False 218 self._aggressivemergedeltas = False
214 self.index = [] 219 self.index = []
220 # Mapping of partial identifiers to full nodes.
215 self._pcache = {} 221 self._pcache = {}
222 # Mapping of full node to revision integer.
216 self._nodecache = {nullid: nullrev} 223 self._nodecache = {nullid: nullrev}
217 self._nodepos = None 224 self._nodepos = None
218 225
219 v = REVLOG_DEFAULT_VERSION 226 v = REVLOG_DEFAULT_VERSION
220 opts = getattr(opener, 'options', None) 227 opts = getattr(opener, 'options', None)
924 """ 931 """
925 p1, p2 = self.parents(node) 932 p1, p2 = self.parents(node)
926 return hash(text, p1, p2) != node 933 return hash(text, p1, p2) != node
927 934
928 def _addchunk(self, offset, data): 935 def _addchunk(self, offset, data):
936 """Add a segment to the revlog cache.
937
938 Accepts an absolute offset and the data that is at that location.
939 """
929 o, d = self._chunkcache 940 o, d = self._chunkcache
930 # try to add to existing cache 941 # try to add to existing cache
931 if o + len(d) == offset and len(d) + len(data) < _chunksize: 942 if o + len(d) == offset and len(d) + len(data) < _chunksize:
932 self._chunkcache = o, d + data 943 self._chunkcache = o, d + data
933 else: 944 else:
934 self._chunkcache = offset, data 945 self._chunkcache = offset, data
935 946
936 def _loadchunk(self, offset, length, df=None): 947 def _loadchunk(self, offset, length, df=None):
937 """Load a chunk/segment from the revlog. 948 """Load a segment of raw data from the revlog.
938 949
939 Accepts absolute offset, length to read, and an optional existing 950 Accepts an absolute offset, length to read, and an optional existing
940 file handle to read from. 951 file handle to read from.
941 952
942 If an existing file handle is passed, it will be seeked and the 953 If an existing file handle is passed, it will be seeked and the
943 original seek position will NOT be restored. 954 original seek position will NOT be restored.
955
956 Returns a str or buffer of raw byte data.
944 """ 957 """
945 if df is not None: 958 if df is not None:
946 closehandle = False 959 closehandle = False
947 else: 960 else:
948 if self._inline: 961 if self._inline:
966 if offset != realoffset or reallength != length: 979 if offset != realoffset or reallength != length:
967 return util.buffer(d, offset - realoffset, length) 980 return util.buffer(d, offset - realoffset, length)
968 return d 981 return d
969 982
970 def _getchunk(self, offset, length, df=None): 983 def _getchunk(self, offset, length, df=None):
984 """Obtain a segment of raw data from the revlog.
985
986 Accepts an absolute offset, length of bytes to obtain, and an
987 optional file handle to the already-opened revlog. If the file
988 handle is used, its original seek position will not be preserved.
989
990 Requests for data may be returned from a cache.
991
992 Returns a str or a buffer instance of raw byte data.
993 """
971 o, d = self._chunkcache 994 o, d = self._chunkcache
972 l = len(d) 995 l = len(d)
973 996
974 # is it in the cache? 997 # is it in the cache?
975 cachestart = offset - o 998 cachestart = offset - o
980 return util.buffer(d, cachestart, cacheend - cachestart) 1003 return util.buffer(d, cachestart, cacheend - cachestart)
981 1004
982 return self._loadchunk(offset, length, df=df) 1005 return self._loadchunk(offset, length, df=df)
983 1006
984 def _chunkraw(self, startrev, endrev, df=None): 1007 def _chunkraw(self, startrev, endrev, df=None):
1008 """Obtain a segment of raw data corresponding to a range of revisions.
1009
1010 Accepts the start and end revisions and an optional already-open
1011 file handle to be used for reading. If the file handle is read, its
1012 seek position will not be preserved.
1013
1014 Requests for data may be satisfied by a cache.
1015
1016 Returns a str or a buffer instance of raw byte data. Callers will
1017 need to call ``self.start(rev)`` and ``self.length(rev)`` to determine
1018 where each revision's data begins and ends.
1019 """
985 start = self.start(startrev) 1020 start = self.start(startrev)
986 end = self.end(endrev) 1021 end = self.end(endrev)
987 if self._inline: 1022 if self._inline:
988 start += (startrev + 1) * self._io.size 1023 start += (startrev + 1) * self._io.size
989 end += (endrev + 1) * self._io.size 1024 end += (endrev + 1) * self._io.size
990 length = end - start 1025 length = end - start
991 return self._getchunk(start, length, df=df) 1026 return self._getchunk(start, length, df=df)
992 1027
993 def _chunk(self, rev, df=None): 1028 def _chunk(self, rev, df=None):
1029 """Obtain a single decompressed chunk for a revision.
1030
1031 Accepts an integer revision and an optional already-open file handle
1032 to be used for reading. If used, the seek position of the file will not
1033 be preserved.
1034
1035 Returns a str holding uncompressed data for the requested revision.
1036 """
994 return decompress(self._chunkraw(rev, rev, df=df)) 1037 return decompress(self._chunkraw(rev, rev, df=df))
995 1038
996 def _chunks(self, revs, df=None): 1039 def _chunks(self, revs, df=None):
997 '''faster version of [self._chunk(rev) for rev in revs] 1040 """Obtain decompressed chunks for the specified revisions.
998 1041
999 Assumes that revs is in ascending order.''' 1042 Accepts an iterable of numeric revisions that are assumed to be in
1043 ascending order. Also accepts an optional already-open file handle
1044 to be used for reading. If used, the seek position of the file will
1045 not be preserved.
1046
1047 This function is similar to calling ``self._chunk()`` multiple times,
1048 but is faster.
1049
1050 Returns a list with decompressed data for each requested revision.
1051 """
1000 if not revs: 1052 if not revs:
1001 return [] 1053 return []
1002 start = self.start 1054 start = self.start
1003 length = self.length 1055 length = self.length
1004 inline = self._inline 1056 inline = self._inline
1030 ladd(decompress(buffer(data, chunkstart - offset, chunklength))) 1082 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1031 1083
1032 return l 1084 return l
1033 1085
1034 def _chunkclear(self): 1086 def _chunkclear(self):
1087 """Clear the raw chunk cache."""
1035 self._chunkcache = (0, '') 1088 self._chunkcache = (0, '')
1036 1089
1037 def deltaparent(self, rev): 1090 def deltaparent(self, rev):
1038 """return deltaparent of the given revision""" 1091 """return deltaparent of the given revision"""
1039 base = self.index[rev][3] 1092 base = self.index[rev][3]