mercurial/revlog.py
changeset 2079 ee96ca273f32
parent 2078 441ea218414e
child 2080 1cbb14c048cb
equal deleted inserted replaced
2078:441ea218414e 2079:ee96ca273f32
    62     if t == 'x': return zlib.decompress(bin)
    62     if t == 'x': return zlib.decompress(bin)
    63     if t == 'u': return bin[1:]
    63     if t == 'u': return bin[1:]
    64     raise RevlogError(_("unknown compression type %r") % t)
    64     raise RevlogError(_("unknown compression type %r") % t)
    65 
    65 
    66 indexformatv0 = ">4l20s20s20s"
    66 indexformatv0 = ">4l20s20s20s"
       
    67 v0shaoffset = 56
    67 # index ng:
    68 # index ng:
    68 # 6 bytes offset
    69 # 6 bytes offset
    69 # 2 bytes flags
    70 # 2 bytes flags
    70 # 4 bytes compressed length
    71 # 4 bytes compressed length
    71 # 4 bytes uncompressed length
    72 # 4 bytes uncompressed length
    73 # 4 bytes link rev
    74 # 4 bytes link rev
    74 # 4 bytes parent 1 rev
    75 # 4 bytes parent 1 rev
    75 # 4 bytes parent 2 rev
    76 # 4 bytes parent 2 rev
    76 # 32 bytes: nodeid
    77 # 32 bytes: nodeid
    77 indexformatng = ">Qiiiiii20s12x"
    78 indexformatng = ">Qiiiiii20s12x"
       
    79 ngshaoffset = 32
    78 versionformat = ">i"
    80 versionformat = ">i"
    79 
    81 
    80 class lazyparser(object):
    82 class lazyparser(object):
    81     """
    83     """
    82     this class avoids the need to parse the entirety of large indices
    84     this class avoids the need to parse the entirety of large indices
    83 
       
    84     By default we parse and load 1000 entries at a time.
       
    85 
       
    86     If no position is specified, we load the whole index, and replace
       
    87     the lazy objects in revlog with the underlying objects for
       
    88     efficiency in cases where we look at most of the nodes.
       
    89     """
    85     """
    90     def __init__(self, data, revlog, indexformat):
    86     def __init__(self, dataf, size, indexformat, shaoffset):
    91         self.data = data
    87         self.dataf = dataf
       
    88         self.format = indexformat
    92         self.s = struct.calcsize(indexformat)
    89         self.s = struct.calcsize(indexformat)
    93         self.indexformat = indexformat
    90         self.indexformat = indexformat
    94         self.l = len(data)/self.s
    91         self.datasize = size
       
    92         self.l = size/self.s
    95         self.index = [None] * self.l
    93         self.index = [None] * self.l
    96         self.map = {nullid: -1}
    94         self.map = {nullid: -1}
       
    95         self.allmap = 0
    97         self.all = 0
    96         self.all = 0
    98         self.revlog = revlog
    97         self.mapfind_count = 0
    99 
    98         self.shaoffset = shaoffset
   100     def load(self, pos=None):
    99 
       
   100     def loadmap(self):
       
   101         """
       
   102         during a commit, we need to make sure the rev being added is
       
   103         not a duplicate.  This requires loading the entire index,
       
   104         which is fairly slow.  loadmap can load up just the node map,
       
   105         which takes much less time.
       
   106         """
       
   107         if self.allmap: return
       
   108         start = 0
       
   109         end = self.datasize
       
   110         self.allmap = 1
       
   111         cur = 0
       
   112         count = 0
       
   113         blocksize = self.s * 256
       
   114         self.dataf.seek(0)
       
   115         while cur < end:
       
   116             data = self.dataf.read(blocksize)
       
   117             off = 0
       
   118             for x in xrange(256):
       
   119                 n = data[off + self.shaoffset:off + self.shaoffset + 20]
       
   120                 self.map[n] = count
       
   121                 count += 1
       
   122                 if count >= self.l:
       
   123                     break
       
   124                 off += self.s
       
   125             cur += blocksize
       
   126 
       
   127     def loadblock(self, blockstart, blocksize, data=None):
   101         if self.all: return
   128         if self.all: return
   102         if pos is not None:
   129         if data is None:
   103             block = pos / 1000
   130             self.dataf.seek(blockstart)
   104             i = block * 1000
   131             data = self.dataf.read(blocksize)
   105             end = min(self.l, i + 1000)
   132         lend = len(data) / self.s
   106         else:
   133         i = blockstart / self.s
   107             self.all = 1
   134         off = 0
   108             i = 0
   135         for x in xrange(lend):
   109             end = self.l
   136             if self.index[i + x] == None:
   110             self.revlog.index = self.index
   137                 b = data[off : off + self.s]
   111             self.revlog.nodemap = self.map
   138                 e = struct.unpack(self.format, b)
   112 
   139                 self.index[i + x] = e
   113         while i < end:
   140                 self.map[e[-1]] = i + x
   114             if not self.index[i]:
   141             off += self.s
   115                 d = self.data[i * self.s: (i + 1) * self.s]
   142 
   116                 e = struct.unpack(self.indexformat, d)
   143     def findnode(self, node):
   117                 self.index[i] = e
   144         """search backwards through the index file for a specific node"""
   118                 self.map[e[-1]] = i
   145         if self.allmap: return None
   119             i += 1
   146 
       
   147         # hg log will cause many many searches for the manifest
       
   148         # nodes.  After we get called a few times, just load the whole
       
   149         # thing.
       
   150         if self.mapfind_count > 8:
       
   151             self.loadmap()
       
   152             if node in self.map:
       
   153                 return node
       
   154             return None
       
   155         self.mapfind_count += 1
       
   156         last = self.l - 1
       
   157         while self.index[last] != None:
       
   158             if last == 0:
       
   159                 self.all = 1
       
   160                 self.allmap = 1
       
   161                 return None
       
   162             last -= 1
       
   163         end = (last + 1) * self.s
       
   164         blocksize = self.s * 256
       
   165         while end >= 0:
       
   166             start = max(end - blocksize, 0)
       
   167             self.dataf.seek(start)
       
   168             data = self.dataf.read(end - start)
       
   169             findend = end - start
       
   170             while True:
       
   171                 # we're searching backwards, so weh have to make sure
       
   172                 # we don't find a changeset where this node is a parent
       
   173                 off = data.rfind(node, 0, findend)
       
   174                 findend = off
       
   175                 if off >= 0:
       
   176                     i = off / self.s
       
   177                     off = i * self.s
       
   178                     n = data[off + self.shaoffset:off + self.shaoffset + 20]
       
   179                     if n == node:
       
   180                         self.map[n] = i + start / self.s
       
   181                         return node
       
   182                 else:
       
   183                     break
       
   184             end -= blocksize
       
   185         return None
       
   186 
       
   187     def loadindex(self, i=None, end=None):
       
   188         if self.all: return
       
   189         all = False
       
   190         if i == None:
       
   191             blockstart = 0
       
   192             blocksize = (512 / self.s) * self.s
       
   193             end = self.datasize
       
   194             all = True
       
   195         else:
       
   196             if end:
       
   197                 blockstart = i * self.s
       
   198                 end = end * self.s
       
   199                 blocksize = end - blockstart
       
   200             else:
       
   201                 blockstart = (i & ~(32)) * self.s
       
   202                 blocksize = self.s * 64
       
   203                 end = blockstart + blocksize
       
   204         while blockstart < end:
       
   205             self.loadblock(blockstart, blocksize)
       
   206             blockstart += blocksize
       
   207         if all: self.all = True
   120 
   208 
   121 class lazyindex(object):
   209 class lazyindex(object):
   122     """a lazy version of the index array"""
   210     """a lazy version of the index array"""
   123     def __init__(self, parser):
   211     def __init__(self, parser):
   124         self.p = parser
   212         self.p = parser
   125     def __len__(self):
   213     def __len__(self):
   126         return len(self.p.index)
   214         return len(self.p.index)
   127     def load(self, pos):
   215     def load(self, pos):
   128         if pos < 0:
   216         if pos < 0:
   129             pos += len(self.p.index)
   217             pos += len(self.p.index)
   130         self.p.load(pos)
   218         self.p.loadindex(pos)
   131         return self.p.index[pos]
   219         return self.p.index[pos]
   132     def __getitem__(self, pos):
   220     def __getitem__(self, pos):
   133         return self.p.index[pos] or self.load(pos)
   221         return self.p.index[pos] or self.load(pos)
   134     def __setitem__(self, pos, item):
   222     def __setitem__(self, pos, item):
   135         self.p.index[pos] = item
   223         self.p.index[pos] = item
   141 class lazymap(object):
   229 class lazymap(object):
   142     """a lazy version of the node map"""
   230     """a lazy version of the node map"""
   143     def __init__(self, parser):
   231     def __init__(self, parser):
   144         self.p = parser
   232         self.p = parser
   145     def load(self, key):
   233     def load(self, key):
   146         if self.p.all: return
   234         n = self.p.findnode(key)
   147         n = self.p.data.find(key)
   235         if n == None:
   148         if n < 0:
       
   149             raise KeyError(key)
   236             raise KeyError(key)
   150         pos = n / self.p.s
       
   151         self.p.load(pos)
       
   152     def __contains__(self, key):
   237     def __contains__(self, key):
   153         self.p.load()
   238         if key in self.p.map:
       
   239             return True
       
   240         self.p.loadmap()
   154         return key in self.p.map
   241         return key in self.p.map
   155     def __iter__(self):
   242     def __iter__(self):
   156         yield nullid
   243         yield nullid
   157         for i in xrange(self.p.l):
   244         for i in xrange(self.p.l):
   158             try:
   245             try:
   159                 yield self.p.index[i][-1]
   246                 yield self.p.index[i][-1]
   160             except:
   247             except:
   161                 self.p.load(i)
   248                 self.p.loadindex(i)
   162                 yield self.p.index[i][-1]
   249                 yield self.p.index[i][-1]
   163     def __getitem__(self, key):
   250     def __getitem__(self, key):
   164         try:
   251         try:
   165             return self.p.map[key]
   252             return self.p.map[key]
   166         except KeyError:
   253         except KeyError:
   220 
   307 
   221     def load(self):
   308     def load(self):
   222         v = self.defversion
   309         v = self.defversion
   223         try:
   310         try:
   224             f = self.opener(self.indexfile)
   311             f = self.opener(self.indexfile)
   225             i = f.read()
   312             i = f.read(4)
       
   313             f.seek(0)
   226         except IOError, inst:
   314         except IOError, inst:
   227             if inst.errno != errno.ENOENT:
   315             if inst.errno != errno.ENOENT:
   228                 raise
   316                 raise
   229             i = ""
   317             i = ""
   230         else:
   318         else:
   239                     and st.st_mtime == oldst.st_mtime
   327                     and st.st_mtime == oldst.st_mtime
   240                     and st.st_ctime == oldst.st_ctime):
   328                     and st.st_ctime == oldst.st_ctime):
   241                     return
   329                     return
   242                 self.indexstat = st
   330                 self.indexstat = st
   243                 if len(i) > 0:
   331                 if len(i) > 0:
   244                     v = struct.unpack(versionformat, i[:4])[0]
   332                     v = struct.unpack(versionformat, i)[0]
   245         flags = v & ~0xFFFF
   333         flags = v & ~0xFFFF
   246         fmt = v & 0xFFFF
   334         fmt = v & 0xFFFF
   247         if fmt == 0:
   335         if fmt == 0:
   248             if flags:
   336             if flags:
   249                 raise RevlogError(_("index %s invalid flags %x for format v0" %
   337                 raise RevlogError(_("index %s invalid flags %x for format v0" %
   256             raise RevlogError(_("index %s invalid format %d" %
   344             raise RevlogError(_("index %s invalid format %d" %
   257                                (self.indexfile, fmt)))
   345                                (self.indexfile, fmt)))
   258         self.version = v
   346         self.version = v
   259         if v == 0:
   347         if v == 0:
   260             self.indexformat = indexformatv0
   348             self.indexformat = indexformatv0
       
   349             shaoffset = v0shaoffset
   261         else:
   350         else:
   262             self.indexformat = indexformatng
   351             self.indexformat = indexformatng
       
   352             shaoffset = ngshaoffset
   263 
   353 
   264         if i:
   354         if i:
   265             if not self.inlinedata() and st and st.st_size > 10000:
   355             if not self.inlinedata() and st and st.st_size > 10000:
   266                 # big index, let's parse it on demand
   356                 # big index, let's parse it on demand
   267                 parser = lazyparser(i, self, self.indexformat)
   357                 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
   268                 self.index = lazyindex(parser)
   358                 self.index = lazyindex(parser)
   269                 self.nodemap = lazymap(parser)
   359                 self.nodemap = lazymap(parser)
   270             else:
   360             else:
       
   361                 i = f.read()
   271                 self.parseindex(i)
   362                 self.parseindex(i)
   272             if self.inlinedata():
   363             if self.inlinedata():
   273                 # we've already got the entire data file read in, save it
   364                 # we've already got the entire data file read in, save it
   274                 # in the chunk data
   365                 # in the chunk data
   275                 self.chunkcache = (0, i)
   366                 self.chunkcache = (0, i)
   310         return int(q & 0xFFFF)
   401         return int(q & 0xFFFF)
   311 
   402 
   312     def offset_type(self, offset, type):
   403     def offset_type(self, offset, type):
   313         return long(long(offset) << 16 | type)
   404         return long(long(offset) << 16 | type)
   314 
   405 
       
   406     def loadindex(self, start, end):
       
   407         """load a block of indexes all at once from the lazy parser"""
       
   408         if isinstance(self.index, lazyindex):
       
   409             self.index.p.loadindex(start, end)
       
   410 
   315     def loadindexmap(self):
   411     def loadindexmap(self):
   316         """loads both the map and the index from the lazy parser"""
   412         """loads both the map and the index from the lazy parser"""
   317         if isinstance(self.index, lazyindex):
   413         if isinstance(self.index, lazyindex):
   318             p = self.index.p
   414             p = self.index.p
   319             p.load()
   415             p.loadindex()
       
   416             self.nodemap = p.map
       
   417 
       
   418     def loadmap(self):
       
   419         """loads the map from the lazy parser"""
       
   420         if isinstance(self.nodemap, lazymap):
       
   421             self.nodemap.p.loadmap()
       
   422             self.nodemap = self.nodemap.p.map
   320 
   423 
   321     def inlinedata(self): return self.version & REVLOGNGINLINEDATA
   424     def inlinedata(self): return self.version & REVLOGNGINLINEDATA
   322     def tip(self): return self.node(len(self.index) - 1)
   425     def tip(self): return self.node(len(self.index) - 1)
   323     def count(self): return len(self.index)
   426     def count(self): return len(self.index)
   324     def node(self, rev):
   427     def node(self, rev):
   688 
   791 
   689         # do we have useful data cached?
   792         # do we have useful data cached?
   690         if self.cache and self.cache[1] >= base and self.cache[1] < rev:
   793         if self.cache and self.cache[1] >= base and self.cache[1] < rev:
   691             base = self.cache[1]
   794             base = self.cache[1]
   692             text = self.cache[2]
   795             text = self.cache[2]
   693         else:
   796             self.loadindex(base, rev + 1)
       
   797         else:
       
   798             self.loadindex(base, rev + 1)
   694             text = self.chunk(base, df=df)
   799             text = self.chunk(base, df=df)
   695 
   800 
   696         bins = []
   801         bins = []
   697         for r in xrange(base + 1, rev + 1):
   802         for r in xrange(base + 1, rev + 1):
   698             bins.append(self.chunk(r, df=df))
   803             bins.append(self.chunk(r, df=df))