mercurial/manifest.py
changeset 30042 d24e03da24b5
parent 29998 14ad8e2a4abe
child 30207 abe723002509
equal deleted inserted replaced
30041:1779dde4c9ef 30042:d24e03da24b5
   102         lines.append("%c%s\0%s\n%s\n" % (stemlen, f[stemlen:], fl, n))
   102         lines.append("%c%s\0%s\n%s\n" % (stemlen, f[stemlen:], fl, n))
   103         prevf = f
   103         prevf = f
   104     _checkforbidden(files)
   104     _checkforbidden(files)
   105     return ''.join(lines)
   105     return ''.join(lines)
   106 
   106 
   107 class _lazymanifest(dict):
   107 class lazymanifestiter(object):
   108     """This is the pure implementation of lazymanifest.
   108     def __init__(self, lm):
   109 
   109         self.pos = 0
   110     It has not been optimized *at all* and is not lazy.
   110         self.lm = lm
   111     """
       
   112 
       
   113     def __init__(self, data):
       
   114         dict.__init__(self)
       
   115         for f, n, fl in _parse(data):
       
   116             self[f] = n, fl
       
   117 
       
   118     def __setitem__(self, k, v):
       
   119         node, flag = v
       
   120         assert node is not None
       
   121         if len(node) > 21:
       
   122             node = node[:21] # match c implementation behavior
       
   123         dict.__setitem__(self, k, (node, flag))
       
   124 
   111 
   125     def __iter__(self):
   112     def __iter__(self):
   126         return iter(sorted(dict.keys(self)))
   113         return self
   127 
   114 
   128     def iterkeys(self):
   115     def next(self):
   129         return iter(sorted(dict.keys(self)))
   116         try:
   130 
   117             data, pos = self.lm._get(self.pos)
   131     def iterentries(self):
   118         except IndexError:
   132         return ((f, e[0], e[1]) for f, e in sorted(self.iteritems()))
   119             raise StopIteration
       
   120         if pos == -1:
       
   121             self.pos += 1
       
   122             return data[0]
       
   123         self.pos += 1
       
   124         zeropos = data.find('\x00', pos)
       
   125         return data[pos:zeropos]
       
   126 
       
   127 class lazymanifestiterentries(object):
       
   128     def __init__(self, lm):
       
   129         self.lm = lm
       
   130         self.pos = 0
       
   131 
       
   132     def __iter__(self):
       
   133         return self
       
   134 
       
   135     def next(self):
       
   136         try:
       
   137             data, pos = self.lm._get(self.pos)
       
   138         except IndexError:
       
   139             raise StopIteration
       
   140         if pos == -1:
       
   141             self.pos += 1
       
   142             return data
       
   143         zeropos = data.find('\x00', pos)
       
   144         hashval = unhexlify(data, self.lm.extrainfo[self.pos],
       
   145                             zeropos + 1, 40)
       
   146         flags = self.lm._getflags(data, self.pos, zeropos)
       
   147         self.pos += 1
       
   148         return (data[pos:zeropos], hashval, flags)
       
   149 
       
   150 def unhexlify(data, extra, pos, length):
       
   151     s = data[pos:pos + length].decode('hex')
       
   152     if extra:
       
   153         s += chr(extra & 0xff)
       
   154     return s
       
   155 
       
   156 def _cmp(a, b):
       
   157     return (a > b) - (a < b)
       
   158 
       
   159 class _lazymanifest(object):
       
   160     def __init__(self, data, positions=None, extrainfo=None, extradata=None):
       
   161         if positions is None:
       
   162             self.positions = self.findlines(data)
       
   163             self.extrainfo = [0] * len(self.positions)
       
   164             self.data = data
       
   165             self.extradata = []
       
   166         else:
       
   167             self.positions = positions[:]
       
   168             self.extrainfo = extrainfo[:]
       
   169             self.extradata = extradata[:]
       
   170             self.data = data
       
   171 
       
   172     def findlines(self, data):
       
   173         if not data:
       
   174             return []
       
   175         pos = data.find("\n")
       
   176         if pos == -1 or data[-1] != '\n':
       
   177             raise ValueError("Manifest did not end in a newline.")
       
   178         positions = [0]
       
   179         prev = data[:data.find('\x00')]
       
   180         while pos < len(data) - 1 and pos != -1:
       
   181             positions.append(pos + 1)
       
   182             nexts = data[pos + 1:data.find('\x00', pos + 1)]
       
   183             if nexts < prev:
       
   184                 raise ValueError("Manifest lines not in sorted order.")
       
   185             prev = nexts
       
   186             pos = data.find("\n", pos + 1)
       
   187         return positions
       
   188 
       
   189     def _get(self, index):
       
   190         # get the position encoded in pos:
       
   191         #   positive number is an index in 'data'
       
   192         #   negative number is in extrapieces
       
   193         pos = self.positions[index]
       
   194         if pos >= 0:
       
   195             return self.data, pos
       
   196         return self.extradata[-pos - 1], -1
       
   197 
       
   198     def _getkey(self, pos):
       
   199         if pos >= 0:
       
   200             return self.data[pos:self.data.find('\x00', pos + 1)]
       
   201         return self.extradata[-pos - 1][0]
       
   202 
       
   203     def bsearch(self, key):
       
   204         first = 0
       
   205         last = len(self.positions) - 1
       
   206 
       
   207         while first <= last:
       
   208             midpoint = (first + last)//2
       
   209             nextpos = self.positions[midpoint]
       
   210             candidate = self._getkey(nextpos)
       
   211             r = _cmp(key, candidate)
       
   212             if r == 0:
       
   213                 return midpoint
       
   214             else:
       
   215                 if r < 0:
       
   216                     last = midpoint - 1
       
   217                 else:
       
   218                     first = midpoint + 1
       
   219         return -1
       
   220 
       
   221     def bsearch2(self, key):
       
   222         # same as the above, but will always return the position
       
   223         # done for performance reasons
       
   224         first = 0
       
   225         last = len(self.positions) - 1
       
   226 
       
   227         while first <= last:
       
   228             midpoint = (first + last)//2
       
   229             nextpos = self.positions[midpoint]
       
   230             candidate = self._getkey(nextpos)
       
   231             r = _cmp(key, candidate)
       
   232             if r == 0:
       
   233                 return (midpoint, True)
       
   234             else:
       
   235                 if r < 0:
       
   236                     last = midpoint - 1
       
   237                 else:
       
   238                     first = midpoint + 1
       
   239         return (first, False)
       
   240 
       
   241     def __contains__(self, key):
       
   242         return self.bsearch(key) != -1
       
   243 
       
   244     def _getflags(self, data, needle, pos):
       
   245         start = pos + 41
       
   246         end = data.find("\n", start)
       
   247         if end == -1:
       
   248             end = len(data) - 1
       
   249         if start == end:
       
   250             return ''
       
   251         return self.data[start:end]
       
   252 
       
   253     def __getitem__(self, key):
       
   254         if not isinstance(key, str):
       
   255             raise TypeError("getitem: manifest keys must be a string.")
       
   256         needle = self.bsearch(key)
       
   257         if needle == -1:
       
   258             raise KeyError
       
   259         data, pos = self._get(needle)
       
   260         if pos == -1:
       
   261             return (data[1], data[2])
       
   262         zeropos = data.find('\x00', pos)
       
   263         assert 0 <= needle <= len(self.positions)
       
   264         assert len(self.extrainfo) == len(self.positions)
       
   265         hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40)
       
   266         flags = self._getflags(data, needle, zeropos)
       
   267         return (hashval, flags)
       
   268 
       
   269     def __delitem__(self, key):
       
   270         needle, found = self.bsearch2(key)
       
   271         if not found:
       
   272             raise KeyError
       
   273         cur = self.positions[needle]
       
   274         self.positions = self.positions[:needle] + self.positions[needle + 1:]
       
   275         self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:]
       
   276         if cur >= 0:
       
   277             self.data = self.data[:cur] + '\x00' + self.data[cur + 1:]
       
   278 
       
   279     def __setitem__(self, key, value):
       
   280         if not isinstance(key, str):
       
   281             raise TypeError("setitem: manifest keys must be a string.")
       
   282         if not isinstance(value, tuple) or len(value) != 2:
       
   283             raise TypeError("Manifest values must be a tuple of (node, flags).")
       
   284         hashval = value[0]
       
   285         if not isinstance(hashval, str) or not 20 <= len(hashval) <= 22:
       
   286             raise TypeError("node must be a 20-byte string")
       
   287         flags = value[1]
       
   288         if len(hashval) == 22:
       
   289             hashval = hashval[:-1]
       
   290         if not isinstance(flags, str) or len(flags) > 1:
       
   291             raise TypeError("flags must a 0 or 1 byte string, got %r", flags)
       
   292         needle, found = self.bsearch2(key)
       
   293         if found:
       
   294             # put the item
       
   295             pos = self.positions[needle]
       
   296             if pos < 0:
       
   297                 self.extradata[-pos - 1] = (key, hashval, value[1])
       
   298             else:
       
   299                 # just don't bother
       
   300                 self.extradata.append((key, hashval, value[1]))
       
   301                 self.positions[needle] = -len(self.extradata)
       
   302         else:
       
   303             # not found, put it in with extra positions
       
   304             self.extradata.append((key, hashval, value[1]))
       
   305             self.positions = (self.positions[:needle] + [-len(self.extradata)]
       
   306                               + self.positions[needle:])
       
   307             self.extrainfo = (self.extrainfo[:needle] + [0] +
       
   308                               self.extrainfo[needle:])
   133 
   309 
   134     def copy(self):
   310     def copy(self):
   135         c = _lazymanifest('')
   311         # XXX call _compact like in C?
   136         c.update(self)
   312         return _lazymanifest(self.data, self.positions, self.extrainfo,
   137         return c
   313             self.extradata)
       
   314 
       
   315     def _compact(self):
       
   316         # hopefully not called TOO often
       
   317         if len(self.extradata) == 0:
       
   318             return
       
   319         l = []
       
   320         last_cut = 0
       
   321         i = 0
       
   322         offset = 0
       
   323         self.extrainfo = [0] * len(self.positions)
       
   324         while i < len(self.positions):
       
   325             if self.positions[i] >= 0:
       
   326                 cur = self.positions[i]
       
   327                 last_cut = cur
       
   328                 while True:
       
   329                     self.positions[i] = offset
       
   330                     i += 1
       
   331                     if i == len(self.positions) or self.positions[i] < 0:
       
   332                         break
       
   333                     offset += self.positions[i] - cur
       
   334                     cur = self.positions[i]
       
   335                 end_cut = self.data.find('\n', cur)
       
   336                 if end_cut != -1:
       
   337                     end_cut += 1
       
   338                 offset += end_cut - cur
       
   339                 l.append(self.data[last_cut:end_cut])
       
   340             else:
       
   341                 while i < len(self.positions) and self.positions[i] < 0:
       
   342                     cur = self.positions[i]
       
   343                     t = self.extradata[-cur - 1]
       
   344                     l.append(self._pack(t))
       
   345                     self.positions[i] = offset
       
   346                     if len(t[1]) > 20:
       
   347                         self.extrainfo[i] = ord(t[1][21])
       
   348                     offset += len(l[-1])
       
   349                     i += 1
       
   350         self.data = ''.join(l)
       
   351         self.extradata = []
       
   352 
       
   353     def _pack(self, d):
       
   354         return d[0] + '\x00' + d[1][:20].encode('hex') + d[2] + '\n'
       
   355 
       
   356     def text(self):
       
   357         self._compact()
       
   358         return self.data
   138 
   359 
   139     def diff(self, m2, clean=False):
   360     def diff(self, m2, clean=False):
   140         '''Finds changes between the current manifest and m2.'''
   361         '''Finds changes between the current manifest and m2.'''
       
   362         # XXX think whether efficiency matters here
   141         diff = {}
   363         diff = {}
   142 
   364 
   143         for fn, e1 in self.iteritems():
   365         for fn, e1, flags in self.iterentries():
   144             if fn not in m2:
   366             if fn not in m2:
   145                 diff[fn] = e1, (None, '')
   367                 diff[fn] = (e1, flags), (None, '')
   146             else:
   368             else:
   147                 e2 = m2[fn]
   369                 e2 = m2[fn]
   148                 if e1 != e2:
   370                 if (e1, flags) != e2:
   149                     diff[fn] = e1, e2
   371                     diff[fn] = (e1, flags), e2
   150                 elif clean:
   372                 elif clean:
   151                     diff[fn] = None
   373                     diff[fn] = None
   152 
   374 
   153         for fn, e2 in m2.iteritems():
   375         for fn, e2, flags in m2.iterentries():
   154             if fn not in self:
   376             if fn not in self:
   155                 diff[fn] = (None, ''), e2
   377                 diff[fn] = (None, ''), (e2, flags)
   156 
   378 
   157         return diff
   379         return diff
   158 
   380 
       
   381     def iterentries(self):
       
   382         return lazymanifestiterentries(self)
       
   383 
       
   384     def iterkeys(self):
       
   385         return lazymanifestiter(self)
       
   386 
       
   387     def __iter__(self):
       
   388         return lazymanifestiter(self)
       
   389 
       
   390     def __len__(self):
       
   391         return len(self.positions)
       
   392 
   159     def filtercopy(self, filterfn):
   393     def filtercopy(self, filterfn):
       
   394         # XXX should be optimized
   160         c = _lazymanifest('')
   395         c = _lazymanifest('')
   161         for f, n, fl in self.iterentries():
   396         for f, n, fl in self.iterentries():
   162             if filterfn(f):
   397             if filterfn(f):
   163                 c[f] = n, fl
   398                 c[f] = n, fl
   164         return c
   399         return c
   165 
       
   166     def text(self):
       
   167         """Get the full data of this manifest as a bytestring."""
       
   168         return _textv1(self.iterentries())
       
   169 
   400 
   170 try:
   401 try:
   171     _lazymanifest = parsers.lazymanifest
   402     _lazymanifest = parsers.lazymanifest
   172 except AttributeError:
   403 except AttributeError:
   173     pass
   404     pass