comparison hgext/remotefilelog/datapack.py @ 43076:2372284d9457

formatting: blacken the codebase

This is using my patch to black (https://github.com/psf/black/pull/826) so
we don't un-wrap collection literals.

Done with:

  hg files 'set:**.py - mercurial/thirdparty/** - "contrib/python-zstandard/**"' | xargs black -S

# skip-blame mass-reformatting only
# no-check-commit reformats foo_bar functions

Differential Revision: https://phab.mercurial-scm.org/D6971
author Augie Fackler <augie@google.com>
date Sun, 06 Oct 2019 09:45:02 -0400
parents eb37d95cc486
children 687b865b95ad
comparing 43075:57875cf423c9 (before) with 43076:2372284d9457 (after)
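
The whole changeset is a mechanical reformatting; no line changes behavior. As a quick orientation before the side-by-side hunks, here is an illustrative before/after pair quoted from this very file, showing the two patterns that account for most of the diff: black puts spaces around ':' when a slice bound is an expression, and it re-wraps over-long calls by indenting the arguments one level instead of aligning them under the opening parenthesis. The -S flag preserves the existing single-quoted strings, and the patch referenced in the commit message keeps already-wrapped collection literals wrapped.

    # Before (43075:57875cf423c9):
    rawentry = self._data[offset:offset + size]
    raise RuntimeError("must use getdeltachain with datapack (%s:%s)"
                       % (name, hex(node)))

    # After black -S (43076:2372284d9457):
    rawentry = self._data[offset : offset + size]
    raise RuntimeError(
        "must use getdeltachain with datapack (%s:%s)" % (name, hex(node))
    )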
22 NOBASEINDEXMARK = -2 22 NOBASEINDEXMARK = -2
23 23
24 INDEXSUFFIX = '.dataidx' 24 INDEXSUFFIX = '.dataidx'
25 PACKSUFFIX = '.datapack' 25 PACKSUFFIX = '.datapack'
26 26
27
27 class datapackstore(basepack.basepackstore): 28 class datapackstore(basepack.basepackstore):
28 INDEXSUFFIX = INDEXSUFFIX 29 INDEXSUFFIX = INDEXSUFFIX
29 PACKSUFFIX = PACKSUFFIX 30 PACKSUFFIX = PACKSUFFIX
30 31
31 def __init__(self, ui, path): 32 def __init__(self, ui, path):
82 83
83 raise KeyError((name, hex(node))) 84 raise KeyError((name, hex(node)))
84 85
85 def add(self, name, node, data): 86 def add(self, name, node, data):
86 raise RuntimeError("cannot add to datapackstore") 87 raise RuntimeError("cannot add to datapackstore")
88
87 89
88 class datapack(basepack.basepack): 90 class datapack(basepack.basepack):
89 INDEXSUFFIX = INDEXSUFFIX 91 INDEXSUFFIX = INDEXSUFFIX
90 PACKSUFFIX = PACKSUFFIX 92 PACKSUFFIX = PACKSUFFIX
91 93
104 missing.append((name, node)) 106 missing.append((name, node))
105 107
106 return missing 108 return missing
107 109
108 def get(self, name, node): 110 def get(self, name, node):
109 raise RuntimeError("must use getdeltachain with datapack (%s:%s)" 111 raise RuntimeError(
110 % (name, hex(node))) 112 "must use getdeltachain with datapack (%s:%s)" % (name, hex(node))
113 )
111 114
112 def getmeta(self, name, node): 115 def getmeta(self, name, node):
113 value = self._find(node) 116 value = self._find(node)
114 if value is None: 117 if value is None:
115 raise KeyError((name, hex(node))) 118 raise KeyError((name, hex(node)))
116 119
117 node, deltabaseoffset, offset, size = value 120 node, deltabaseoffset, offset, size = value
118 rawentry = self._data[offset:offset + size] 121 rawentry = self._data[offset : offset + size]
119 122
120 # see docstring of mutabledatapack for the format 123 # see docstring of mutabledatapack for the format
121 offset = 0 124 offset = 0
122 offset += struct.unpack_from('!H', rawentry, offset)[0] + 2 # filename 125 offset += struct.unpack_from('!H', rawentry, offset)[0] + 2 # filename
123 offset += 40 # node, deltabase node 126 offset += 40 # node, deltabase node
124 offset += struct.unpack_from('!Q', rawentry, offset)[0] + 8 # delta 127 offset += struct.unpack_from('!Q', rawentry, offset)[0] + 8 # delta
125 128
126 metalen = struct.unpack_from('!I', rawentry, offset)[0] 129 metalen = struct.unpack_from('!I', rawentry, offset)[0]
127 offset += 4 130 offset += 4
128 131
129 meta = shallowutil.parsepackmeta(rawentry[offset:offset + metalen]) 132 meta = shallowutil.parsepackmeta(rawentry[offset : offset + metalen])
130 133
131 return meta 134 return meta
132 135
133 def getdelta(self, name, node): 136 def getdelta(self, name, node):
134 value = self._find(node) 137 value = self._find(node)
153 156
154 # Precompute chains 157 # Precompute chains
155 chain = [value] 158 chain = [value]
156 deltabaseoffset = value[1] 159 deltabaseoffset = value[1]
157 entrylen = self.INDEXENTRYLENGTH 160 entrylen = self.INDEXENTRYLENGTH
158 while (deltabaseoffset != FULLTEXTINDEXMARK 161 while (
159 and deltabaseoffset != NOBASEINDEXMARK): 162 deltabaseoffset != FULLTEXTINDEXMARK
163 and deltabaseoffset != NOBASEINDEXMARK
164 ):
160 loc = params.indexstart + deltabaseoffset 165 loc = params.indexstart + deltabaseoffset
161 value = struct.unpack(self.INDEXFORMAT, 166 value = struct.unpack(
162 self._index[loc:loc + entrylen]) 167 self.INDEXFORMAT, self._index[loc : loc + entrylen]
168 )
163 deltabaseoffset = value[1] 169 deltabaseoffset = value[1]
164 chain.append(value) 170 chain.append(value)
165 171
166 # Read chain data 172 # Read chain data
167 deltachain = [] 173 deltachain = []
173 self.freememory() 179 self.freememory()
174 180
175 return deltachain 181 return deltachain
176 182
177 def _readentry(self, offset, size, getmeta=False): 183 def _readentry(self, offset, size, getmeta=False):
178 rawentry = self._data[offset:offset + size] 184 rawentry = self._data[offset : offset + size]
179 self._pagedin += len(rawentry) 185 self._pagedin += len(rawentry)
180 186
181 # <2 byte len> + <filename> 187 # <2 byte len> + <filename>
182 lengthsize = 2 188 lengthsize = 2
183 filenamelen = struct.unpack('!H', rawentry[:2])[0] 189 filenamelen = struct.unpack('!H', rawentry[:2])[0]
184 filename = rawentry[lengthsize:lengthsize + filenamelen] 190 filename = rawentry[lengthsize : lengthsize + filenamelen]
185 191
186 # <20 byte node> + <20 byte deltabase> 192 # <20 byte node> + <20 byte deltabase>
187 nodestart = lengthsize + filenamelen 193 nodestart = lengthsize + filenamelen
188 deltabasestart = nodestart + NODELENGTH 194 deltabasestart = nodestart + NODELENGTH
189 node = rawentry[nodestart:deltabasestart] 195 node = rawentry[nodestart:deltabasestart]
190 deltabasenode = rawentry[deltabasestart:deltabasestart + NODELENGTH] 196 deltabasenode = rawentry[deltabasestart : deltabasestart + NODELENGTH]
191 197
192 # <8 byte len> + <delta> 198 # <8 byte len> + <delta>
193 deltastart = deltabasestart + NODELENGTH 199 deltastart = deltabasestart + NODELENGTH
194 rawdeltalen = rawentry[deltastart:deltastart + 8] 200 rawdeltalen = rawentry[deltastart : deltastart + 8]
195 deltalen = struct.unpack('!Q', rawdeltalen)[0] 201 deltalen = struct.unpack('!Q', rawdeltalen)[0]
196 202
197 delta = rawentry[deltastart + 8:deltastart + 8 + deltalen] 203 delta = rawentry[deltastart + 8 : deltastart + 8 + deltalen]
198 delta = self._decompress(delta) 204 delta = self._decompress(delta)
199 205
200 if getmeta: 206 if getmeta:
201 metastart = deltastart + 8 + deltalen 207 metastart = deltastart + 8 + deltalen
202 metalen = struct.unpack_from('!I', rawentry, metastart)[0] 208 metalen = struct.unpack_from('!I', rawentry, metastart)[0]
203 209
204 rawmeta = rawentry[metastart + 4:metastart + 4 + metalen] 210 rawmeta = rawentry[metastart + 4 : metastart + 4 + metalen]
205 meta = shallowutil.parsepackmeta(rawmeta) 211 meta = shallowutil.parsepackmeta(rawmeta)
206 return filename, node, deltabasenode, delta, meta 212 return filename, node, deltabasenode, delta, meta
207 else: 213 else:
208 return filename, node, deltabasenode, delta 214 return filename, node, deltabasenode, delta
209 215
213 def add(self, name, node, data): 219 def add(self, name, node, data):
214 raise RuntimeError("cannot add to datapack (%s:%s)" % (name, node)) 220 raise RuntimeError("cannot add to datapack (%s:%s)" % (name, node))
215 221
216 def _find(self, node): 222 def _find(self, node):
217 params = self.params 223 params = self.params
218 fanoutkey = struct.unpack(params.fanoutstruct, 224 fanoutkey = struct.unpack(
219 node[:params.fanoutprefix])[0] 225 params.fanoutstruct, node[: params.fanoutprefix]
226 )[0]
220 fanout = self._fanouttable 227 fanout = self._fanouttable
221 228
222 start = fanout[fanoutkey] + params.indexstart 229 start = fanout[fanoutkey] + params.indexstart
223 indexend = self._indexend 230 indexend = self._indexend
224 231
231 else: 238 else:
232 end = indexend 239 end = indexend
233 240
234 # Bisect between start and end to find node 241 # Bisect between start and end to find node
235 index = self._index 242 index = self._index
236 startnode = index[start:start + NODELENGTH] 243 startnode = index[start : start + NODELENGTH]
237 endnode = index[end:end + NODELENGTH] 244 endnode = index[end : end + NODELENGTH]
238 entrylen = self.INDEXENTRYLENGTH 245 entrylen = self.INDEXENTRYLENGTH
239 if startnode == node: 246 if startnode == node:
240 entry = index[start:start + entrylen] 247 entry = index[start : start + entrylen]
241 elif endnode == node: 248 elif endnode == node:
242 entry = index[end:end + entrylen] 249 entry = index[end : end + entrylen]
243 else: 250 else:
244 while start < end - entrylen: 251 while start < end - entrylen:
245 mid = start + (end - start) // 2 252 mid = start + (end - start) // 2
246 mid = mid - ((mid - params.indexstart) % entrylen) 253 mid = mid - ((mid - params.indexstart) % entrylen)
247 midnode = index[mid:mid + NODELENGTH] 254 midnode = index[mid : mid + NODELENGTH]
248 if midnode == node: 255 if midnode == node:
249 entry = index[mid:mid + entrylen] 256 entry = index[mid : mid + entrylen]
250 break 257 break
251 if node > midnode: 258 if node > midnode:
252 start = mid 259 start = mid
253 elif node < midnode: 260 elif node < midnode:
254 end = mid 261 end = mid
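
Aside: the _find hunk above runs a binary search directly over the raw index bytes, snapping mid back onto an entry boundary before each comparison. Below is a minimal, self-contained sketch of that idea; the '!20siQQ' layout (20-byte node, signed delta-base pointer, 8-byte offset, 8-byte size) is an assumption chosen to match the index description later in this file, while the real values come from datapack.INDEXFORMAT and the pack version.

    import struct

    # Assumed fixed-width entry layout: 20-byte node, signed 4-byte delta-base
    # pointer, 8-byte data offset, 8-byte data size.  The real datapack reads
    # INDEXFORMAT / INDEXENTRYLENGTH from the pack header.
    ENTRYFMT = '!20siQQ'
    ENTRYLEN = struct.calcsize(ENTRYFMT)  # 40


    def find_entry(index, node):
        """Bisect a bytes object of node-sorted, fixed-width entries."""
        start, end = 0, len(index) - ENTRYLEN
        while start <= end:
            mid = start + (end - start) // 2
            mid -= mid % ENTRYLEN              # snap onto an entry boundary
            midnode = index[mid:mid + 20]
            if midnode == node:
                return struct.unpack(ENTRYFMT, index[mid:mid + ENTRYLEN])
            if node > midnode:
                start = mid + ENTRYLEN
            else:
                end = mid - ENTRYLEN
        return None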
262 ledger.markdataentry(self, filename, node) 269 ledger.markdataentry(self, filename, node)
263 270
264 def cleanup(self, ledger): 271 def cleanup(self, ledger):
265 entries = ledger.sources.get(self, []) 272 entries = ledger.sources.get(self, [])
266 allkeys = set(self) 273 allkeys = set(self)
267 repackedkeys = set((e.filename, e.node) for e in entries if 274 repackedkeys = set(
268 e.datarepacked or e.gced) 275 (e.filename, e.node) for e in entries if e.datarepacked or e.gced
276 )
269 277
270 if len(allkeys - repackedkeys) == 0: 278 if len(allkeys - repackedkeys) == 0:
271 if self.path not in ledger.created: 279 if self.path not in ledger.created:
272 util.unlinkpath(self.indexpath, ignoremissing=True) 280 util.unlinkpath(self.indexpath, ignoremissing=True)
273 util.unlinkpath(self.packpath, ignoremissing=True) 281 util.unlinkpath(self.packpath, ignoremissing=True)
282 data = self._data 290 data = self._data
283 while offset < self.datasize: 291 while offset < self.datasize:
284 oldoffset = offset 292 oldoffset = offset
285 293
286 # <2 byte len> + <filename> 294 # <2 byte len> + <filename>
287 filenamelen = struct.unpack('!H', data[offset:offset + 2])[0] 295 filenamelen = struct.unpack('!H', data[offset : offset + 2])[0]
288 offset += 2 296 offset += 2
289 filename = data[offset:offset + filenamelen] 297 filename = data[offset : offset + filenamelen]
290 offset += filenamelen 298 offset += filenamelen
291 299
292 # <20 byte node> 300 # <20 byte node>
293 node = data[offset:offset + constants.NODESIZE] 301 node = data[offset : offset + constants.NODESIZE]
294 offset += constants.NODESIZE 302 offset += constants.NODESIZE
295 # <20 byte deltabase> 303 # <20 byte deltabase>
296 deltabase = data[offset:offset + constants.NODESIZE] 304 deltabase = data[offset : offset + constants.NODESIZE]
297 offset += constants.NODESIZE 305 offset += constants.NODESIZE
298 306
299 # <8 byte len> + <delta> 307 # <8 byte len> + <delta>
300 rawdeltalen = data[offset:offset + 8] 308 rawdeltalen = data[offset : offset + 8]
301 deltalen = struct.unpack('!Q', rawdeltalen)[0] 309 deltalen = struct.unpack('!Q', rawdeltalen)[0]
302 offset += 8 310 offset += 8
303 311
304 # TODO(augie): we should store a header that is the 312 # TODO(augie): we should store a header that is the
305 # uncompressed size. 313 # uncompressed size.
306 uncompressedlen = len(self._decompress( 314 uncompressedlen = len(
307 data[offset:offset + deltalen])) 315 self._decompress(data[offset : offset + deltalen])
316 )
308 offset += deltalen 317 offset += deltalen
309 318
310 # <4 byte len> + <metadata-list> 319 # <4 byte len> + <metadata-list>
311 metalen = struct.unpack_from('!I', data, offset)[0] 320 metalen = struct.unpack_from('!I', data, offset)[0]
312 offset += 4 + metalen 321 offset += 4 + metalen
315 324
316 # If we've read a lot of data from the mmap, free some memory. 325 # If we've read a lot of data from the mmap, free some memory.
317 self._pagedin += offset - oldoffset 326 self._pagedin += offset - oldoffset
318 if self.freememory(): 327 if self.freememory():
319 data = self._data 328 data = self._data
329
320 330
321 class mutabledatapack(basepack.mutablebasepack): 331 class mutabledatapack(basepack.mutablebasepack):
322 """A class for constructing and serializing a datapack file and index. 332 """A class for constructing and serializing a datapack file and index.
323 333
324 A datapack is a pair of files that contain the revision contents for various 334 A datapack is a pair of files that contain the revision contents for various
386 <pack entry offset: 8 byte unsigned int> 396 <pack entry offset: 8 byte unsigned int>
387 <pack entry size: 8 byte unsigned int> 397 <pack entry size: 8 byte unsigned int>
388 398
389 [1]: new in version 1. 399 [1]: new in version 1.
390 """ 400 """
401
391 INDEXSUFFIX = INDEXSUFFIX 402 INDEXSUFFIX = INDEXSUFFIX
392 PACKSUFFIX = PACKSUFFIX 403 PACKSUFFIX = PACKSUFFIX
393 404
394 # v[01] index format: <node><delta offset><pack data offset><pack data size> 405 # v[01] index format: <node><delta offset><pack data offset><pack data size>
395 INDEXFORMAT = datapack.INDEXFORMAT 406 INDEXFORMAT = datapack.INDEXFORMAT
401 def _compress(self, data): 412 def _compress(self, data):
402 return zlib.compress(data) 413 return zlib.compress(data)
403 414
404 def add(self, name, node, deltabasenode, delta, metadata=None): 415 def add(self, name, node, deltabasenode, delta, metadata=None):
405 # metadata is a dict, ex. {METAKEYFLAG: flag} 416 # metadata is a dict, ex. {METAKEYFLAG: flag}
406 if len(name) > 2**16: 417 if len(name) > 2 ** 16:
407 raise RuntimeError(_("name too long %s") % name) 418 raise RuntimeError(_("name too long %s") % name)
408 if len(node) != 20: 419 if len(node) != 20:
409 raise RuntimeError(_("node should be 20 bytes %s") % node) 420 raise RuntimeError(_("node should be 20 bytes %s") % node)
410 421
411 if node in self.entries: 422 if node in self.entries:
413 return 424 return
414 425
415 # TODO: allow configurable compression 426 # TODO: allow configurable compression
416 delta = self._compress(delta) 427 delta = self._compress(delta)
417 428
418 rawdata = ''.join(( 429 rawdata = ''.join(
419 struct.pack('!H', len(name)), # unsigned 2 byte int 430 (
420 name, 431 struct.pack('!H', len(name)), # unsigned 2 byte int
421 node, 432 name,
422 deltabasenode, 433 node,
423 struct.pack('!Q', len(delta)), # unsigned 8 byte int 434 deltabasenode,
424 delta, 435 struct.pack('!Q', len(delta)), # unsigned 8 byte int
425 )) 436 delta,
437 )
438 )
426 439
427 # v1 support metadata 440 # v1 support metadata
428 rawmeta = shallowutil.buildpackmeta(metadata) 441 rawmeta = shallowutil.buildpackmeta(metadata)
429 rawdata += struct.pack('!I', len(rawmeta)) # unsigned 4 byte 442 rawdata += struct.pack('!I', len(rawmeta)) # unsigned 4 byte
430 rawdata += rawmeta 443 rawdata += rawmeta
431 444
432 offset = self.packfp.tell() 445 offset = self.packfp.tell()
433 446
434 size = len(rawdata) 447 size = len(rawdata)
436 self.entries[node] = (deltabasenode, offset, size) 449 self.entries[node] = (deltabasenode, offset, size)
437 450
438 self.writeraw(rawdata) 451 self.writeraw(rawdata)
439 452
440 def createindex(self, nodelocations, indexoffset): 453 def createindex(self, nodelocations, indexoffset):
441 entries = sorted((n, db, o, s) for n, (db, o, s) 454 entries = sorted(
442 in self.entries.iteritems()) 455 (n, db, o, s) for n, (db, o, s) in self.entries.iteritems()
456 )
443 457
444 rawindex = '' 458 rawindex = ''
445 fmt = self.INDEXFORMAT 459 fmt = self.INDEXFORMAT
446 for node, deltabase, offset, size in entries: 460 for node, deltabase, offset, size in entries:
447 if deltabase == nullid: 461 if deltabase == nullid:
448 deltabaselocation = FULLTEXTINDEXMARK 462 deltabaselocation = FULLTEXTINDEXMARK
449 else: 463 else:
450 # Instead of storing the deltabase node in the index, let's 464 # Instead of storing the deltabase node in the index, let's
451 # store a pointer directly to the index entry for the deltabase. 465 # store a pointer directly to the index entry for the deltabase.
452 deltabaselocation = nodelocations.get(deltabase, 466 deltabaselocation = nodelocations.get(
453 NOBASEINDEXMARK) 467 deltabase, NOBASEINDEXMARK
468 )
454 469
455 entry = struct.pack(fmt, node, deltabaselocation, offset, size) 470 entry = struct.pack(fmt, node, deltabaselocation, offset, size)
456 rawindex += entry 471 rawindex += entry
457 472
458 return rawindex 473 return rawindex
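
Closing aside: the pack data entry that mutabledatapack.add writes above, and that _readentry / getmeta walk earlier in the file, is framed as <2-byte filename length><filename><20-byte node><20-byte delta base node><8-byte compressed delta length><delta><4-byte metadata length><metadata>. Here is a minimal round-trip sketch of that framing, using zlib directly and an opaque metadata blob in place of shallowutil.buildpackmeta / parsepackmeta:

    import struct
    import zlib


    def build_entry(filename, node, deltabasenode, delta, rawmeta=b''):
        """Frame one data entry the way mutabledatapack.add does (sketch)."""
        cdelta = zlib.compress(delta)
        return b''.join((
            struct.pack('!H', len(filename)),   # 2-byte filename length
            filename,
            node,                               # 20-byte node
            deltabasenode,                      # 20-byte delta base node
            struct.pack('!Q', len(cdelta)),     # 8-byte compressed delta length
            cdelta,
            struct.pack('!I', len(rawmeta)),    # 4-byte metadata length (v1)
            rawmeta,
        ))


    def parse_entry(rawentry):
        """Walk the same fields back out, as _readentry does (sketch)."""
        namelen = struct.unpack_from('!H', rawentry, 0)[0]
        offset = 2
        filename = rawentry[offset:offset + namelen]
        offset += namelen
        node = rawentry[offset:offset + 20]
        deltabasenode = rawentry[offset + 20:offset + 40]
        offset += 40
        deltalen = struct.unpack_from('!Q', rawentry, offset)[0]
        offset += 8
        delta = zlib.decompress(rawentry[offset:offset + deltalen])
        offset += deltalen
        metalen = struct.unpack_from('!I', rawentry, offset)[0]
        rawmeta = rawentry[offset + 4:offset + 4 + metalen]
        return filename, node, deltabasenode, delta, rawmeta


    # round-trip check
    entry = build_entry(b'foo.txt', b'\1' * 20, b'\0' * 20, b'hello')
    assert parse_entry(entry)[3] == b'hello'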