comparison mercurial/obsolete.py @ 22851:974389427e5f

obsolete: introduce a new binary encoding for obsmarkers (version 1) This new encoding explicitly stores the date and parents allowing a significantly faster marker decoding. See inline documentation for details. This format is not yet used to store format on disk. But it will be used in bundle2 exchange if both side support it. Support for on-disk format is coming in other changesets.
author Pierre-Yves David <pierre-yves.david@fb.com>
date Thu, 09 Oct 2014 00:10:10 -0700
parents b078e4dc9f9a
children e994b034e91e
comparison
equal deleted inserted replaced
22850:b078e4dc9f9a 22851:974389427e5f
229 if l: 229 if l:
230 key, value = l.split(':') 230 key, value = l.split(':')
231 d[key] = value 231 d[key] = value
232 return d 232 return d
233 233
234 ## Parsing and writing of version "1"
235 #
236 # The header is followed by the markers. Each marker is made of:
237 #
238 # - uint32: total size of the marker (including this field)
239 #
240 # - float64: date in seconds since epoch
241 #
242 # - int16: timezone offset in minutes
243 #
244 # - uint16: a bit field. It is reserved for flags used in common
245 # obsolete marker operations, to avoid repeated decoding of metadata
246 # entries.
247 #
248 # - uint8: number of successors "N", can be zero.
249 #
250 # - uint8: number of parents "P", can be zero.
251 #
252 # 0: parents data stored but no parent,
253 # 1: one parent stored,
254 # 2: two parents stored,
255 # 3: no parent data stored
256 #
257 # - uint8: number of metadata entries M
258 #
259 # - 20 or 32 bytes: precursor changeset identifier.
260 #
261 # - N*(20 or 32) bytes: successors changesets identifiers.
262 #
263 # - P*(20 or 32) bytes: parents of the precursors changesets.
264 #
265 # - M*(uint8, uint8): size of all metadata entries (key and value)
266 #
267 # - remaining bytes: the metadata, each (key, value) pair after the other.
268 _fm1version = 1
269 _fm1fixed = '>IdhHBBB20s'
270 _fm1nodesha1 = '20s'
271 _fm1nodesha256 = '32s'
272 _fm1fsize = struct.calcsize(_fm1fixed)
273 _fm1parentnone = 3
274 _fm1parentshift = 14
275 _fm1parentmask = (_fm1parentnone << _fm1parentshift)
276 _fm1metapair = 'BB'
277 _fm1metapairsize = struct.calcsize('BB')
278
279 def _fm1readmarkers(data, off=0):
280 # Loop on markers
281 l = len(data)
282 while off + _fm1fsize <= l:
283 # read fixed part
284 cur = data[off:off + _fm1fsize]
285 off += _fm1fsize
286 fixeddata = _unpack(_fm1fixed, cur)
287 ttsize, seconds, tz, flags, numsuc, numpar, nummeta, prec = fixeddata
288 # extract the number of parents information
289 if numpar == _fm1parentnone:
290 numpar = None
291 # build the date tuple (upgrade tz minutes to seconds)
292 date = (seconds, tz * 60)
293 _fm1node = _fm1nodesha1
294 if flags & usingsha256:
295 _fm1node = _fm1nodesha256
296 fnodesize = struct.calcsize(_fm1node)
297 # read replacement
298 sucs = ()
299 if numsuc:
300 s = (fnodesize * numsuc)
301 cur = data[off:off + s]
302 sucs = _unpack(_fm1node * numsuc, cur)
303 off += s
304 # read parents
305 if numpar is None:
306 parents = None
307 elif numpar == 0:
308 parents = ()
309 elif numpar: # neither None nor zero
310 s = (fnodesize * numpar)
311 cur = data[off:off + s]
312 parents = _unpack(_fm1node * numpar, cur)
313 off += s
314 # read metadata
315 metaformat = '>' + (_fm1metapair * nummeta)
316 s = _fm1metapairsize * nummeta
317 metapairsize = _unpack(metaformat, data[off:off + s])
318 off += s
319 metadata = []
320 for idx in xrange(0, len(metapairsize), 2):
321 sk = metapairsize[idx]
322 sv = metapairsize[idx + 1]
323 key = data[off:off + sk]
324 value = data[off + sk:off + sk + sv]
325 assert len(key) == sk
326 assert len(value) == sv
327 metadata.append((key, value))
328 off += sk + sv
329 metadata = tuple(metadata)
330
331 yield (prec, sucs, flags, metadata, date, parents)
332
333 def _fm1encodeonemarker(marker):
334 pre, sucs, flags, metadata, date, parents = marker
335 # determine node size
336 _fm1node = _fm1nodesha1
337 if flags & usingsha256:
338 _fm1node = _fm1nodesha256
339 numsuc = len(sucs)
340 numextranodes = numsuc
341 if parents is None:
342 numpar = _fm1parentnone
343 else:
344 numpar = len(parents)
345 numextranodes += numpar
346 formatnodes = _fm1node * numextranodes
347 formatmeta = _fm1metapair * len(metadata)
348 format = _fm1fixed + formatnodes + formatmeta
349 # tz is stored in minutes so we divide by 60
350 tz = date[1]//60
351 data = [None, date[0], tz, flags, numsuc, numpar, len(metadata), pre]
352 data.extend(sucs)
353 if parents is not None:
354 data.extend(parents)
355 totalsize = struct.calcsize(format)
356 for key, value in metadata:
357 lk = len(key)
358 lv = len(value)
359 data.append(lk)
360 data.append(lv)
361 totalsize += lk + lv
362 data[0] = totalsize
363 data = [_pack(format, *data)]
364 for key, value in metadata:
365 data.append(key)
366 data.append(value)
367 return ''.join(data)
234 368
235 # mapping to read/write various marker formats 369 # mapping to read/write various marker formats
236 # <version> -> (decoder, encoder) 370 # <version> -> (decoder, encoder)
237 formats = {_fm0version: (_fm0readmarkers, _fm0encodeonemarker)} 371 formats = {_fm0version: (_fm0readmarkers, _fm0encodeonemarker),
372 _fm1version: (_fm1readmarkers, _fm1encodeonemarker)}
238 373
239 def _readmarkers(data): 374 def _readmarkers(data):
240 """Read and enumerate markers from raw data""" 375 """Read and enumerate markers from raw data"""
241 off = 0 376 off = 0
242 diskversion = _unpack('>B', data[off:off + 1])[0] 377 diskversion = _unpack('>B', data[off:off + 1])[0]