mercurial/pvec.py
changeset 43076 2372284d9457
parent 38783 e7aa113b14f7
child 43077 687b865b95ad
equal deleted inserted replaced
43075:57875cf423c9 43076:2372284d9457
    54 from . import (
    54 from . import (
    55     pycompat,
    55     pycompat,
    56     util,
    56     util,
    57 )
    57 )
    58 
    58 
    59 _size = 448 # 70 chars b85-encoded
    59 _size = 448  # 70 chars b85-encoded
    60 _bytes = _size / 8
    60 _bytes = _size / 8
    61 _depthbits = 24
    61 _depthbits = 24
    62 _depthbytes = _depthbits / 8
    62 _depthbytes = _depthbits / 8
    63 _vecbytes = _bytes - _depthbytes
    63 _vecbytes = _bytes - _depthbytes
    64 _vecbits = _vecbytes * 8
    64 _vecbits = _vecbytes * 8
    65 _radius = (_vecbits - 30) / 2 # high probability vectors are related
    65 _radius = (_vecbits - 30) / 2  # high probability vectors are related
       
    66 
    66 
    67 
    67 def _bin(bs):
    68 def _bin(bs):
    68     '''convert a bytestring to a long'''
    69     '''convert a bytestring to a long'''
    69     v = 0
    70     v = 0
    70     for b in bs:
    71     for b in bs:
    71         v = v * 256 + ord(b)
    72         v = v * 256 + ord(b)
    72     return v
    73     return v
    73 
    74 
       
    75 
    74 def _str(v, l):
    76 def _str(v, l):
    75     bs = ""
    77     bs = ""
    76     for p in pycompat.xrange(l):
    78     for p in pycompat.xrange(l):
    77         bs = chr(v & 255) + bs
    79         bs = chr(v & 255) + bs
    78         v >>= 8
    80         v >>= 8
    79     return bs
    81     return bs
    80 
    82 
       
    83 
    81 def _split(b):
    84 def _split(b):
    82     '''depth and bitvec'''
    85     '''depth and bitvec'''
    83     return _bin(b[:_depthbytes]), _bin(b[_depthbytes:])
    86     return _bin(b[:_depthbytes]), _bin(b[_depthbytes:])
    84 
    87 
       
    88 
    85 def _join(depth, bitvec):
    89 def _join(depth, bitvec):
    86     return _str(depth, _depthbytes) + _str(bitvec, _vecbytes)
    90     return _str(depth, _depthbytes) + _str(bitvec, _vecbytes)
       
    91 
    87 
    92 
    88 def _hweight(x):
    93 def _hweight(x):
    89     c = 0
    94     c = 0
    90     while x:
    95     while x:
    91         if x & 1:
    96         if x & 1:
    92             c += 1
    97             c += 1
    93         x >>= 1
    98         x >>= 1
    94     return c
    99     return c
       
   100 
       
   101 
    95 _htab = [_hweight(x) for x in pycompat.xrange(256)]
   102 _htab = [_hweight(x) for x in pycompat.xrange(256)]
       
   103 
    96 
   104 
    97 def _hamming(a, b):
   105 def _hamming(a, b):
    98     '''find the hamming distance between two longs'''
   106     '''find the hamming distance between two longs'''
    99     d = a ^ b
   107     d = a ^ b
   100     c = 0
   108     c = 0
   101     while d:
   109     while d:
   102         c += _htab[d & 0xff]
   110         c += _htab[d & 0xFF]
   103         d >>= 8
   111         d >>= 8
   104     return c
   112     return c
       
   113 
   105 
   114 
   106 def _mergevec(x, y, c):
   115 def _mergevec(x, y, c):
   107     # Ideally, this function would be x ^ y ^ ancestor, but finding
   116     # Ideally, this function would be x ^ y ^ ancestor, but finding
   108     # ancestors is a nuisance. So instead we find the minimal number
   117     # ancestors is a nuisance. So instead we find the minimal number
   109     # of changes to balance the depth and hamming distance
   118     # of changes to balance the depth and hamming distance
   114         d1, d2, v1, v2 = d2, d1, v2, v1
   123         d1, d2, v1, v2 = d2, d1, v2, v1
   115 
   124 
   116     hdist = _hamming(v1, v2)
   125     hdist = _hamming(v1, v2)
   117     ddist = d1 - d2
   126     ddist = d1 - d2
   118     v = v1
   127     v = v1
   119     m = v1 ^ v2 # mask of different bits
   128     m = v1 ^ v2  # mask of different bits
   120     i = 1
   129     i = 1
   121 
   130 
   122     if hdist > ddist:
   131     if hdist > ddist:
   123         # if delta = 10 and hdist = 100, then we need to go up 55 steps
   132         # if delta = 10 and hdist = 100, then we need to go up 55 steps
   124         # to the ancestor and down 45
   133         # to the ancestor and down 45
   138     else:
   147     else:
   139         v = _flipbit(v, c)
   148         v = _flipbit(v, c)
   140 
   149 
   141     return depth, v
   150     return depth, v
   142 
   151 
       
   152 
   143 def _flipbit(v, node):
   153 def _flipbit(v, node):
   144     # converting bit strings to longs is slow
   154     # converting bit strings to longs is slow
   145     bit = (hash(node) & 0xffffffff) % _vecbits
   155     bit = (hash(node) & 0xFFFFFFFF) % _vecbits
   146     return v ^ (1<<bit)
   156     return v ^ (1 << bit)
       
   157 
   147 
   158 
   148 def ctxpvec(ctx):
   159 def ctxpvec(ctx):
   149     '''construct a pvec for ctx while filling in the cache'''
   160     '''construct a pvec for ctx while filling in the cache'''
   150     r = ctx.repo()
   161     r = ctx.repo()
   151     if not util.safehasattr(r, "_pveccache"):
   162     if not util.safehasattr(r, "_pveccache"):
   166                 else:
   177                 else:
   167                     pvc[n] = _mergevec(pvc[p1], pvc[p2], node)
   178                     pvc[n] = _mergevec(pvc[p1], pvc[p2], node)
   168     bs = _join(*pvc[ctx.rev()])
   179     bs = _join(*pvc[ctx.rev()])
   169     return pvec(util.b85encode(bs))
   180     return pvec(util.b85encode(bs))
   170 
   181 
       
   182 
   171 class pvec(object):
   183 class pvec(object):
   172     def __init__(self, hashorctx):
   184     def __init__(self, hashorctx):
   173         if isinstance(hashorctx, str):
   185         if isinstance(hashorctx, str):
   174             self._bs = hashorctx
   186             self._bs = hashorctx
   175             self._depth, self._vec = _split(util.b85decode(hashorctx))
   187             self._depth, self._vec = _split(util.b85decode(hashorctx))
   183         return self._vec == b._vec and self._depth == b._depth
   195         return self._vec == b._vec and self._depth == b._depth
   184 
   196 
   185     def __lt__(self, b):
   197     def __lt__(self, b):
   186         delta = b._depth - self._depth
   198         delta = b._depth - self._depth
   187         if delta < 0:
   199         if delta < 0:
   188             return False # always correct
   200             return False  # always correct
   189         if _hamming(self._vec, b._vec) > delta:
   201         if _hamming(self._vec, b._vec) > delta:
   190             return False
   202             return False
   191         return True
   203         return True
   192 
   204 
   193     def __gt__(self, b):
   205     def __gt__(self, b):