view mercurial/dicthelpers.py @ 18878:3cfaace0441e

copies._forwardcopies: use set operations to find missing files This is a performance win for a number of reasons: - We don't iterate over contexts, which avoids a completely unnecessary sorted call + the O(number of files) abstraction cost of doing that. - We don't check membership in a context, which avoids another O(number of files) abstraction cost. - We iterate over the manifests in C instead of Python. For a large repo with 170,000 files, this improves perfpathcopies from 0.34 seconds to 0.07. Anything that uses pathcopies, such as rebase or diff --git between two revisions, benefits.
author Siddharth Agarwal <sid0@fb.com>
date Thu, 04 Apr 2013 20:22:29 -0700
parents 40c679748fa9
children ed46c2b98b0d
line wrap: on
line source

# dicthelpers.py - helper routines for Python dicts
#
# Copyright 2013 Facebook
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

def diff(d1, d2, default=None):
    '''Return all key-value pairs that are different between d1 and d2.

    This includes keys that are present in one dict but not the other, and
    keys whose values are different. The return value is a dict with values
    being pairs of values from d1 and d2 respectively, and missing values
    represented as default.'''
    res = {}
    if d1 is d2:
        # same dict, so diff is empty
        return res

    for k1, v1 in d1.iteritems():
        if k1 in d2:
            v2 = d2[k1]
            if v1 != v2:
                res[k1] = (v1, v2)
        else:
            res[k1] = (v1, default)

    for k2 in d2:
        if k2 not in d1:
            res[k2] = (default, d2[k2])

    return res

def join(d1, d2, default=None):
    '''Return all key-value pairs from both d1 and d2.

    This is akin to an outer join in relational algebra. The return value is a
    dict with values being pairs of values from d1 and d2 respectively, and
    missing values represented as default.'''
    res = {}

    for k1, v1 in d1.iteritems():
        if k1 in d2:
            res[k1] = (v1, d2[k1])
        else:
            res[k1] = (v1, default)

    if d1 is d2:
        return res

    for k2 in d2:
        if k2 not in d1:
            res[k2] = (default, d2[k2])

    return res