Mercurial > hg-stable

--- a/mercurial/copies.py	Mon Dec 14 11:32:20 2020 +0100
+++ b/mercurial/copies.py	Mon Dec 14 11:32:24 2020 +0100
@@ -288,40 +288,92 @@

     cl = repo.changelog
     isancestor = cl.isancestorrev
-    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
-    mrset = set(missingrevs)
+
+    # To track rename from "A" to B, we need to gather all parent → children
+    # edges that are contains in `::B` but not in `::A`.
+    #
+    #
+    # To do so, we need to gather all revisions exclusive¹ to "B" (ie¹: `::b -
+    # ::a`) and also all the "roots point", ie the parents of the exclusive set
+    # that belong to ::a. These are exactly all the revisions needed to express
+    # the parent → children we need to combine.
+    #
+    # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
+    # excluding paths that leads to roots that are not ancestors of `a`. We
+    # keep this out of the explanation because it is hard enough without this special case..
+
+    parents = cl._uncheckedparentrevs
+    graph_roots = (nullrev, nullrev)
+
+    ancestors = cl.ancestors([a.rev()], inclusive=True)
+    revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
     roots = set()
-    for r in missingrevs:
-        for p in cl.parentrevs(r):
-            if p == nullrev:
-                continue
-            if p not in children:
-                children[p] = [r]
-            else:
-                children[p].append(r)
-            if p not in mrset:
-                roots.add(p)
+    has_graph_roots = False
+
+    # iterate over `only(B, A)`
+    for r in revs:
+        ps = parents(r)
+        if ps == graph_roots:
+            has_graph_roots = True
+        else:
+            p1, p2 = ps
+
+            # find all the "root points" (see larger comment above)
+            if p1 != nullrev and p1 in ancestors:
+                roots.add(p1)
+            if p2 != nullrev and p2 in ancestors:
+                roots.add(p2)
     if not roots:
         # no common revision to track copies from
         return {}
-    min_root = min(roots)
-
-    from_head = set(
-        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
-    )
-
-    iterrevs = set(from_head)
-    iterrevs &= mrset
-    iterrevs.update(roots)
-    iterrevs.remove(b.rev())
-    revs = sorted(iterrevs)
+    if has_graph_roots:
+        # this deal with the special case mentionned in the [1] footnotes. We
+        # must filter out revisions that leads to non-common graphroots.
+        roots = list(roots)
+        m = min(roots)
+        h = [b.rev()]
+        roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
+        roots_to_head = set(roots_to_head)
+        revs = [r for r in revs if r in roots_to_head]

     if repo.filecopiesmode == b'changeset-sidedata':
+        # When using side-data, we will process the edges "from" the children.
+        # We iterate over the childre, gathering previous collected data for
+        # the parents. Do know when the parents data is no longer necessary, we
+        # keep a counter of how many children each revision has.
+        #
+        # An interresting property of `children_count` is that it only contains
+        # revision that will be relevant for a edge of the graph. So if a
+        # children has parent not in `children_count`, that edges should not be
+        # processed.
+        children_count = dict((r, 0) for r in roots)
+        for r in revs:
+            for p in cl.parentrevs(r):
+                if p == nullrev:
+                    continue
+                children_count[r] = 0
+                if p in children_count:
+                    children_count[p] += 1
         revinfo = _revinfo_getter(repo, match)
         return _combine_changeset_copies(
-            revs, children, b.rev(), revinfo, match, isancestor
+            revs, children_count, b.rev(), revinfo, match, isancestor
         )
     else:
+        # When not using side-data, we will process the edges "from" the parent.
+        # so we need a full mapping of the parent -> children relation.
+        children = dict((r, []) for r in roots)
+        for r in revs:
+            for p in cl.parentrevs(r):
+                if p == nullrev:
+                    continue
+                children[r] = []
+                if p in children:
+                    children[p].append(r)
+        x = revs.pop()
+        assert x == b.rev()
+        revs.extend(roots)
+        revs.sort()
+
         revinfo = _revinfo_getter_extra(repo)
         return _combine_changeset_copies_extra(
             revs, children, b.rev(), revinfo, match, isancestor
@@ -329,12 +381,12 @@


 def _combine_changeset_copies(
-    revs, children, targetrev, revinfo, match, isancestor
+    revs, children_count, targetrev, revinfo, match, isancestor
 ):
     """combine the copies information for each item of iterrevs

     revs: sorted iterable of revision to visit
-    children: a {parent: [children]} mapping.
+    children_count: a {parent: <number-of-relevant-children>} mapping.
     targetrev: the final copies destination revision (not in iterrevs)
     revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
     match: a matcher
@@ -346,69 +398,76 @@

     if rustmod is not None and alwaysmatch:
         return rustmod.combine_changeset_copies(
-            list(revs), children, targetrev, revinfo, isancestor
+            list(revs), children_count, targetrev, revinfo, isancestor
         )

     isancestor = cached_is_ancestor(isancestor)

     all_copies = {}
-    # iterate over all the "parent" side of copy tracing "edge"
-    for r in revs:
-        # fetch potential previously computed data for that parent
-        copies = all_copies.pop(r, None)
-        if copies is None:
-            # this is a root
-            copies = {}
+    # iterate over all the "children" side of copy tracing "edge"
+    for current_rev in revs:
+        p1, p2, changes = revinfo(current_rev)
+        current_copies = None

-        # iterate over all known children to chain the existing data with the
+        # iterate over all parents to chain the existing data with the
         # data from the parent → child edge.
-        for i, c in enumerate(children[r]):
-            p1, p2, changes = revinfo(c)
-            childcopies = {}
+        for parent, parent_rev in ((1, p1), (2, p2)):
+            if parent_rev == nullrev:
+                continue
+            remaining_children = children_count.get(parent_rev)
+            if remaining_children is None:
+                continue
+            remaining_children -= 1
+            children_count[parent_rev] = remaining_children
+            if remaining_children:
+                copies = all_copies.get(parent_rev, None)
+            else:
+                copies = all_copies.pop(parent_rev, None)

-            # select the right parent → child edge
-            if r == p1:
-                parent = 1
-                if changes is not None:
+            if copies is None:
+                # this is a root
+                copies = {}
+
+            newcopies = copies
+            # chain the data in the edge with the existing data
+            if changes is not None:
+                childcopies = {}
+                if parent == 1:
                     childcopies = changes.copied_from_p1
-            else:
-                assert r == p2
-                parent = 2
-                if changes is not None:
+                elif parent == 2:
                     childcopies = changes.copied_from_p2
-            if not alwaysmatch:
-                childcopies = {
-                    dst: src for dst, src in childcopies.items() if match(dst)
-                }

-            # chain the data in the edge with the existing data
-            newcopies = copies
-            if childcopies:
-                newcopies = copies.copy()
-                for dest, source in pycompat.iteritems(childcopies):
-                    prev = copies.get(source)
-                    if prev is not None and prev[1] is not None:
-                        source = prev[1]
-                    newcopies[dest] = (c, source)
-                assert newcopies is not copies
-            if changes is not None and changes.removed:
-                if newcopies is copies:
+                if not alwaysmatch:
+                    childcopies = {
+                        dst: src
+                        for dst, src in childcopies.items()
+                        if match(dst)
+                    }
+                if childcopies:
                     newcopies = copies.copy()
-                for f in changes.removed:
-                    if f in newcopies:
-                        if newcopies is copies:
-                            # copy on write to avoid affecting potential other
-                            # branches.  when there are no other branches, this
-                            # could be avoided.
-                            newcopies = copies.copy()
-                        newcopies[f] = (c, None)
+                    for dest, source in pycompat.iteritems(childcopies):
+                        prev = copies.get(source)
+                        if prev is not None and prev[1] is not None:
+                            source = prev[1]
+                        newcopies[dest] = (current_rev, source)
+                    assert newcopies is not copies
+                if changes.removed:
+                    if newcopies is copies:
+                        newcopies = copies.copy()
+                    for f in changes.removed:
+                        if f in newcopies:
+                            if newcopies is copies:
+                                # copy on write to avoid affecting potential other
+                                # branches.  when there are no other branches, this
+                                # could be avoided.
+                                newcopies = copies.copy()
+                            newcopies[f] = (current_rev, None)

             # check potential need to combine the data from another parent (for
             # that child). See comment below for details.
-            othercopies = all_copies.get(c)
-            if othercopies is None:
-                all_copies[c] = newcopies
-            elif newcopies is othercopies:
+            if current_copies is None:
+                current_copies = newcopies
+            elif current_copies is newcopies:
                 # nothing to merge:
                 pass
             else:
@@ -419,17 +478,11 @@
                 # This is an arbitrary choice made anew when implementing
                 # changeset based copies. It was made without regards with
                 # potential filelog related behavior.
-                if parent == 1:
-                    if newcopies is copies:
-                        newcopies = copies.copy()
-                    minor, major = othercopies, newcopies
-                else:
-                    # we do not know if the other dict is a copy or not, so we
-                    # need to blindly copy it. Future change should make this
-                    # unnecessary.
-                    minor, major = newcopies, othercopies.copy()
-                copies = _merge_copies_dict(minor, major, isancestor, changes)
-                all_copies[c] = copies
+                assert parent == 2
+                current_copies = _merge_copies_dict(
+                    newcopies, current_copies, isancestor, changes
+                )
+        all_copies[current_rev] = current_copies

     # filter out internal details and return a {dest: source mapping}
     final_copies = {}
--- a/rust/hg-core/src/copy_tracing.rs	Mon Dec 14 11:32:20 2020 +0100
+++ b/rust/hg-core/src/copy_tracing.rs	Mon Dec 14 11:32:24 2020 +0100
@@ -1,6 +1,7 @@
 use crate::utils::hg_path::HgPath;
 use crate::utils::hg_path::HgPathBuf;
 use crate::Revision;
+use crate::NULL_REVISION;

 use im_rc::ordmap::DiffItem;
 use im_rc::ordmap::OrdMap;
@@ -328,7 +329,7 @@
 ///                                 ancestor of another
 pub fn combine_changeset_copies<A: Fn(Revision, Revision) -> bool, D>(
     revs: Vec<Revision>,
-    children: HashMap<Revision, Vec<Revision>>,
+    mut children_count: HashMap<Revision, usize>,
     target_rev: Revision,
     rev_info: RevInfoMaker<D>,
     is_ancestor: &A,
@@ -337,57 +338,69 @@
     let mut oracle = AncestorOracle::new(is_ancestor);

     for rev in revs {
-        // Retrieve data computed in a previous iteration
-        let copies = all_copies.remove(&rev);
-        let copies = match copies {
-            Some(c) => c,
-            None => TimeStampedPathCopies::default(), // root of the walked set
-        };
-
-        let current_children = match children.get(&rev) {
-            Some(c) => c,
-            None => panic!("inconsistent `revs` and `children`"),
-        };
-
-        for child in current_children {
-            // We will chain the copies information accumulated for `rev` with
-            // the individual copies information for each of its children.
-            // Creating a new PathCopies for each `rev` → `children` vertex.
-            let mut d: DataHolder<D> = DataHolder { data: None };
-            let (p1, p2, changes) = rev_info(*child, &mut d);
+        let mut d: DataHolder<D> = DataHolder { data: None };
+        let (p1, p2, changes) = rev_info(rev, &mut d);

-            let parent = if rev == p1 {
-                Parent::FirstParent
-            } else {
-                assert_eq!(rev, p2);
-                Parent::SecondParent
-            };
-            let new_copies =
-                add_from_changes(&copies, &changes, parent, *child);
+        // We will chain the copies information accumulated for the parent with
+        // the individual copies information the curent revision.  Creating a
+        // new TimeStampedPath for each `rev` → `children` vertex.
+        let mut copies: Option<TimeStampedPathCopies> = None;
+        if p1 != NULL_REVISION {
+            // Retrieve data computed in a previous iteration
+            let parent_copies = get_and_clean_parent_copies(
+                &mut all_copies,
+                &mut children_count,
+                p1,
+            );
+            if let Some(parent_copies) = parent_copies {
+                // combine it with data for that revision
+                let vertex_copies = add_from_changes(
+                    &parent_copies,
+                    &changes,
+                    Parent::FirstParent,
+                    rev,
+                );
+                // keep that data around for potential later combination
+                copies = Some(vertex_copies);
+            }
+        }
+        if p2 != NULL_REVISION {
+            // Retrieve data computed in a previous iteration
+            let parent_copies = get_and_clean_parent_copies(
+                &mut all_copies,
+                &mut children_count,
+                p2,
+            );
+            if let Some(parent_copies) = parent_copies {
+                // combine it with data for that revision
+                let vertex_copies = add_from_changes(
+                    &parent_copies,
+                    &changes,
+                    Parent::SecondParent,
+                    rev,
+                );

-            // Merge has two parents needs to combines their copy information.
-            //
-            // If the vertex from the other parent was already processed, we
-            // will have a value for the child ready to be used. We need to
-            // grab it and combine it with the one we already
-            // computed. If not we can simply store the newly
-            // computed data. The processing happening at
-            // the time of the second parent will take care of combining the
-            // two TimeStampedPathCopies instance.
-            match all_copies.remove(child) {
-                None => {
-                    all_copies.insert(child, new_copies);
-                }
-                Some(other_copies) => {
-                    let (minor, major) = match parent {
-                        Parent::FirstParent => (other_copies, new_copies),
-                        Parent::SecondParent => (new_copies, other_copies),
-                    };
-                    let merged_copies =
-                        merge_copies_dict(minor, major, &changes, &mut oracle);
-                    all_copies.insert(child, merged_copies);
-                }
-            };
+                copies = match copies {
+                    None => Some(vertex_copies),
+                    // Merge has two parents needs to combines their copy
+                    // information.
+                    //
+                    // If we got data from both parents, We need to combine
+                    // them.
+                    Some(copies) => Some(merge_copies_dict(
+                        vertex_copies,
+                        copies,
+                        &changes,
+                        &mut oracle,
+                    )),
+                };
+            }
+        }
+        match copies {
+            Some(copies) => {
+                all_copies.insert(rev, copies);
+            }
+            _ => {}
         }
     }

@@ -405,6 +418,32 @@
     result
 }

+/// fetch previous computed information
+///
+/// If no other children are expected to need this information, we drop it from
+/// the cache.
+///
+/// If parent is not part of the set we are expected to walk, return None.
+fn get_and_clean_parent_copies(
+    all_copies: &mut HashMap<Revision, TimeStampedPathCopies>,
+    children_count: &mut HashMap<Revision, usize>,
+    parent_rev: Revision,
+) -> Option<TimeStampedPathCopies> {
+    let count = children_count.get_mut(&parent_rev)?;
+    *count -= 1;
+    if *count == 0 {
+        match all_copies.remove(&parent_rev) {
+            Some(c) => Some(c),
+            None => Some(TimeStampedPathCopies::default()),
+        }
+    } else {
+        match all_copies.get(&parent_rev) {
+            Some(c) => Some(c.clone()),
+            None => Some(TimeStampedPathCopies::default()),
+        }
+    }
+}
+
 /// Combine ChangedFiles with some existing PathCopies information and return
 /// the result
 fn add_from_changes(
--- a/rust/hg-cpython/src/copy_tracing.rs	Mon Dec 14 11:32:20 2020 +0100
+++ b/rust/hg-cpython/src/copy_tracing.rs	Mon Dec 14 11:32:24 2020 +0100
@@ -23,7 +23,7 @@
 pub fn combine_changeset_copies_wrapper(
     py: Python,
     revs: PyList,
-    children: PyDict,
+    children_count: PyDict,
     target_rev: Revision,
     rev_info: PyObject,
     is_ancestor: PyObject,
@@ -93,20 +93,15 @@

             (p1, p2, files)
         });
-    let children: PyResult<_> = children
+    let children_count: PyResult<_> = children_count
         .items(py)
         .iter()
-        .map(|(k, v)| {
-            let v: &PyList = v.cast_as(py)?;
-            let v: PyResult<_> =
-                v.iter(py).map(|child| Ok(child.extract(py)?)).collect();
-            Ok((k.extract(py)?, v?))
-        })
+        .map(|(k, v)| Ok((k.extract(py)?, v.extract(py)?)))
         .collect();

     let res = combine_changeset_copies(
         revs?,
-        children?,
+        children_count?,
         target_rev,
         rev_info_maker,
         &is_ancestor_wrap,