changeset 45696:de6f2afc0247

grep: move match and diff logic to new module

commands.grep() has lots of functions and classes. Let's split it into reusable components so we can leverage them to implement a revset predicate for 'hg grep --diff'. I want to do 'hg log -r "diff(pattern)"'.
author Yuya Nishihara <yuya@tcha.org>
date Wed, 09 Sep 2020 15:23:49 +0900
parents 760bb4d74aad
children 494642ed3c50
files mercurial/commands.py mercurial/grep.py
diffstat 2 files changed, 75 insertions(+), 61 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/commands.py	Wed Sep 09 15:17:26 2020 +0900
+++ b/mercurial/commands.py	Wed Sep 09 15:23:49 2020 +0900
@@ -7,7 +7,6 @@
 
 from __future__ import absolute_import
 
-import difflib
 import errno
 import os
 import re
@@ -41,6 +40,7 @@
     filemerge,
     formatter,
     graphmod,
+    grep as grepmod,
     hbisect,
     help,
     hg,
@@ -3399,48 +3399,6 @@
         sep = eol = b'\0'
 
     getfile = util.lrucachefunc(repo.file)
-
-    def matchlines(body, regexp):
-        begin = 0
-        linenum = 0
-        while begin < len(body):
-            match = regexp.search(body, begin)
-            if not match:
-                break
-            mstart, mend = match.span()
-            linenum += body.count(b'\n', begin, mstart) + 1
-            lstart = body.rfind(b'\n', begin, mstart) + 1 or begin
-            begin = body.find(b'\n', mend) + 1 or len(body) + 1
-            lend = begin - 1
-            yield linenum, mstart - lstart, mend - lstart, body[lstart:lend]
-
-    class linestate(object):
-        def __init__(self, line, linenum, colstart, colend):
-            self.line = line
-            self.linenum = linenum
-            self.colstart = colstart
-            self.colend = colend
-
-        def __hash__(self):
-            return hash(self.line)
-
-        def __eq__(self, other):
-            return self.line == other.line
-
-        def findpos(self, regexp):
-            """Iterate all (start, end) indices of matches"""
-            yield self.colstart, self.colend
-            p = self.colend
-            while p < len(self.line):
-                m = regexp.search(self.line, p)
-                if not m:
-                    break
-                if m.end() == p:
-                    p += 1
-                else:
-                    yield m.span()
-                    p = m.end()
-
     matches = {}
     copies = {}
 
@@ -3450,25 +3408,10 @@
         if body is None:
             return
 
-        for lnum, cstart, cend, line in matchlines(body, regexp):
-            s = linestate(line, lnum, cstart, cend)
+        for lnum, cstart, cend, line in grepmod.matchlines(body, regexp):
+            s = grepmod.linestate(line, lnum, cstart, cend)
             m.append(s)
 
-    def difflinestates(a, b):
-        sm = difflib.SequenceMatcher(None, a, b)
-        for tag, alo, ahi, blo, bhi in sm.get_opcodes():
-            if tag == 'insert':
-                for i in pycompat.xrange(blo, bhi):
-                    yield (b'+', b[i])
-            elif tag == 'delete':
-                for i in pycompat.xrange(alo, ahi):
-                    yield (b'-', a[i])
-            elif tag == 'replace':
-                for i in pycompat.xrange(alo, ahi):
-                    yield (b'-', a[i])
-                for i in pycompat.xrange(blo, bhi):
-                    yield (b'+', b[i])
-
     uipathfn = scmutil.getuipathfn(repo)
 
     def display(fm, fn, ctx, pstates, states):
@@ -3493,7 +3436,7 @@
 
         fieldnamemap = {b'linenumber': b'lineno'}
         if diff:
-            iter = difflinestates(pstates, states)
+            iter = grepmod.difflinestates(pstates, states)
         else:
             iter = [(b'', l) for l in states]
         for change, l in iter:
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/grep.py	Wed Sep 09 15:23:49 2020 +0900
@@ -0,0 +1,71 @@
+# grep.py - logic for history walk and grep
+#
+# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+import difflib
+
+from . import pycompat
+
+
+def matchlines(body, regexp):
+    begin = 0
+    linenum = 0
+    while begin < len(body):
+        match = regexp.search(body, begin)
+        if not match:
+            break
+        mstart, mend = match.span()
+        linenum += body.count(b'\n', begin, mstart) + 1
+        lstart = body.rfind(b'\n', begin, mstart) + 1 or begin
+        begin = body.find(b'\n', mend) + 1 or len(body) + 1
+        lend = begin - 1
+        yield linenum, mstart - lstart, mend - lstart, body[lstart:lend]
+
+
+class linestate(object):
+    def __init__(self, line, linenum, colstart, colend):
+        self.line = line
+        self.linenum = linenum
+        self.colstart = colstart
+        self.colend = colend
+
+    def __hash__(self):
+        return hash(self.line)
+
+    def __eq__(self, other):
+        return self.line == other.line
+
+    def findpos(self, regexp):
+        """Iterate all (start, end) indices of matches"""
+        yield self.colstart, self.colend
+        p = self.colend
+        while p < len(self.line):
+            m = regexp.search(self.line, p)
+            if not m:
+                break
+            if m.end() == p:
+                p += 1
+            else:
+                yield m.span()
+                p = m.end()
+
+
+def difflinestates(a, b):
+    sm = difflib.SequenceMatcher(None, a, b)
+    for tag, alo, ahi, blo, bhi in sm.get_opcodes():
+        if tag == 'insert':
+            for i in pycompat.xrange(blo, bhi):
+                yield (b'+', b[i])
+        elif tag == 'delete':
+            for i in pycompat.xrange(alo, ahi):
+                yield (b'-', a[i])
+        elif tag == 'replace':
+            for i in pycompat.xrange(alo, ahi):
+                yield (b'-', a[i])
+            for i in pycompat.xrange(blo, bhi):
+                yield (b'+', b[i])