grep: move match and diff logic to new module
commands.grep() defines lots of nested functions and classes. Let's split them
out into reusable components so they can be leveraged to implement a revset
predicate for 'hg grep --diff'. The goal is to support 'hg log -r "diff(pattern)"'.
--- a/mercurial/commands.py Wed Sep 09 15:17:26 2020 +0900
+++ b/mercurial/commands.py Wed Sep 09 15:23:49 2020 +0900
@@ -7,7 +7,6 @@
from __future__ import absolute_import
-import difflib
import errno
import os
import re
@@ -41,6 +40,7 @@
filemerge,
formatter,
graphmod,
+ grep as grepmod,
hbisect,
help,
hg,
@@ -3399,48 +3399,6 @@
sep = eol = b'\0'
getfile = util.lrucachefunc(repo.file)
-
- def matchlines(body, regexp):
- begin = 0
- linenum = 0
- while begin < len(body):
- match = regexp.search(body, begin)
- if not match:
- break
- mstart, mend = match.span()
- linenum += body.count(b'\n', begin, mstart) + 1
- lstart = body.rfind(b'\n', begin, mstart) + 1 or begin
- begin = body.find(b'\n', mend) + 1 or len(body) + 1
- lend = begin - 1
- yield linenum, mstart - lstart, mend - lstart, body[lstart:lend]
-
- class linestate(object):
- def __init__(self, line, linenum, colstart, colend):
- self.line = line
- self.linenum = linenum
- self.colstart = colstart
- self.colend = colend
-
- def __hash__(self):
- return hash(self.line)
-
- def __eq__(self, other):
- return self.line == other.line
-
- def findpos(self, regexp):
- """Iterate all (start, end) indices of matches"""
- yield self.colstart, self.colend
- p = self.colend
- while p < len(self.line):
- m = regexp.search(self.line, p)
- if not m:
- break
- if m.end() == p:
- p += 1
- else:
- yield m.span()
- p = m.end()
-
matches = {}
copies = {}
@@ -3450,25 +3408,10 @@
if body is None:
return
- for lnum, cstart, cend, line in matchlines(body, regexp):
- s = linestate(line, lnum, cstart, cend)
+ for lnum, cstart, cend, line in grepmod.matchlines(body, regexp):
+ s = grepmod.linestate(line, lnum, cstart, cend)
m.append(s)
- def difflinestates(a, b):
- sm = difflib.SequenceMatcher(None, a, b)
- for tag, alo, ahi, blo, bhi in sm.get_opcodes():
- if tag == 'insert':
- for i in pycompat.xrange(blo, bhi):
- yield (b'+', b[i])
- elif tag == 'delete':
- for i in pycompat.xrange(alo, ahi):
- yield (b'-', a[i])
- elif tag == 'replace':
- for i in pycompat.xrange(alo, ahi):
- yield (b'-', a[i])
- for i in pycompat.xrange(blo, bhi):
- yield (b'+', b[i])
-
uipathfn = scmutil.getuipathfn(repo)
def display(fm, fn, ctx, pstates, states):
@@ -3493,7 +3436,7 @@
fieldnamemap = {b'linenumber': b'lineno'}
if diff:
- iter = difflinestates(pstates, states)
+ iter = grepmod.difflinestates(pstates, states)
else:
iter = [(b'', l) for l in states]
for change, l in iter:
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/grep.py Wed Sep 09 15:23:49 2020 +0900
@@ -0,0 +1,71 @@
+# grep.py - logic for history walk and grep
+#
+# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+import difflib
+
+from . import pycompat
+
+
+def matchlines(body, regexp):
+ begin = 0
+ linenum = 0
+ while begin < len(body):
+ match = regexp.search(body, begin)
+ if not match:
+ break
+ mstart, mend = match.span()
+ linenum += body.count(b'\n', begin, mstart) + 1
+ lstart = body.rfind(b'\n', begin, mstart) + 1 or begin
+ begin = body.find(b'\n', mend) + 1 or len(body) + 1
+ lend = begin - 1
+ yield linenum, mstart - lstart, mend - lstart, body[lstart:lend]
+
+
+class linestate(object):
+ def __init__(self, line, linenum, colstart, colend):
+ self.line = line
+ self.linenum = linenum
+ self.colstart = colstart
+ self.colend = colend
+
+ def __hash__(self):
+ return hash(self.line)
+
+ def __eq__(self, other):
+ return self.line == other.line
+
+ def findpos(self, regexp):
+ """Iterate all (start, end) indices of matches"""
+ yield self.colstart, self.colend
+ p = self.colend
+ while p < len(self.line):
+ m = regexp.search(self.line, p)
+ if not m:
+ break
+ if m.end() == p:
+ p += 1
+ else:
+ yield m.span()
+ p = m.end()
+
+
+def difflinestates(a, b):
+ sm = difflib.SequenceMatcher(None, a, b)
+ for tag, alo, ahi, blo, bhi in sm.get_opcodes():
+ if tag == 'insert':
+ for i in pycompat.xrange(blo, bhi):
+ yield (b'+', b[i])
+ elif tag == 'delete':
+ for i in pycompat.xrange(alo, ahi):
+ yield (b'-', a[i])
+ elif tag == 'replace':
+ for i in pycompat.xrange(alo, ahi):
+ yield (b'-', a[i])
+ for i in pycompat.xrange(blo, bhi):
+ yield (b'+', b[i])