Add grep command.
authorBryan O'Sullivan <bos@serpentine.com>
Thu, 25 Aug 2005 02:00:03 -0700
changeset 1057 2fd15d743b3b
parent 1056 34be48b4ca85
child 1058 402279974aea
Add grep command. It currently searches all revs of every matching file. I'll change this soon so that it can still do this, but it will not be the default behaviour. Many options are unimplemented. There's only one output mode. Binary files are not handled yet.
mercurial/commands.py
--- a/mercurial/commands.py	Wed Aug 24 22:25:55 2005 -0700
+++ b/mercurial/commands.py	Thu Aug 25 02:00:03 2005 -0700
@@ -46,6 +46,80 @@
     files, matchfn, results = makewalk(repo, pats, opts, head)
     for r in results: yield r
 
+def walkchangerevs(ui, repo, cwd, pats, opts):
+    # This code most commonly needs to iterate backwards over the
+    # history it is interested in.  Doing so has awful
+    # (quadratic-looking) performance, so we use iterators in a
+    # "windowed" way.  Walk forwards through a window of revisions,
+    # yielding them in the desired order, and walk the windows
+    # themselves backwards.
+    cwd = repo.getcwd()
+    if not pats and cwd:
+        opts['include'] = [os.path.join(cwd, i) for i in opts['include']]
+        opts['exclude'] = [os.path.join(cwd, x) for x in opts['exclude']]
+    files, matchfn, anypats = matchpats(repo, (pats and cwd) or '',
+                                        pats, opts)
+    revs = map(int, revrange(ui, repo, opts['rev'] or ['tip:0']))
+    wanted = {}
+    slowpath = anypats
+    window = 300
+    fncache = {}
+    if not slowpath and not files:
+        # No files, no patterns.  Display all revs.
+        wanted = dict(zip(revs, revs))
+    if not slowpath:
+        # Only files, no patterns.  Check the history of each file.
+        def filerevgen(filelog):
+            for i in xrange(filelog.count() - 1, -1, -window):
+                revs = []
+                for j in xrange(max(0, i - window), i + 1):
+                    revs.append(filelog.linkrev(filelog.node(j)))
+                revs.reverse()
+                for rev in revs:
+                    yield rev
+
+        minrev, maxrev = min(revs), max(revs)
+        for file in files:
+            filelog = repo.file(file)
+            # A zero count may be a directory or deleted file, so
+            # try to find matching entries on the slow path.
+            if filelog.count() == 0:
+                slowpath = True
+                break
+            for rev in filerevgen(filelog):
+                if rev <= maxrev:
+                    if rev < minrev: break
+                    fncache.setdefault(rev, [])
+                    fncache[rev].append(file)
+                    wanted[rev] = 1
+    if slowpath:
+        # The slow path checks files modified in every changeset.
+        def changerevgen():
+            for i in xrange(repo.changelog.count() - 1, -1, -window):
+                for j in xrange(max(0, i - window), i + 1):
+                    yield j, repo.changelog.read(repo.lookup(str(j)))[3]
+
+        for rev, changefiles in changerevgen():
+            matches = filter(matchfn, changefiles)
+            if matches:
+                fncache[rev] = matches
+                wanted[rev] = 1
+
+    for i in xrange(0, len(revs), window):
+        yield 'window', revs[0] < revs[-1], revs[-1]
+        nrevs = [rev for rev in revs[i : min(i + window, len(revs))]
+                 if rev in wanted]
+        srevs = list(nrevs)
+        srevs.sort()
+        for rev in srevs:
+            fns = fncache.get(rev)
+            if not fns:
+                fns = repo.changelog.read(repo.lookup(str(rev)))[3]
+                fns = filter(matchfn, fns)
+            yield 'add', rev, fns
+        for rev in nrevs:
+            yield 'iter', rev, None
+
 revrangesep = ':'
 
 def revrange(ui, repo, revs, revlog=None):
@@ -716,6 +790,89 @@
             if not exact: ui.status('forgetting ', rel, '\n')
     repo.forget(forget)
 
+def grep(ui, repo, pattern = None, *pats, **opts):
+    if pattern is None: pattern = opts['regexp']
+    if not pattern: raise util.Abort('no pattern to search for')
+    reflags = 0
+    if opts['ignore_case']: reflags |= re.I
+    regexp = re.compile(pattern, reflags)
+    sep, end = ':', '\n'
+    if opts['null'] or opts['print0']: sep = end = '\0'
+
+    fcache = {}
+    def getfile(fn):
+        if fn not in fcache:
+            fcache[fn] = repo.file(fn)
+        return fcache[fn]
+
+    def matchlines(body):
+        for match in regexp.finditer(body):
+            start, end = match.span()
+            lnum = body.count('\n', 0, start) + 1
+            lstart = body.rfind('\n', 0, start) + 1
+            lend = body.find('\n', end)
+            yield lnum, start - lstart, end - lstart, body[lstart:lend]
+
+    class linestate:
+        def __init__(self, line, linenum, colstart, colend):
+            self.line = line
+            self.linenum = linenum
+            self.colstart = colstart
+            self.colend = colend
+        def __eq__(self, other): return self.line == other.line
+        def __hash__(self): return hash(self.line)
+
+    matches = {}
+    def grepbody(fn, rev, body):
+        matches[rev].setdefault(fn, {})
+        m = matches[rev][fn]
+        for lnum, cstart, cend, line in matchlines(body):
+            s = linestate(line, lnum, cstart, cend)
+            m[s] = s
+
+    prev = {}
+    def display(fn, rev, states, prevstates):
+        diff = list(set(states).symmetric_difference(set(prevstates)))
+        diff.sort(lambda x, y: cmp(x.linenum, y.linenum))
+        for l in diff:
+            if incrementing:
+                change = ((l in prevstates) and '-') or '+'
+                r = rev
+            else:
+                change = ((l in states) and '-') or '+'
+                r = prev[fn]
+            ui.write('%s:%s:%s:%s%s\n' % (fn, r, l.linenum, change, l.line))
+
+    fstate = {}
+    for st, rev, fns in walkchangerevs(ui, repo, repo.getcwd(), pats, opts):
+        if st == 'window':
+            incrementing = rev
+            matches.clear()
+        elif st == 'add':
+            change = repo.changelog.read(repo.lookup(str(rev)))
+            mf = repo.manifest.read(change[0])
+            matches[rev] = {}
+            for fn in fns:
+                fstate.setdefault(fn, {})
+                try:
+                    grepbody(fn, rev, getfile(fn).read(mf[fn]))
+                except KeyError:
+                    pass
+        elif st == 'iter':
+            states = matches[rev].items()
+            states.sort()
+            for fn, m in states:
+                if incrementing or fstate[fn]:
+                    display(fn, rev, m, fstate[fn])
+                fstate[fn] = m
+                prev[fn] = rev
+
+    if not incrementing:
+        fstate = fstate.items()
+        fstate.sort()
+        for fn, state in fstate:
+            display(fn, rev, {}, state)
+
 def heads(ui, repo, **opts):
     """show current repository heads"""
     heads = repo.changelog.heads()
@@ -845,96 +1002,42 @@
 
 def log(ui, repo, *pats, **opts):
     """show revision history of entire repository or files"""
-    # This code most commonly needs to iterate backwards over the
-    # history it is interested in.  This has awful (quadratic-looking)
-    # performance, so we use iterators that walk forwards through
-    # windows of revisions, yielding revisions in reverse order, while
-    # walking the windows backwards.
+    class dui:
+        # Implement and delegate some ui protocol.  Save hunks of
+        # output for later display in the desired order.
+        def __init__(self, ui):
+            self.ui = ui
+            self.hunk = {}
+        def bump(self, rev):
+            self.rev = rev
+            self.hunk[rev] = []
+        def note(self, *args):
+            if self.verbose: self.write(*args)
+        def status(self, *args):
+            if not self.quiet: self.write(*args)
+        def write(self, *args):
+            self.hunk[self.rev].append(args)
+        def __getattr__(self, key):
+            return getattr(self.ui, key)
     cwd = repo.getcwd()
     if not pats and cwd:
         opts['include'] = [os.path.join(cwd, i) for i in opts['include']]
         opts['exclude'] = [os.path.join(cwd, x) for x in opts['exclude']]
-    files, matchfn, anypats = matchpats(repo, (pats and cwd) or '',
-                                        pats, opts)
-    revs = map(int, revrange(ui, repo, opts['rev'] or ['tip:0']))
-    wanted = {}
-    slowpath = anypats
-    window = 300
-    if not slowpath and not files:
-        # No files, no patterns.  Display all revs.
-        wanted = dict(zip(revs, revs))
-    if not slowpath:
-        # Only files, no patterns.  Check the history of each file.
-        def filerevgen(filelog):
-            for i in xrange(filelog.count() - 1, -1, -window):
-                print "filelog"
-                revs = []
-                for j in xrange(max(0, i - window), i + 1):
-                    revs.append(filelog.linkrev(filelog.node(j)))
-                revs.reverse()
-                for rev in revs:
-                    yield rev
-
-        minrev, maxrev = min(revs), max(revs)
-        for filelog in map(repo.file, files):
-            # A zero count may be a directory or deleted file, so
-            # try to find matching entries on the slow path.
-            if filelog.count() == 0:
-                slowpath = True
-                break
-            for rev in filerevgen(filelog):
-                if rev <= maxrev:
-                    if rev < minrev: break
-                    wanted[rev] = 1
-    if slowpath:
-        # The slow path checks files modified in every changeset.
-        def mfrevgen():
-            for i in xrange(repo.changelog.count() - 1, -1, -window):
-                for j in xrange(max(0, i - window), i + 1):
-                    yield j, repo.changelog.read(repo.lookup(str(j)))[3]
-
-        for rev, mf in mfrevgen():
-            if filter(matchfn, mf):
-                wanted[rev] = 1
-
-    def changerevgen():
-        class dui:
-            # Implement and delegate some ui protocol.  Save hunks of
-            # output for later display in the desired order.
-            def __init__(self, ui):
-                self.ui = ui
-                self.hunk = {}
-            def bump(self, rev):
-                self.rev = rev
-                self.hunk[rev] = []
-            def note(self, *args):
-                if self.verbose: self.write(*args)
-            def status(self, *args):
-                if not self.quiet: self.write(*args)
-            def write(self, *args):
-                self.hunk[self.rev].append(args)
-            def __getattr__(self, key):
-                return getattr(self.ui, key)
-        for i in xrange(0, len(revs), window):
-            nrevs = [rev for rev in revs[i : min(i + window, len(revs))]
-                     if rev in wanted]
-            srevs = list(nrevs)
-            srevs.sort()
+    for st, rev, fns in walkchangerevs(ui, repo, (pats and cwd) or '', pats,
+                                       opts):
+        if st == 'window':
             du = dui(ui)
-            for rev in srevs:
-                du.bump(rev)
-                yield rev, du
-            for rev in nrevs:
-                for args in du.hunk[rev]:
-                    ui.write(*args)
-
-    for rev, dui in changerevgen():
-        show_changeset(dui, repo, rev)
-        if opts['patch']:
-            changenode = repo.changelog.node(rev)
-            prev, other = repo.changelog.parents(changenode)
-            dodiff(dui, dui, repo, prev, changenode, files)
-            dui.write("\n\n")
+        elif st == 'add':
+            du.bump(rev)
+            show_changeset(du, repo, rev)
+            if opts['patch']:
+                changenode = repo.changelog.node(rev)
+                prev, other = repo.changelog.parents(changenode)
+                dodiff(du, du, repo, prev, changenode, fns)
+                du.write("\n\n")
+        elif st == 'iter':
+            for args in du.hunk[rev]:
+                ui.write(*args)
 
 def manifest(ui, repo, rev=None):
     """output the latest or given revision of the project manifest"""
@@ -1408,6 +1511,21 @@
          [('I', 'include', [], 'include path in search'),
           ('X', 'exclude', [], 'exclude path from search')],
          "hg forget [OPTION]... FILE..."),
+    "grep": (grep,
+             [('0', 'print0', None, 'terminate file names with NUL'),
+              ('I', 'include', [], 'include path in search'),
+              ('X', 'exclude', [], 'include path in search'),
+              ('Z', 'null', None, 'terminate file names with NUL'),
+              ('a', 'all-revs', '', 'search all revs'),
+              ('e', 'regexp', '', 'pattern to search for'),
+              ('f', 'full-path', None, 'print complete paths'),
+              ('i', 'ignore-case', None, 'ignore case when matching'),
+              ('l', 'files-with-matches', None, 'print names of files with matches'),
+              ('n', 'line-number', '', 'print line numbers'),
+              ('r', 'rev', [], 'search in revision rev'),
+              ('s', 'no-messages', None, 'do not print error messages'),
+              ('v', 'invert-match', None, 'select non-matching lines')],
+             "hg grep [options] [pat] [files]"),
     "heads":
         (heads,
          [('b', 'branches', None, 'find branch info')],