comparison mercurial/revset.py @ 11275:c9ce8ecd6ca1

revset: introduce revset core
author Matt Mackall <mpm@selenic.com>
date Tue, 01 Jun 2010 11:18:57 -0500
parents
children 7df88cdf47fd
comparison
equal deleted inserted replaced
11274:77272d28b53f 11275:c9ce8ecd6ca1
1 # revset.py - revision set queries for mercurial
2 #
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
7
8 import re
9 import parser, util, hg
10 import match as _match
11
# Token table given to parser.parser() together with tokenize().
# Every value starts with a number, followed by the tree node built when
# the token is met in prefix position and then the one for infix position
# (None where that position is not allowed).
# NOTE(review): the leading number is presumably the binding strength;
# confirm against parser.py.
elements = {
    # grouping and function calls
    "(": (20, ("group", 1, ")"), ("func", 1, ")")),
    ")": (0, None, None),
    # arithmetic-looking operators and ranges
    "-": (19, ("negate", 19), ("minus", 19)),
    "..": (17, None, ("dagrange", 17)),
    ":": (15, None, ("range", 15)),
    # boolean operators, spelled out and as punctuation
    "not": (10, ("not", 10)),
    "!": (10, ("not", 10)),
    "and": (5, None, ("and", 5)),
    "&": (5, None, ("and", 5)),
    "or": (4, None, ("or", 4)),
    "|": (4, None, ("or", 4)),
    "+": (4, None, ("or", 4)),
    # argument separator
    ",": (2, None, ("list", 2)),
    # terminals
    "symbol": (0, ("symbol",), None),
    "string": (0, ("string",), None),
    "end": (0, None, None),
}

# words that tokenize() reports as operators instead of symbols
keywords = set(('and', 'or', 'not'))
32
def tokenize(program):
    """Split a revset expression into (type, value) tokens.

    Operators and the words in keywords are yielded as (token, None),
    quoted text as ('string', text) with escapes decoded, and any other
    word as ('symbol', text).  ('end', None) is always yielded last.

    Raises ValueError for an unterminated quoted string or for a
    character that cannot start a token.
    """
    pos, l = 0, len(program)
    while pos < l:
        c = program[pos]
        if c.isspace(): # skip inter-token whitespace
            pass
        elif c in "():,-|&+!": # handle simple operators
            yield (c, None)
        elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
            yield ('..', None)
            pos += 1 # skip ahead
        elif c in '"\'': # handle quoted strings
            pos += 1
            s = pos
            while pos < l: # find closing quote
                d = program[pos]
                if d == '\\': # skip over escaped characters
                    pos += 2
                    continue
                if d == c:
                    yield ('string', program[s:pos].decode('string-escape'))
                    break
                pos += 1
            else:
                # a bare string is not a valid exception object
                raise ValueError("unterminated string")
        elif c.isalnum() or c in '.': # gather up a symbol/keyword
            s = pos
            pos += 1
            while pos < l: # find end of symbol
                d = program[pos]
                if not (d.isalnum() or d in "._"):
                    break
                if d == '.' and program[pos - 1] == '.': # special case for ..
                    # give the first dot back so '..' is seen as an operator
                    pos -= 1
                    break
                pos += 1
            sym = program[s:pos]
            if sym in keywords: # operator keywords
                yield (sym, None)
            else:
                yield ('symbol', sym)
            pos -= 1 # compensate for the unconditional increment below
        else:
            raise ValueError("syntax error at %d" % pos)
        pos += 1
    yield ('end', None)
79
80 # helpers
81
def getstring(x, err):
    """Return the text carried by a 'string' or 'symbol' node.

    x is a parse tree node, a tuple whose first item is the node type;
    a missing node (None) is treated like a node of the wrong type.
    err is the failure message.  It is raised as ValueError; an
    exception instance passed as err is raised unchanged.
    """
    if x and x[0] in ('string', 'symbol'):
        return x[1]
    if isinstance(err, Exception):
        raise err
    # a bare string is not a valid exception object
    raise ValueError(err)
86
def getlist(x):
    """Flatten a left-nested chain of 'list' nodes into a Python list.

    A missing node gives [], a single non-list node gives [node].
    """
    items = []
    node = x
    # walk down the left spine, collecting right-hand items last-to-first
    while node and node[0] == 'list':
        items.append(node[2])
        node = node[1]
    if node:
        items.append(node)
    items.reverse()
    return items
93
def getpair(x, err):
    """Return the two arguments held by x as a list.

    err is the failure message used when x does not hold exactly two
    arguments.  It is raised as ValueError; an exception instance passed
    as err is raised unchanged.
    """
    l = getlist(x)
    if len(l) != 2:
        if isinstance(err, Exception):
            raise err
        # a bare string is not a valid exception object
        raise ValueError(err)
    return l
99
def getset(repo, subset, x):
    """Evaluate the parse tree x against subset and return the result.

    Dispatches on the node type x[0] through the methods table, passing
    the remaining items of x as arguments.  Raises ValueError when x is
    missing.
    """
    if not x:
        # a bare string is not a valid exception object
        raise ValueError("missing argument")
    return methods[x[0]](repo, subset, *x[1:])
104
105 # operator methods
106
def negate(repo, subset, x):
    """Evaluate a prefix '-' by looking the operand up as the text '-<name>'."""
    text = getstring(x, "can't negate that")
    return getset(repo, subset, ('string', '-' + text))
110
def stringset(repo, subset, x):
    """Resolve x through repo[x] and return its revision if subset has it."""
    rev = repo[x].rev()
    if rev not in subset:
        return []
    return [rev]
116
def symbolset(repo, subset, x):
    """Evaluate a bare symbol as a revision identifier.

    Raises ValueError when x is the name of a revset function, since a
    function name is only usable as a call.
    """
    if x in symbols:
        # a bare string is not a valid exception object
        raise ValueError("can't use %s here" % x)
    return stringset(repo, subset, x)
121
def rangeset(repo, subset, x, y):
    """Return the members of subset lying between x and y, inclusive.

    The range runs from the first revision of x to the last revision of
    y, counting down when x is not below y.  The endpoints are resolved
    against the whole repository, the way ancestors() and branch()
    resolve their arguments, so a range still works once subset has been
    narrowed by another operand.  An endpoint that matches nothing gives
    an empty result.
    """
    allrevs = range(len(repo))
    first = getset(repo, allrevs, x)
    last = getset(repo, allrevs, y)
    if not first or not last:
        return []
    m, n = first[0], last[-1]
    if m < n:
        revs = range(m, n + 1)
    else:
        revs = range(m, n - 1, -1)
    # keep only what the caller asked about, in range order
    s = set(subset)
    return [r for r in revs if r in s]
128
def dagrangeset(repo, subset, x, y):
    """Evaluate x..y as 'descendants(x) and ancestors(y)'."""
    below = ('func', ('symbol', 'descendants'), x)
    above = ('func', ('symbol', 'ancestors'), y)
    return andset(repo, subset, below, above)
133
def andset(repo, subset, x, y):
    """Return the revisions of subset matched by both x and y.

    The operand with the lower weight is evaluated first and its result
    becomes the subset for the other one.
    """
    first, second = x, y
    if weight(first, True) > weight(second, True):
        first, second = second, first
    narrowed = getset(repo, subset, first)
    return getset(repo, narrowed, second)
138
def orset(repo, subset, x, y):
    """Return the revisions of subset matched by x or by y, in subset order.

    The operand with the lower weight runs first; the other one is only
    asked about revisions that were not already matched.
    """
    first, second = x, y
    if weight(second, False) < weight(first, False):
        first, second = second, first
    found = set(getset(repo, subset, first))
    remaining = [r for r in subset if r not in found]
    found.update(getset(repo, remaining, second))
    return [r for r in subset if r in found]
145
def notset(repo, subset, x):
    """Return the revisions of subset that x does not match."""
    excluded = set(getset(repo, subset, x))
    kept = []
    for r in subset:
        if r not in excluded:
            kept.append(r)
    return kept
149
def minusset(repo, subset, x, y):
    """Return the revisions of subset matched by x but not by y.

    When x weighs more than y, y is removed from subset first so that x
    runs on the smaller set; otherwise x runs first.
    """
    xheavier = weight(x, True) > weight(y, True)
    if xheavier:
        without = notset(repo, subset, y)
        return getset(repo, without, x)
    matched = getset(repo, subset, x)
    return notset(repo, matched, y)
154
def listset(repo, subset, a, b):
    """Reject a comma list used where a set of revisions is expected.

    Always raises ValueError.
    """
    # a bare string is not a valid exception object
    raise ValueError("can't use a list in this context")
157
def func(repo, subset, a, b):
    """Call the revset function named by node a with argument tree b.

    Raises ValueError when a is not a symbol naming an entry of symbols.
    """
    if a[0] == 'symbol' and a[1] in symbols:
        return symbols[a[1]](repo, subset, b)
    # wrap a[1] in a tuple: it may itself be a tuple when a is not a symbol
    raise ValueError("that's not a function: %s" % (a[1],))
162
163 # functions
164
def p1(repo, subset, x):
    """Return the members of subset that are a first parent of a revision in x."""
    parentrevs = repo.changelog.parentrevs
    firsts = set([parentrevs(r)[0] for r in getset(repo, subset, x)])
    return [r for r in subset if r in firsts]
171
def p2(repo, subset, x):
    """Return the members of subset that are a second parent of a revision in x."""
    parentrevs = repo.changelog.parentrevs
    seconds = set([parentrevs(r)[1] for r in getset(repo, subset, x)])
    return [r for r in subset if r in seconds]
178
def parents(repo, subset, x):
    """Return the members of subset that are a parent of a revision in x."""
    parentrevs = repo.changelog.parentrevs
    found = set()
    for rev in getset(repo, subset, x):
        for p in parentrevs(rev):
            found.add(p)
    return [r for r in subset if r in found]
185
def maxrev(repo, subset, x):
    """Return the highest revision of x as a one-item list.

    The result is empty when x matches nothing or when that revision is
    not in subset.
    """
    revs = getset(repo, subset, x)
    if not revs:
        return []
    top = max(revs)
    if top not in subset:
        return []
    return [top]
193
def limit(repo, subset, x):
    """Return the first n revisions of a set; x holds (set, n).

    Raises ValueError when x does not hold two arguments or when n is
    not a number.
    """
    l = getpair(x, "limit wants two args")
    text = getstring(l[1], "limit wants a number")
    try:
        lim = int(text)
    except ValueError:
        # re-raise with a message that names the function, like the
        # other argument errors; a bare string cannot be raised
        raise ValueError("limit wants a number")
    return getset(repo, subset, l[0])[:lim]
201
def children(repo, subset, x):
    """Return the members of subset that have a parent among the revisions of x."""
    parentrevs = repo.changelog.parentrevs
    wanted = set(getset(repo, subset, x))
    kids = set()
    # every revision of the repository is inspected, not just subset
    for rev in xrange(0, len(repo)):
        for p in parentrevs(rev):
            if p in wanted:
                kids.add(rev)
    return [r for r in subset if r in kids]
211
def branch(repo, subset, x):
    """Return the members of subset on the same branch as a revision in x.

    x is evaluated against the whole repository; revisions of x that are
    in subset are part of the result as well.
    """
    revs = getset(repo, range(len(repo)), x)
    names = set([repo[r].branch() for r in revs])
    revs = set(revs)
    return [r for r in subset if r in revs or repo[r].branch() in names]
219
def ancestor(repo, subset, x):
    """Return the ancestor shared by two single revisions, as a list.

    x holds the two arguments.  Raises ValueError when x does not hold
    two arguments or when one of them matches more than one revision.
    The result is empty when either argument matches nothing.
    """
    l = getpair(x, "ancestor wants two args")
    a = getset(repo, subset, l[0])
    b = getset(repo, subset, l[1])
    if len(a) > 1 or len(b) > 1:
        # a bare string is not a valid exception object
        raise ValueError("arguments to ancestor must be single revisions")
    if not a or not b:
        # nothing to compare; avoids an IndexError on a[0] / b[0]
        return []
    return [repo[a[0]].ancestor(repo[b[0]]).rev()]
227
def ancestors(repo, subset, x):
    """Return the members of subset that are in x or are ancestors of x.

    x is evaluated against the whole repository.
    """
    seeds = getset(repo, range(len(repo)), x)
    found = set(repo.changelog.ancestors(*seeds))
    found.update(seeds)
    return [r for r in subset if r in found]
232
def descendants(repo, subset, x):
    """Return the members of subset that are in x or are descendants of x.

    x is evaluated against the whole repository.
    """
    seeds = getset(repo, range(len(repo)), x)
    found = set(repo.changelog.descendants(*seeds))
    found.update(seeds)
    return [r for r in subset if r in found]
237
def follow(repo, subset, x):
    """Return the members of subset that are repo['.'] or one of its ancestors.

    Raises ValueError when any argument is given.
    """
    if x:
        # a bare string is not a valid exception object
        raise ValueError("follow takes no args")
    p = repo['.'].rev()
    s = set(repo.changelog.ancestors(p)) | set([p])
    return [r for r in subset if r in s]
244
def date(repo, subset, x):
    """Return the members of subset whose date matches the spec in x.

    The spec is turned into a predicate by util.matchdate and applied to
    the first field of each changeset's date().
    """
    spec = getstring(x, 'date wants a string')
    matches = util.matchdate(spec)
    picked = []
    for r in subset:
        if matches(repo[r].date()[0]):
            picked.append(r)
    return picked
249
def keyword(repo, subset, x):
    """Return the members of subset that mention the given text.

    The text is searched, ignoring case, in the file names, user and
    description of each changeset joined by spaces.
    """
    needle = getstring(x, "keyword wants a string").lower()

    def haystack(ctx):
        fields = ctx.files() + [ctx.user(), ctx.description()]
        return " ".join(fields).lower()

    return [r for r in subset if needle in haystack(repo[r])]
259
def grep(repo, subset, x):
    """Return the members of subset where the regular expression x matches.

    The expression is searched in each file name, the user and the
    description of every changeset.  Each revision is reported once.
    """
    gr = re.compile(getstring(x, "grep wants a string"))
    l = []
    for r in subset:
        c = repo[r]
        for e in c.files() + [c.user(), c.description()]:
            if gr.search(e):
                l.append(r)
                # stop at the first hit so r is not added once per field
                break
    return l
270
def author(repo, subset, x):
    """Return the members of subset whose user contains x, ignoring case."""
    needle = getstring(x, "author wants a string").lower()
    picked = []
    for r in subset:
        if needle in repo[r].user().lower():
            picked.append(r)
    return picked
274
def hasfile(repo, subset, x):
    """Return the members of subset that list a file matching pattern x.

    The pattern is matched against the names given by files() of each
    changeset.  Each revision is reported once.
    """
    pat = getstring(x, "file wants a pattern")
    m = _match.match(repo.root, repo.getcwd(), [pat])
    s = []
    for r in subset:
        for f in repo[r].files():
            if m(f):
                s.append(r)
                # stop at the first hit so r is not added once per file
                break
    return s
285
def contains(repo, subset, x):
    """Return the members of subset that contain a file matching pattern x.

    When the matcher reports the pattern itself as its only file, a
    direct membership test on the changeset is used; otherwise every
    name in the manifest is tried.  Each revision is reported once.
    """
    pat = getstring(x, "contains wants a pattern")
    m = _match.match(repo.root, repo.getcwd(), [pat])
    s = []
    if m.files() == [pat]:
        for r in subset:
            if pat in repo[r]:
                s.append(r)
    else:
        for r in subset:
            for f in repo[r].manifest():
                if m(f):
                    s.append(r)
                    # stop at the first hit so r is not added once per file
                    break
    return s
303
def checkstatus(repo, subset, pat, field):
    """Return the members of subset whose status list has a file matching pat.

    field is the index into the result of repo.status() between a
    changeset's first parent and the changeset; modifies() passes 0,
    adds() 1 and removes() 2.  Each revision is reported once.
    """
    m = _match.match(repo.root, repo.getcwd(), [pat])
    s = []
    # pattern names exactly one file: plain membership tests are enough
    fast = (m.files() == [pat])
    for r in subset:
        c = repo[r]
        # cheap pre-filter on files() before asking for a status
        if fast:
            if pat not in c.files():
                continue
        else:
            for f in c.files():
                if m(f):
                    break
            else:
                continue
        files = repo.status(c.p1().node(), c.node())[field]
        if fast:
            if pat in files:
                s.append(r)
        else:
            for f in files:
                if m(f):
                    s.append(r)
                    # stop at the first hit so r is not added once per file
                    break
    return s
330
def modifies(repo, subset, x):
    """Return the members of subset selected by checkstatus() for field 0."""
    return checkstatus(repo, subset,
                       getstring(x, "modifies wants a pattern"), 0)
334
def adds(repo, subset, x):
    """Return the members of subset selected by checkstatus() for field 1."""
    return checkstatus(repo, subset,
                       getstring(x, "adds wants a pattern"), 1)
338
def removes(repo, subset, x):
    """Return the members of subset selected by checkstatus() for field 2."""
    return checkstatus(repo, subset,
                       getstring(x, "removes wants a pattern"), 2)
342
def merge(repo, subset, x):
    """Return the members of subset whose second parent is not -1.

    Raises ValueError when any argument is given.
    """
    if x:
        # a bare string is not a valid exception object
        raise ValueError("merge takes no args")
    cl = repo.changelog
    return [r for r in subset if cl.parentrevs(r)[1] != -1]
348
def closed(repo, subset, x):
    """Return the members of subset for which extra('close') is true.

    The argument x is not used.
    """
    picked = []
    for r in subset:
        if repo[r].extra('close'):
            picked.append(r)
    return picked
351
def head(repo, subset, x):
    """Return the members of subset listed as a head in repo.branchmap().

    The argument x is not used.
    """
    headrevs = set()
    for name, nodes in repo.branchmap().iteritems():
        for node in nodes:
            headrevs.add(repo[node].rev())
    return [r for r in subset if r in headrevs]
357
def reverse(repo, subset, x):
    """Return the revisions of x in reverse order."""
    # Work on a copy: getset() may hand back subset itself (getall()
    # returns it unchanged), and reversing that in place would reorder
    # the caller's list.
    l = list(getset(repo, subset, x))
    l.reverse()
    return l
362
def sort(repo, subset, x):
    """Return the revisions of a set ordered by the given keys.

    x holds the set and, optionally, a string of whitespace separated
    keys: rev, branch, desc, user, author or date, each optionally
    prefixed with '-' for descending order.  Without a key string the
    order is by rev.  Raises ValueError for a missing set, a key string
    of the wrong type or an unknown key.
    """
    l = getlist(x)
    if not l:
        raise ValueError("sort wants one or two arguments")
    keys = "rev"
    if len(l) == 2:
        keys = getstring(l[1], "sort spec must be a string")

    s = l[0]
    keys = keys.split()
    l = []
    def invert(s):
        # flip every byte so that ascending order of the result is
        # descending order of the original text
        return "".join(chr(255 - ord(c)) for c in s)
    for r in getset(repo, subset, s):
        c = repo[r]
        e = []
        for k in keys:
            if k == 'rev':
                e.append(r)
            elif k == '-rev':
                e.append(-r)
            elif k == 'branch':
                e.append(c.branch())
            elif k == '-branch':
                e.append(invert(c.branch()))
            elif k == 'desc':
                e.append(c.description())
            elif k == '-desc':
                e.append(invert(c.description()))
            # tuple membership: 'in' on a string would also accept
            # fragments such as 'r' or 'auth'
            elif k in ('user', 'author'):
                e.append(c.user())
            elif k in ('-user', '-author'):
                e.append(invert(c.user()))
            elif k == 'date':
                e.append(c.date()[0])
            elif k == '-date':
                e.append(-c.date()[0])
            else:
                # a bare string is not a valid exception object
                raise ValueError("unknown sort key %r" % k)
        # the revision goes last so it can be read back after sorting
        e.append(r)
        l.append(e)
    l.sort()
    return [e[-1] for e in l]
404
def getall(repo, subset, x):
    """Return subset itself, unchanged; repo and x are not used."""
    everything = subset
    return everything
407
def heads(repo, subset, x):
    """Return the revisions of x that parents() does not report for x."""
    members = getset(repo, subset, x)
    haschild = set(parents(repo, subset, x))
    tops = []
    for r in members:
        if r not in haschild:
            tops.append(r)
    return tops
412
def roots(repo, subset, x):
    """Return the revisions of x that children() does not report for x."""
    members = getset(repo, subset, x)
    hasparent = set(children(repo, subset, x))
    bottoms = []
    for r in members:
        if r not in hasparent:
            bottoms.append(r)
    return bottoms
417
def outgoing(repo, subset, x):
    """Return the members of subset found outgoing to another repository.

    x optionally holds one argument, the path of the other repository;
    without it the 'default-push' path is expanded, with 'default' as
    the second choice passed to expandpath().
    """
    l = getlist(x)
    if len(l) == 1:
        dest = getstring(l[0], "outgoing wants a repo path")
    else:
        dest = ''
    dest = repo.ui.expandpath(dest or 'default-push', dest or 'default')
    dest, branches = hg.parseurl(dest)
    other = hg.repository(hg.remoteui(repo, {}), dest)
    # NOTE(review): the buffer presumably swallows the messages printed
    # while comparing repositories -- confirm against ui.pushbuffer().
    repo.ui.pushbuffer()
    try:
        o = repo.findoutgoing(other)
    finally:
        # always undo pushbuffer(), even when the comparison fails
        repo.ui.popbuffer()
    cl = repo.changelog
    o = set([cl.rev(r) for r in cl.nodesbetween(o, None)[0]])
    return [r for r in subset if r in o]
434
# revset function name -> implementation, looked up by func();
# symbolset() refuses to treat these names as revision identifiers
symbols = {
    "adds": adds,
    "all": getall,
    "ancestor": ancestor,
    "ancestors": ancestors,
    "author": author,
    "branch": branch,
    "children": children,
    "closed": closed,
    "contains": contains,
    "date": date,
    "descendants": descendants,
    "file": hasfile,
    "follow": follow,
    "grep": grep,
    "head": head,
    "heads": heads,
    "keyword": keyword,
    "limit": limit,
    "max": maxrev,
    "merge": merge,
    "modifies": modifies,
    "outgoing": outgoing,
    "p1": p1,
    "p2": p2,
    "parents": parents,
    "removes": removes,
    "reverse": reverse,
    "roots": roots,
    "sort": sort,
    "user": author,  # alias of "author"
}
467
# parse tree node type -> evaluator, used by getset() to dispatch on x[0]
methods = {
    # leaves
    "string": stringset,
    "symbol": symbolset,
    # prefix operators
    "negate": negate,
    "not": notset,
    # infix operators
    "minus": minusset,
    "range": rangeset,
    "dagrange": dagrangeset,
    "and": andset,
    "or": orset,
    "list": listset,
    # calls and parentheses
    "func": func,
    "group": lambda repo, subset, tree: getset(repo, subset, tree),
}
482
def weight(x, small):
    """Estimate how expensive the parse tree x is to evaluate.

    andset(), orset() and minusset() compare these numbers to choose
    which operand to evaluate first.  small selects the reduced weight
    (.5 instead of 1) given to nodes that stand for a single revision.
    Node types and functions without a rule of their own weigh 1.
    Raises ValueError when a 'func' node is not named by a symbol.
    """
    smallbonus = 1
    if small:
        smallbonus = .5

    op = x[0]
    # tuple membership throughout: 'in' on a string is a substring test
    if op in ('string', 'symbol', 'negate'):
        return smallbonus # single revisions are small
    elif op in ('and', 'dagrange'):
        return min(weight(x[1], True), weight(x[2], True))
    elif op in ('or', 'minus'):
        return max(weight(x[1], False), weight(x[2], False))
    elif op == 'not':
        return weight(x[1], not small)
    elif op == 'group':
        return weight(x[1], small)
    elif op == 'range':
        return weight(x[1], small) + weight(x[2], small)
    elif op == 'func':
        # a func node is ('func', name node, argument tree)
        name = x[1]
        if name[0] not in ('string', 'symbol'):
            raise ValueError("not a symbol")
        f = name[1]
        args = x[2]
        if f in ('grep', 'date', 'user', 'author', 'keyword', 'branch',
                 'file'):
            return 10 # slow
        elif f in ('modifies', 'adds', 'removes'):
            return 30 # slower
        elif f == 'contains':
            return 100 # very slow
        elif f == 'ancestor':
            if args and args[0] == 'list':
                return (weight(args[1], small) +
                        weight(args[2], small)) * smallbonus
            return 1
        elif f in ('reverse', 'limit', 'sort'):
            # these cost as much as the set they wrap; for limit and
            # sort the set is the first item of the argument list
            base = args
            if base and base[0] == 'list':
                base = base[1]
            if not base:
                return 1
            w = weight(base, small)
            if f == 'sort':
                w = max(w, 10)
            return w
        else:
            return 1
    # unknown node type: a neutral weight rather than an implicit None
    return 1
523
# parse(spec) is the bound parse method of a parser built from this
# module's tokenizer and token table
_revsetparser = parser.parser(tokenize, elements)
parse = _revsetparser.parse
525
def match(spec):
    """Parse the revset expression spec and return a matcher for it.

    The expression is parsed once, here.  The returned function takes
    (repo, subset) and returns the result of evaluating the parsed tree
    against subset.
    """
    tree = parse(spec)

    def mfunc(repo, subset):
        # tree is captured from the enclosing call
        return getset(repo, subset, tree)

    return mfunc