comparison mercurial/dagparser.py @ 11335:3201ff1459dd

dagparser: parses and formats DAGs as concise text As discussed during the sprint. See the doc comment and doctests for specification and examples. This is used in subsequent patches to export revlog and changelog DAGs, and to generate a repo with a given changelog DAG.
author Peter Arrenbrecht <peter.arrenbrecht@gmail.com>
date Thu, 10 Jun 2010 11:48:15 +0200
parents
children 32a9744acf1e
comparison
equal deleted inserted replaced
11334:5bf465160632 11335:3201ff1459dd
1 # dagparser.py - parser and generator for concise description of DAGs
2 #
3 # Copyright 2010 Peter Arrenbrecht <peter@arrenbrecht.ch>
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
7
8 import re, string
9 import util
10
11 def parsedag(desc):
12 '''parses a DAG from a concise textual description; generates events
13
14 "+n" is a linear run of n nodes based on the current default parent
15 "." is a single node based on the current default parent
16 "$" resets the default parent to -1 (implied at the start);
17 otherwise the default parent is always the last node created
18 "<p" sets the default parent to the backref p
19 "*p" is a fork at parent p, where p is a backref
20 "*p1/p2/.../pn" is a merge of parents p1..pn, where the pi are backrefs
21 "/p2/.../pn" is a merge of the preceding node and p2..pn
22 ":name" defines a label for the preceding node; labels can be redefined
23 "@text" emits an annotation event for text
24 "!command" emits an action event for the current node
25 "!!my command\n" is like "!", but to the end of the line
26 "#...\n" is a comment up to the end of the line
27
28 Whitespace between the above elements is ignored.
29
30 A backref is either
31 * a number n, which references the node curr-n, where curr is the current
32 node, or
33 * the name of a label you placed earlier using ":name", or
34 * empty to denote the default parent.
35
36 All string valued-elements are either strictly alphanumeric, or must
37 be enclosed in double quotes ("..."), with "\" as escape character.
38
39 Generates sequence of
40
41 ('n', (id, [parentids])) for node creation
42 ('l', (id, labelname)) for labels on nodes
43 ('a', text) for annotations
44 ('c', command) for actions (!)
45 ('C', command) for line actions (!!)
46
47 Examples
48 --------
49
50 Example of a complex graph (output not shown for brevity):
51
52 >>> len(list(parsedag("""
53 ...
54 ... +3 # 3 nodes in linear run
55 ... :forkhere # a label for the last of the 3 nodes from above
56 ... +5 # 5 more nodes on one branch
57 ... :mergethis # label again
58 ... <forkhere # set default parent to labelled fork node
59 ... +10 # 10 more nodes on a parallel branch
60 ... @stable # following nodes will be annotated as "stable"
61 ... +5 # 5 nodes in stable
62 ... !addfile # custom command; could trigger new file in next node
63 ... +2 # two more nodes
64 ... /mergethis # merge last node with labelled node
65 ... +4 # 4 more nodes descending from merge node
66 ...
67 ... """)))
68 34
69
70 Empty list:
71
72 >>> list(parsedag(""))
73 []
74
75 A simple linear run:
76
77 >>> list(parsedag("+3"))
78 [('n', (0, [-1])), ('n', (1, [0])), ('n', (2, [1]))]
79
80 Some non-standard ways to define such runs:
81
82 >>> list(parsedag("+1+2"))
83 [('n', (0, [-1])), ('n', (1, [0])), ('n', (2, [1]))]
84
85 >>> list(parsedag("+1*1*"))
86 [('n', (0, [-1])), ('n', (1, [0])), ('n', (2, [1]))]
87
88 >>> list(parsedag("*"))
89 [('n', (0, [-1]))]
90
91 >>> list(parsedag("..."))
92 [('n', (0, [-1])), ('n', (1, [0])), ('n', (2, [1]))]
93
94 A fork and a join, using numeric back references:
95
96 >>> list(parsedag("+2*2*/2"))
97 [('n', (0, [-1])), ('n', (1, [0])), ('n', (2, [0])), ('n', (3, [2, 1]))]
98
99 >>> list(parsedag("+2<2+1/2"))
100 [('n', (0, [-1])), ('n', (1, [0])), ('n', (2, [0])), ('n', (3, [2, 1]))]
101
102 Placing a label:
103
104 >>> list(parsedag("+1 :mylabel +1"))
105 [('n', (0, [-1])), ('l', (0, 'mylabel')), ('n', (1, [0]))]
106
107 An empty label (silly, really):
108
109 >>> list(parsedag("+1:+1"))
110 [('n', (0, [-1])), ('l', (0, '')), ('n', (1, [0]))]
111
112 Fork and join, but with labels instead of numeric back references:
113
114 >>> list(parsedag("+1:f +1:p2 *f */p2"))
115 [('n', (0, [-1])), ('l', (0, 'f')), ('n', (1, [0])), ('l', (1, 'p2')),
116 ('n', (2, [0])), ('n', (3, [2, 1]))]
117
118 >>> list(parsedag("+1:f +1:p2 <f +1 /p2"))
119 [('n', (0, [-1])), ('l', (0, 'f')), ('n', (1, [0])), ('l', (1, 'p2')),
120 ('n', (2, [0])), ('n', (3, [2, 1]))]
121
122 Restarting from the root:
123
124 >>> list(parsedag("+1 $ +1"))
125 [('n', (0, [-1])), ('n', (1, [-1]))]
126
127 Annotations, which are meant to introduce sticky state for subsequent nodes:
128
129 >>> list(parsedag("+1 @ann +1"))
130 [('n', (0, [-1])), ('a', 'ann'), ('n', (1, [0]))]
131
132 >>> list(parsedag('+1 @"my annotation" +1'))
133 [('n', (0, [-1])), ('a', 'my annotation'), ('n', (1, [0]))]
134
135 Commands, which are meant to operate on the most recently created node:
136
137 >>> list(parsedag("+1 !cmd +1"))
138 [('n', (0, [-1])), ('c', 'cmd'), ('n', (1, [0]))]
139
140 >>> list(parsedag('+1 !"my command" +1'))
141 [('n', (0, [-1])), ('c', 'my command'), ('n', (1, [0]))]
142
143 >>> list(parsedag('+1 !!my command line\\n +1'))
144 [('n', (0, [-1])), ('C', 'my command line'), ('n', (1, [0]))]
145
146 Comments, which extend to the end of the line:
147
148 >>> list(parsedag('+1 # comment\\n+1'))
149 [('n', (0, [-1])), ('n', (1, [0]))]
150
151 Error:
152
153 >>> try: list(parsedag('+1 bad'))
154 ... except Exception, e: print e
155 invalid character in dag description: bad...
156
157 '''
158 if not desc:
159 return
160
161 wordchars = string.ascii_letters + string.digits
162
163 labels = {}
164 p1 = -1
165 r = 0
166
167 def resolve(ref):
168 if not ref:
169 return p1
170 elif ref[0] in string.digits:
171 return r - int(ref)
172 else:
173 return labels[ref]
174
175 chiter = (c for c in desc)
176
177 def nextch():
178 try:
179 return chiter.next()
180 except StopIteration:
181 return '\0'
182
183 def nextrun(c, allow):
184 s = ''
185 while c in allow:
186 s += c
187 c = nextch()
188 return c, s
189
190 def nextdelimited(c, limit, escape):
191 s = ''
192 while c != limit:
193 if c == escape:
194 c = nextch()
195 s += c
196 c = nextch()
197 return nextch(), s
198
199 def nextstring(c):
200 if c == '"':
201 return nextdelimited(nextch(), '"', '\\')
202 else:
203 return nextrun(c, wordchars)
204
205 c = nextch()
206 while c != '\0':
207 while c in string.whitespace:
208 c = nextch()
209 if c == '.':
210 yield 'n', (r, [p1])
211 p1 = r
212 r += 1
213 c = nextch()
214 elif c == '+':
215 c, digs = nextrun(nextch(), string.digits)
216 n = int(digs)
217 for i in xrange(0, n):
218 yield 'n', (r, [p1])
219 p1 = r
220 r += 1
221 elif c == '*' or c == '/':
222 if c == '*':
223 c = nextch()
224 c, pref = nextstring(c)
225 prefs = [pref]
226 while c == '/':
227 c, pref = nextstring(nextch())
228 prefs.append(pref)
229 ps = [resolve(ref) for ref in prefs]
230 yield 'n', (r, ps)
231 p1 = r
232 r += 1
233 elif c == '<':
234 c, ref = nextstring(nextch())
235 p1 = resolve(ref)
236 elif c == ':':
237 c, name = nextstring(nextch())
238 labels[name] = p1
239 yield 'l', (p1, name)
240 elif c == '@':
241 c, text = nextstring(nextch())
242 yield 'a', text
243 elif c == '!':
244 c = nextch()
245 if c == '!':
246 cmd = ''
247 c = nextch()
248 while c not in '\n\r\0':
249 cmd += c
250 c = nextch()
251 yield 'C', cmd
252 else:
253 c, cmd = nextstring(c)
254 yield 'c', cmd
255 elif c == '#':
256 while c not in '\n\r\0':
257 c = nextch()
258 elif c == '$':
259 p1 = -1
260 c = nextch()
261 elif c == '\0':
262 return # in case it was preceded by whitespace
263 else:
264 s = ''
265 i = 0
266 while c != '\0' and i < 10:
267 s += c
268 i += 1
269 c = nextch()
270 raise util.Abort("invalid character in dag description: %s..." % s)
271
272 def dagtextlines(events,
273 addspaces=True,
274 wraplabels=False,
275 wrapannotations=False,
276 wrapcommands=False,
277 wrapnonlinear=False,
278 usedots=False,
279 maxlinewidth=70):
280 '''generates single lines for dagtext()'''
281
282 def wrapstring(text):
283 if re.match("^[0-9a-z]*$", text):
284 return text
285 return '"' + text.replace('\\', '\\\\').replace('"', '\"') + '"'
286
287 def gen():
288 labels = {}
289 run = 0
290 wantr = 0
291 needroot = False
292 for kind, data in events:
293 if kind == 'n':
294 r, ps = data
295
296 # sanity check
297 if r != wantr:
298 raise util.Abort("Expected id %i, got %i" % (wantr, r))
299 if not ps:
300 ps = [-1]
301 else:
302 for p in ps:
303 if p >= r:
304 raise util.Abort("Parent id %i is larger than "
305 "current id %i" % (p, r))
306 wantr += 1
307
308 # new root?
309 p1 = r - 1
310 if len(ps) == 1 and ps[0] == -1:
311 if needroot:
312 if run:
313 yield '+' + str(run)
314 run = 0
315 if wrapnonlinear:
316 yield '\n'
317 yield '$'
318 p1 = -1
319 else:
320 needroot = True
321 if len(ps) == 1 and ps[0] == p1:
322 if usedots:
323 yield "."
324 else:
325 run += 1
326 else:
327 if run:
328 yield '+' + str(run)
329 run = 0
330 if wrapnonlinear:
331 yield '\n'
332 prefs = []
333 for p in ps:
334 if p == p1:
335 prefs.append('')
336 elif p in labels:
337 prefs.append(labels[p])
338 else:
339 prefs.append(str(r - p))
340 yield '*' + '/'.join(prefs)
341 else:
342 if run:
343 yield '+' + str(run)
344 run = 0
345 if kind == 'l':
346 rid, name = data
347 labels[rid] = name
348 yield ':' + name
349 if wraplabels:
350 yield '\n'
351 elif kind == 'c':
352 yield '!' + wrapstring(data)
353 if wrapcommands:
354 yield '\n'
355 elif kind == 'C':
356 yield '!!' + data
357 yield '\n'
358 elif kind == 'a':
359 if wrapannotations:
360 yield '\n'
361 yield '@' + wrapstring(data)
362 elif kind == '#':
363 yield '#' + data
364 yield '\n'
365 else:
366 raise util.Abort("invalid event type in dag: "
367 + format((type, data)))
368 if run:
369 yield '+' + str(run)
370
371 line = ''
372 for part in gen():
373 if part == '\n':
374 if line:
375 yield line
376 line = ''
377 else:
378 if len(line) + len(part) >= maxlinewidth:
379 yield line
380 line = ''
381 elif addspaces and line and part != '.':
382 line += ' '
383 line += part
384 if line:
385 yield line
386
387 def dagtext(dag,
388 addspaces=True,
389 wraplabels=False,
390 wrapannotations=False,
391 wrapcommands=False,
392 wrapnonlinear=False,
393 usedots=False,
394 maxlinewidth=70):
395 '''generates lines of a textual representation for a dag event stream
396
397 events should generate what parsedag() does, so:
398
399 ('n', (id, [parentids])) for node creation
400 ('l', (id, labelname)) for labels on nodes
401 ('a', text) for annotations
402 ('c', text) for commands
403 ('C', text) for line commands ('!!')
404 ('#', text) for comment lines
405
406 Parent nodes must come before child nodes.
407
408 Examples
409 --------
410
411 Linear run:
412
413 >>> dagtext([('n', (0, [-1])), ('n', (1, [0]))])
414 '+2'
415
416 Two roots:
417
418 >>> dagtext([('n', (0, [-1])), ('n', (1, [-1]))])
419 '+1 $ +1'
420
421 Fork and join:
422
423 >>> dagtext([('n', (0, [-1])), ('n', (1, [0])), ('n', (2, [0])),
424 ... ('n', (3, [2, 1]))])
425 '+2 *2 */2'
426
427 Fork and join with labels:
428
429 >>> dagtext([('n', (0, [-1])), ('l', (0, 'f')), ('n', (1, [0])),
430 ... ('l', (1, 'p2')), ('n', (2, [0])), ('n', (3, [2, 1]))])
431 '+1 :f +1 :p2 *f */p2'
432
433 Annotations:
434
435 >>> dagtext([('n', (0, [-1])), ('a', 'ann'), ('n', (1, [0]))])
436 '+1 @ann +1'
437
438 >>> dagtext([('n', (0, [-1])), ('a', 'my annotation'), ('n', (1, [0]))])
439 '+1 @"my annotation" +1'
440
441 Commands:
442
443 >>> dagtext([('n', (0, [-1])), ('c', 'cmd'), ('n', (1, [0]))])
444 '+1 !cmd +1'
445
446 >>> dagtext([('n', (0, [-1])), ('c', 'my command'), ('n', (1, [0]))])
447 '+1 !"my command" +1'
448
449 >>> dagtext([('n', (0, [-1])), ('C', 'my command line'), ('n', (1, [0]))])
450 '+1 !!my command line\\n+1'
451
452 Comments:
453
454 >>> dagtext([('n', (0, [-1])), ('#', ' comment'), ('n', (1, [0]))])
455 '+1 # comment\\n+1'
456
457 >>> dagtext([])
458 ''
459
460 Combining parsedag and dagtext:
461
462 >>> dagtext(parsedag('+1 :f +1 :p2 *f */p2'))
463 '+1 :f +1 :p2 *f */p2'
464
465 '''
466 return "\n".join(dagtextlines(dag,
467 addspaces,
468 wraplabels,
469 wrapannotations,
470 wrapcommands,
471 wrapnonlinear,
472 usedots,
473 maxlinewidth))