comparison hgext/inotify/server.py @ 9115:b55d44719b47

inotify: server: new data structure to keep track of changes. == Rationale for the new structure == The current structure was a dictionary tree. One directory was tracked as a dictionary: - keys: file/subdir name - values: - for a file, the status (a/r/m/...) - for a subdir, the directory representing the subdir It allowed efficient lookups, regardless of the type of the terminal leaf: for part in path.split('/'): tree = tree[part] However, there is no way to represent a directory and a file with the same name, because the keys conflict in the dictionary. Concrete example: Initial state: root dir |- foo (file) |- bar (file) # data state is: {'foo': 'n', 'bar': 'n'} Remove foo: root dir |- bar (file) # Data becomes {'foo': 'r'} until next commit. Add foo, as a directory, and foo/barbar file: root dir |- bar (file) |-> foo (dir) |- barbar (file) # New state should be represented as: {'foo': {'barbar': 'a'}, 'bar': 'n'} however, the key "foo" is already used and represents the old file. The dirstate: D foo A foo/barbar cannot be represented, hence the need for a new structure. == The new structure == 'directory' class. Represents one directory level. * Notable attributes: Two dictionaries: - 'files' Maps filename -> status for the current dir. - 'dirs' Maps subdir's name -> directory object representing the subdir * methods - walk(), formerly server.walk - lookup(), old server.lookup - dir(), old server.dir This new class allows embedding all the tree walks/lookups in its own class, instead of having everything mixed together in server. Incidentally, since files and directories are not stored in the same dictionaries, we are solving the previous key conflict problem. The small drawback is that the lookup operation is a bit more complex: for a path a/b/c/d/e we have to check the leaf twice, to determine whether e is a directory or a file.
author Nicolas Dumazet <nicdumz.commits@gmail.com>
date Tue, 26 May 2009 23:00:35 +0900
parents e1d119f450f0
children f90bbf1ea09f
comparison
equal deleted inserted replaced
9114:72cbbfe6e343 9115:b55d44719b47
199 199
200 wrapper.func_name = f.func_name 200 wrapper.func_name = f.func_name
201 return wrapper 201 return wrapper
202 return decorator 202 return decorator
203 203
class directory(object):
    """
    Representing a directory

    * path is the relative path from repo root to this directory
    * files is a dict listing the files in this directory
        - keys are file names
        - values are file status
    * dirs is a dict listing the subdirectories
        - keys are subdirectory names
        - values are directory objects

    Files and subdirectories live in separate dicts, so a file and a
    directory sharing the same name can both be represented.
    """
    def __init__(self, relpath=''):
        self.path = relpath
        self.files = {}
        self.dirs = {}

    def dir(self, relpath):
        """
        Returns the directory contained at the relative path relpath.
        Creates the intermediate directories if necessary.

        An empty relpath designates this directory itself.
        """
        if not relpath:
            return self
        node = self
        for name in relpath.split('/'):
            child = node.dirs.get(name)
            if child is None:
                # unknown component: create it, rooted at its parent's path
                child = directory(join(node.path, name))
                node.dirs[name] = child
            node = child
        return node

    def walk(self, states):
        """
        yield (filename, status) pairs for items in the trees
        that have status in states.
        filenames are relative to the repo root
        """
        # items()/values() behave the same on Python 2 and Python 3
        for name, st in self.files.items():
            if st in states:
                yield join(self.path, name), st
        for subdir in self.dirs.values():
            for entry in subdir.walk(states):
                yield entry

    def lookup(self, states, path):
        """
        yield root-relative filenames that match path, and whose
        status are in states:
         * if path is empty, yield every matching file of this tree
         * if path is a directory, yield directory files
         * if path is a file, yield path
         * if path is not tracked, yield nothing

        path may name both a directory and a file; in that case the
        directory files are yielded first, then path itself.  One
        trailing '/' in path is ignored.
        """
        if path.endswith('/'):
            path = path[:-1]

        if not path:
            # Empty path designates this directory: walk everything
            # instead of failing on path[-1].
            for fn, st in self.walk(states):
                yield fn
            return

        parts = path.split('/')

        # we need to check separately for last node
        last = parts.pop()

        tree = self
        for name in parts:
            tree = tree.dirs.get(name)
            if tree is None:
                # path is not tracked
                return

        subdir = tree.dirs.get(last)
        if subdir is not None:
            # path is a directory, walk it
            for fn, st in subdir.walk(states):
                yield fn

        # Checked even when a directory of the same name exists: dir()
        # creates directory objects on demand, so a (possibly empty)
        # directory entry must not hide a tracked file.
        st = tree.files.get(last)
        if st is not None and st in states:
            # path is a file
            yield path
289
204 class repowatcher(pollable): 290 class repowatcher(pollable):
205 """ 291 """
206 Watches inotify events 292 Watches inotify events
207 """ 293 """
208 statuskeys = 'almr!?' 294 statuskeys = 'almr!?'
229 raise util.Abort(_('inotify service not available: %s') % 315 raise util.Abort(_('inotify service not available: %s') %
230 err.strerror) 316 err.strerror)
231 self.threshold = watcher.threshold(self.watcher) 317 self.threshold = watcher.threshold(self.watcher)
232 self.fileno = self.watcher.fileno 318 self.fileno = self.watcher.fileno
233 319
234 self.tree = {} 320 self.tree = directory()
235 self.statcache = {} 321 self.statcache = {}
236 self.statustrees = dict([(s, {}) for s in self.statuskeys]) 322 self.statustrees = dict([(s, directory()) for s in self.statuskeys])
237 323
238 self.last_event = None 324 self.last_event = None
239 325
240 self.lastevent = {} 326 self.lastevent = {}
241 327
285 371
286 def setup(self): 372 def setup(self):
287 self.ui.note(_('watching directories under %r\n') % self.repo.root) 373 self.ui.note(_('watching directories under %r\n') % self.repo.root)
288 self.add_watch(self.repo.path, inotify.IN_DELETE) 374 self.add_watch(self.repo.path, inotify.IN_DELETE)
289 self.check_dirstate() 375 self.check_dirstate()
290
291 def dir(self, tree, path):
292 if path:
293 for name in path.split('/'):
294 tree = tree.setdefault(name, {})
295 return tree
296
297 def lookup(self, path, tree):
298 if path:
299 try:
300 for name in path.split('/'):
301 tree = tree[name]
302 except KeyError:
303 return 'x'
304 except TypeError:
305 return 'd'
306 return tree
307 376
308 def filestatus(self, fn, st): 377 def filestatus(self, fn, st):
309 try: 378 try:
310 type_, mode, size, time = self.repo.dirstate._map[fn][:4] 379 type_, mode, size, time = self.repo.dirstate._map[fn][:4]
311 except KeyError: 380 except KeyError:
348 self.statcache.pop(wfn, None) 417 self.statcache.pop(wfn, None)
349 self._updatestatus(wfn, newstatus) 418 self._updatestatus(wfn, newstatus)
350 419
351 def _updatestatus(self, wfn, newstatus): 420 def _updatestatus(self, wfn, newstatus):
352 ''' 421 '''
353 Update the stored status of a file or directory. 422 Update the stored status of a file.
354 423
355 newstatus: - char in (statuskeys + 'ni'), new status to apply. 424 newstatus: - char in (statuskeys + 'ni'), new status to apply.
356 - or None, to stop tracking wfn 425 - or None, to stop tracking wfn
357 ''' 426 '''
358 root, fn = split(wfn) 427 root, fn = split(wfn)
359 d = self.dir(self.tree, root) 428 d = self.tree.dir(root)
360 429
361 oldstatus = d.get(fn) 430 oldstatus = d.files.get(fn)
362 # oldstatus can be either: 431 # oldstatus can be either:
363 # - None : fn is new 432 # - None : fn is new
364 # - a char in statuskeys: fn is a (tracked) file 433 # - a char in statuskeys: fn is a (tracked) file
365 # - a dict: fn is a directory
366 isdir = isinstance(oldstatus, dict)
367 434
368 if self.ui.debugflag and oldstatus != newstatus: 435 if self.ui.debugflag and oldstatus != newstatus:
369 if isdir: 436 self.ui.note(_('status: %r %s -> %s\n') %
370 self.ui.note(_('status: %r dir(%d) -> %s\n') %
371 (wfn, len(oldstatus), newstatus))
372 else:
373 self.ui.note(_('status: %r %s -> %s\n') %
374 (wfn, oldstatus, newstatus)) 437 (wfn, oldstatus, newstatus))
375 if not isdir: 438
376 if oldstatus and oldstatus in self.statuskeys \ 439 if oldstatus and oldstatus in self.statuskeys \
377 and oldstatus != newstatus: 440 and oldstatus != newstatus:
378 del self.dir(self.statustrees[oldstatus], root)[fn] 441 del self.statustrees[oldstatus].dir(root).files[fn]
379 if newstatus and newstatus != 'i': 442 if newstatus and newstatus != 'i':
380 d[fn] = newstatus 443 d.files[fn] = newstatus
381 if newstatus in self.statuskeys: 444 if newstatus in self.statuskeys:
382 dd = self.dir(self.statustrees[newstatus], root) 445 dd = self.statustrees[newstatus].dir(root)
383 if oldstatus != newstatus or fn not in dd: 446 if oldstatus != newstatus or fn not in dd.files:
384 dd[fn] = newstatus 447 dd.files[fn] = newstatus
385 else: 448 else:
386 d.pop(fn, None) 449 d.files.pop(fn, None)
387 450
388 451
389 def check_deleted(self, key): 452 def check_deleted(self, key):
390 # Files that had been deleted but were present in the dirstate 453 # Files that had been deleted but were present in the dirstate
391 # may have vanished from the dirstate; we must clean them up. 454 # may have vanished from the dirstate; we must clean them up.
392 nuke = [] 455 nuke = []
393 for wfn, ignore in self.walk(key, self.statustrees[key]): 456 for wfn, ignore in self.statustrees[key].walk(key):
394 if wfn not in self.repo.dirstate: 457 if wfn not in self.repo.dirstate:
395 nuke.append(wfn) 458 nuke.append(wfn)
396 for wfn in nuke: 459 for wfn in nuke:
397 root, fn = split(wfn) 460 root, fn = split(wfn)
398 del self.dir(self.statustrees[key], root)[fn] 461 del self.statustrees[key].dir(root).files[fn]
399 del self.dir(self.tree, root)[fn] 462 del self.tree.dir(root).files[fn]
400 463
401 def scan(self, topdir=''): 464 def scan(self, topdir=''):
402 ds = self.repo.dirstate._map.copy() 465 ds = self.repo.dirstate._map.copy()
403 self.add_watch(join(self.repo.root, topdir), self.mask) 466 self.add_watch(join(self.repo.root, topdir), self.mask)
404 for root, dirs, files in walk(self.repo, topdir): 467 for root, dirs, files in walk(self.repo, topdir):
436 self.repo.dirstate.invalidate() 499 self.repo.dirstate.invalidate()
437 self.handle_timeout() 500 self.handle_timeout()
438 self.scan() 501 self.scan()
439 self.ui.note(_('%s end dirstate reload\n') % self.event_time()) 502 self.ui.note(_('%s end dirstate reload\n') % self.event_time())
440 503
441 def walk(self, states, tree, prefix=''):
442 # This is the "inner loop" when talking to the client.
443
444 for name, val in tree.iteritems():
445 path = join(prefix, name)
446 try:
447 if val in states:
448 yield path, val
449 except TypeError:
450 for p in self.walk(states, val, path):
451 yield p
452
453 def update_hgignore(self): 504 def update_hgignore(self):
454 # An update of the ignore file can potentially change the 505 # An update of the ignore file can potentially change the
455 # states of all unknown and ignored files. 506 # states of all unknown and ignored files.
456 507
457 # XXX If the user has other ignore files outside the repo, or 508 # XXX If the user has other ignore files outside the repo, or
535 if self.ui.debugflag: 586 if self.ui.debugflag:
536 self.ui.note(_('%s event: deleted %s\n') % 587 self.ui.note(_('%s event: deleted %s\n') %
537 (self.event_time(), wpath)) 588 (self.event_time(), wpath))
538 589
539 if evt.mask & inotify.IN_ISDIR: 590 if evt.mask & inotify.IN_ISDIR:
540 tree = self.dir(self.tree, wpath).copy() 591 tree = self.tree.dir(wpath)
541 for wfn, ignore in self.walk('?', tree): 592 todelete = [wfn for wfn, ignore in tree.walk('?')]
542 self.deletefile(join(wpath, wfn), '?') 593 for fn in todelete:
594 self.deletefile(fn, '?')
543 self.scan(wpath) 595 self.scan(wpath)
544 else: 596 else:
545 self.deleted(wpath) 597 self.deleted(wpath)
546 598
547 def process_modify(self, wpath, evt): 599 def process_modify(self, wpath, evt):
667 # answer. 719 # answer.
668 self.repowatcher.handle_timeout() 720 self.repowatcher.handle_timeout()
669 721
670 if not names: 722 if not names:
671 def genresult(states, tree): 723 def genresult(states, tree):
672 for fn, state in self.repowatcher.walk(states, tree): 724 for fn, state in tree.walk(states):
673 yield fn 725 yield fn
674 else: 726 else:
675 def genresult(states, tree): 727 def genresult(states, tree):
676 for fn in names: 728 for fn in names:
677 l = self.repowatcher.lookup(fn, tree) 729 for f in tree.lookup(states, fn):
678 try: 730 yield f
679 if l in states:
680 yield fn
681 except TypeError:
682 for f, s in self.repowatcher.walk(states, l, fn):
683 yield f
684 731
685 return ['\0'.join(r) for r in [ 732 return ['\0'.join(r) for r in [
686 genresult('l', self.repowatcher.statustrees['l']), 733 genresult('l', self.repowatcher.statustrees['l']),
687 genresult('m', self.repowatcher.statustrees['m']), 734 genresult('m', self.repowatcher.statustrees['m']),
688 genresult('a', self.repowatcher.statustrees['a']), 735 genresult('a', self.repowatcher.statustrees['a']),