changeset 5127:39b6eaee6fd7

convert: replace fork with subprocess call.
author Patrick Mezard <pmezard@gmail.com>
date Mon, 06 Aug 2007 21:49:26 +0200
parents 117dab48ca99
children 78eb03cbe535
files hgext/convert/__init__.py hgext/convert/common.py hgext/convert/subversion.py
diffstat 3 files changed, 70 insertions(+), 45 deletions(-) [+]
line wrap: on
line diff
--- a/hgext/convert/__init__.py	Thu Aug 02 23:38:32 2007 +0200
+++ b/hgext/convert/__init__.py	Mon Aug 06 21:49:26 2007 +0200
@@ -5,17 +5,17 @@
 # This software may be used and distributed according to the terms
 # of the GNU General Public License, incorporated herein by reference.
 
-from common import NoRepo, converter_source, converter_sink
+from common import NoRepo, converter_source, converter_sink, decodeargs
 from cvs import convert_cvs
 from git import convert_git
 from hg import mercurial_source, mercurial_sink
 from subversion import convert_svn
 
-import os, shlex, shutil
+import os, shlex, shutil, sys
 from mercurial import hg, ui, util, commands
 from mercurial.i18n import _
 
-commands.norepo += " convert"
+commands.norepo += " convert debug-svn-log"
 
 converters = [convert_cvs, convert_git, convert_svn, mercurial_source,
               mercurial_sink]
@@ -431,6 +431,15 @@
                 opts)
     c.convert()
 
+def debugsvnlog(ui, **opts):
+    """Child side of the SVN log fetch: decode get_log arguments from
+    stdin and write the log to stdout, keeping its memory out of the parent."""
+    from subversion import get_log_child  # module name itself is not bound here
+    util.set_binary(sys.stdin)
+    util.set_binary(sys.stdout)
+    args = decodeargs(sys.stdin.read())
+    get_log_child(sys.stdout, *args)
+
 cmdtable = {
     "convert":
         (_convert,
@@ -439,4 +448,9 @@
           ('r', 'rev', '', 'import up to target revision REV'),
           ('', 'datesort', None, 'try to sort changesets by date')],
          'hg convert [OPTION]... SOURCE [DEST [MAPFILE]]'),
+    "debug-svn-log":
+        (debugsvnlog,
+         [],
+         'hg debug-svn-log'),
 }
+
--- a/hgext/convert/common.py	Thu Aug 02 23:38:32 2007 +0200
+++ b/hgext/convert/common.py	Mon Aug 06 21:49:26 2007 +0200
@@ -1,4 +1,19 @@
 # common code for the convert extension
+import base64
+import cPickle as pickle
+
+def encodeargs(args):
+    """Pickle args and return the result as one line of base64 text.
+
+    base64.encodestring() breaks its output into newline-terminated
+    lines; those newlines are stripped here. decodeargs() is the
+    inverse.
+    """
+    return base64.encodestring(pickle.dumps(args)).replace('\n', '')
+
+def decodeargs(s):
+    # Undo encodeargs(). NOTE: this unpickles s, so s must come from a trusted source.
+    return pickle.loads(base64.decodestring(s))
 
 class NoRepo(Exception): pass
 
@@ -118,3 +133,4 @@
         """Put tags into sink.
         tags: {tagname: sink_rev_id, ...}"""
         raise NotImplementedError()
+
--- a/hgext/convert/subversion.py	Thu Aug 02 23:38:32 2007 +0200
+++ b/hgext/convert/subversion.py	Mon Aug 06 21:49:26 2007 +0200
@@ -24,7 +24,7 @@
 
 from cStringIO import StringIO
 
-from common import NoRepo, commit, converter_source
+from common import NoRepo, commit, converter_source, encodeargs
 
 try:
     from svn.core import SubversionException, Pool
@@ -58,6 +58,30 @@
         self.copyfrom_rev = p.copyfrom_rev
         self.action = p.action
 
+def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
+                    strict_node_history=False):
+    """Pickle every svn.ra.get_log entry for url onto fp, followed by a
+    status: num from a caught SubversionException, or None on success."""
+    def send(obj):
+        pickle.dump(obj, fp, -1)  # -1 selects the highest pickle protocol
+    def receiver(orig_paths, revnum, author, date, message, pool):
+        if orig_paths is not None:
+            for path, change in orig_paths.iteritems():
+                orig_paths[path] = changedpath(change)
+        send((orig_paths, revnum, author, date, message))
+
+    try:
+        # Open an ra session of our own so that our parent can consume
+        # our results without confusing the server.
+        own = transport.SvnRaTransport(url=url)
+        svn.ra.get_log(own.ra, paths, start, end, limit,
+                       discover_changed_paths, strict_node_history, receiver)
+    except SubversionException, (_, num):
+        send(num)
+    else:
+        send(None)
+    fp.close()
+
 # SVN conversion code stolen from bzr-svn and tailor
 class convert_svn(converter_source):
     def __init__(self, ui, url, rev=None):
@@ -196,34 +220,6 @@
 
     def get_log(self, paths, start, end, limit=0, discover_changed_paths=True,
                 strict_node_history=False):
-        '''wrapper for svn.ra.get_log.
-        on a large repository, svn.ra.get_log pins huge amounts of
-        memory that cannot be recovered.  work around it by forking
-        and writing results over a pipe.'''
-
-        def child(fp):
-            protocol = -1
-            def receiver(orig_paths, revnum, author, date, message, pool):
-                if orig_paths is not None:
-                    for k, v in orig_paths.iteritems():
-                        orig_paths[k] = changedpath(v)
-                pickle.dump((orig_paths, revnum, author, date, message),
-                            fp, protocol)
-
-            try:
-                # Use an ra of our own so that our parent can consume
-                # our results without confusing the server.
-                t = transport.SvnRaTransport(url=self.url)
-                svn.ra.get_log(t.ra, paths, start, end, limit,
-                               discover_changed_paths,
-                               strict_node_history,
-                               receiver)
-            except SubversionException, (_, num):
-                self.ui.print_exc()
-                pickle.dump(num, fp, protocol)
-            else:
-                pickle.dump(None, fp, protocol)
-            fp.close()
 
         def parent(fp):
             while True:
@@ -235,20 +231,19 @@
                         break
                     raise SubversionException("child raised exception", entry)
                 yield entry
+        # Fetch the log in a child "hg debug-svn-log" process and read it back.
+        args = [self.url, paths, start, end, limit, discover_changed_paths,
+                strict_node_history]
+        arg = encodeargs(args)
+        hgexe = util.hgexecutable()
+        cmd = '%s debug-svn-log' % util.shellquote(hgexe)
+        stdin, stdout = os.popen2(cmd, 'b')
+        # The child reads its encoded arguments from stdin until EOF.
+        stdin.write(arg)
+        stdin.close()
 
-        rfd, wfd = os.pipe()
-        pid = os.fork()
-        if pid:
-            os.close(wfd)
-            for p in parent(os.fdopen(rfd, 'rb')):
-                yield p
-            ret = os.waitpid(pid, 0)[1]
-            if ret:
-                raise util.Abort(_('get_log %s') % util.explain_exit(ret))
-        else:
-            os.close(rfd)
-            child(os.fdopen(wfd, 'wb'))
-            os._exit(0)
+        for p in parent(stdout):
+            yield p
 
     def gettags(self):
         tags = {}