Add file encoding/decoding support
author mpm@selenic.com
Thu, 15 Sep 2005 02:59:16 -0500
changeset 1258 1945754e466b
parent 1256 8054fdb0b145
child 1259 f75567782aba
Add file encoding/decoding support
doc/hgrc.5.txt
mercurial/localrepo.py
mercurial/util.py
tests/test-encode
tests/test-encode.out
--- a/doc/hgrc.5.txt	Thu Sep 15 00:49:40 2005 -0500
+++ b/doc/hgrc.5.txt	Thu Sep 15 02:59:16 2005 -0500
@@ -61,6 +61,26 @@
 Mercurial "hgrc" file, the purpose of each section, its possible
 keys, and their possible values.
 
+decode/encode:: 
+  Filters for transforming files on checkout (decode) and checkin
+  (encode). This would typically be used for newline processing or
+  other localization/canonicalization of files.
+
+  Filters consist of a filter pattern followed by a filter command.
+  The command must accept data on stdin and return the transformed
+  data on stdout.
+
+  Example:
+
+    [encode]
+    # uncompress gzip files on checkin to improve delta compression
+    # note: not necessarily a good idea, just an example
+    *.gz = gunzip
+
+    [decode]
+    # recompress gzip files when writing them to the working dir
+    *.gz = gzip
+
 hooks::
   Commands that get automatically executed by various actions such as
   starting or finishing a commit.
--- a/mercurial/localrepo.py	Thu Sep 15 00:49:40 2005 -0500
+++ b/mercurial/localrepo.py	Thu Sep 15 02:59:16 2005 -0500
@@ -33,6 +33,8 @@
         self.changelog = changelog.changelog(self.opener)
         self.tagscache = None
         self.nodetagscache = None
+        self.encodepats = None
+        self.decodepats = None
 
         if create:
             os.mkdir(self.path)
@@ -160,9 +162,37 @@
         return self.wopener(f, mode)
 
     def wread(self, filename):
-        return self.wopener(filename, 'r').read()
+        if self.encodepats == None:
+            l = []
+            for pat, cmd in self.ui.configitems("encode"):
+                mf = util.matcher("", "/", [pat], [], [])[1]
+                l.append((mf, cmd))
+            self.encodepats = l
+
+        data = self.wopener(filename, 'r').read()
+
+        for mf, cmd in self.encodepats:
+            if mf(filename):
+                self.ui.debug("filtering %s through %s\n" % (filename, cmd))
+                data = util.filter(data, cmd)
+                break
+
+        return data
 
     def wwrite(self, filename, data, fd=None):
+        if self.decodepats == None:
+            l = []
+            for pat, cmd in self.ui.configitems("decode"):
+                mf = util.matcher("", "/", [pat], [], [])[1]
+                l.append((mf, cmd))
+            self.decodepats = l
+
+        for mf, cmd in self.decodepats:
+            if mf(filename):
+                self.ui.debug("filtering %s through %s\n" % (filename, cmd))
+                data = util.filter(data, cmd)
+                break
+
         if fd:
             return fd.write(data)
         return self.wopener(filename, 'w').write(data)
--- a/mercurial/util.py	Thu Sep 15 00:49:40 2005 -0500
+++ b/mercurial/util.py	Thu Sep 15 02:59:16 2005 -0500
@@ -12,7 +12,23 @@
 
 import os, errno
 from demandload import *
-demandload(globals(), "re cStringIO shutil")
+demandload(globals(), "re cStringIO shutil popen2 threading")
+
+def filter(s, cmd):
+    "filter a string through a command that transforms its input to its output"
+    (pout, pin) = popen2.popen2(cmd, -1, 'b')
+    def writer():
+        pin.write(s)
+        pin.close()
+
+    # we should use select instead on UNIX, but this will work on most
+    # systems, including Windows
+    w = threading.Thread(target=writer)
+    w.start()
+    f = pout.read()
+    pout.close()
+    w.join()
+    return f
 
 def binary(s):
     """return true if a string is binary data using diff's heuristic"""
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-encode	Thu Sep 15 02:59:16 2005 -0500
@@ -0,0 +1,34 @@
+#!/bin/sh
+
+hg init
+
+cat > .hg/hgrc <<EOF
+[encode]
+*.gz = gunzip
+
+[decode]
+*.gz = gzip
+
+EOF
+
+echo "this is a test" | gzip > a.gz
+hg add a.gz
+hg ci -m "test" -d "0 0"
+echo %% no changes
+hg status
+touch a.gz
+
+echo %% no changes
+hg status
+
+echo %% uncompressed contents in repo
+hg debugdata .hg/data/a.gz.d 0
+
+echo %% uncompress our working dir copy
+gunzip < a.gz
+
+rm a.gz
+hg co
+
+echo %% uncompress our new working dir copy
+gunzip < a.gz
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-encode.out	Thu Sep 15 02:59:16 2005 -0500
@@ -0,0 +1,8 @@
+%% no changes
+%% no changes
+%% uncompressed contents in repo
+this is a test
+%% uncompress our working dir copy
+this is a test
+%% uncompress our new working dir copy
+this is a test