osutil: add darwin-only version of os.listdir using cffi
authorMaciej Fijalkowski <fijall@gmail.com>
Mon, 11 Jul 2016 11:05:08 +0200
changeset 29600 7a157639b8f2
parent 29599 e3dc96834126
child 29601 6cff2ac0ccb9
osutil: add darwin-only version of os.listdir using cffi
mercurial/pure/osutil.py
setup.py
setup_osutil_cffi.py
--- a/mercurial/pure/osutil.py	Sun Jun 05 12:29:08 2016 +0900
+++ b/mercurial/pure/osutil.py	Mon Jul 11 11:05:08 2016 +0200
@@ -14,6 +14,10 @@
 import stat as statmod
 import sys
 
+from . import policy
+modulepolicy = policy.policy
+policynocffi = policy.policynocffi
+
 def _mode_to_kind(mode):
     if statmod.S_ISREG(mode):
         return statmod.S_IFREG
@@ -31,7 +35,7 @@
         return statmod.S_IFSOCK
     return mode
 
-def listdir(path, stat=False, skip=None):
+def listdirpure(path, stat=False, skip=None):
     '''listdir(path, stat=False) -> list_of_tuples
 
     Return a sorted list containing information about the entries
@@ -61,6 +65,95 @@
             result.append((fn, _mode_to_kind(st.st_mode)))
     return result
 
+ffi = None
+if modulepolicy not in policynocffi and sys.platform == 'darwin':
+    try:
+        from _osutil_cffi import ffi, lib
+    except ImportError:
+        if modulepolicy == 'cffi': # strict cffi import
+            raise
+
+if sys.platform == 'darwin' and ffi is not None:
+    listdir_batch_size = 4096
+    # tweakable number, only affects performance, which chunks
+    # of bytes do we get back from getattrlistbulk
+
+    attrkinds = [None] * 20 # we need the max no for enum VXXX, 20 is plenty
+
+    attrkinds[lib.VREG] = statmod.S_IFREG
+    attrkinds[lib.VDIR] = statmod.S_IFDIR
+    attrkinds[lib.VLNK] = statmod.S_IFLNK
+    attrkinds[lib.VBLK] = statmod.S_IFBLK
+    attrkinds[lib.VCHR] = statmod.S_IFCHR
+    attrkinds[lib.VFIFO] = statmod.S_IFIFO
+    attrkinds[lib.VSOCK] = statmod.S_IFSOCK
+
+    class stat_res(object):
+        def __init__(self, st_mode, st_mtime, st_size):
+            self.st_mode = st_mode
+            self.st_mtime = st_mtime
+            self.st_size = st_size
+
+    tv_sec_ofs = ffi.offsetof("struct timespec", "tv_sec")
+    buf = ffi.new("char[]", listdir_batch_size)
+
+    def listdirinternal(dfd, req, stat, skip):
+        ret = []
+        while True:
+            r = lib.getattrlistbulk(dfd, req, buf, listdir_batch_size, 0)
+            if r == 0:
+                break
+            if r == -1:
+                raise OSError(ffi.errno, os.strerror(ffi.errno))
+            cur = ffi.cast("val_attrs_t*", buf)
+            for i in range(r):
+                lgt = cur.length
+                assert lgt == ffi.cast('uint32_t*', cur)[0]
+                ofs = cur.name_info.attr_dataoffset
+                str_lgt = cur.name_info.attr_length
+                base_ofs = ffi.offsetof('val_attrs_t', 'name_info')
+                name = str(ffi.buffer(ffi.cast("char*", cur) + base_ofs + ofs,
+                           str_lgt - 1))
+                tp = attrkinds[cur.obj_type]
+                if name == "." or name == "..":
+                    continue
+                if skip == name and tp == statmod.S_ISDIR:
+                    return []
+                if stat:
+                    mtime = cur.time.tv_sec
+                    mode = (cur.accessmask & ~lib.S_IFMT)| tp
+                    ret.append((name, tp, stat_res(st_mode=mode, st_mtime=mtime,
+                                st_size=cur.datalength)))
+                else:
+                    ret.append((name, tp))
+                cur += lgt
+        return ret
+
+    def listdir(path, stat=False, skip=None):
+        req = ffi.new("struct attrlist*")
+        req.bitmapcount = lib.ATTR_BIT_MAP_COUNT
+        req.commonattr = (lib.ATTR_CMN_RETURNED_ATTRS |
+                          lib.ATTR_CMN_NAME |
+                          lib.ATTR_CMN_OBJTYPE |
+                          lib.ATTR_CMN_ACCESSMASK |
+                          lib.ATTR_CMN_MODTIME)
+        req.fileattr = lib.ATTR_FILE_DATALENGTH
+        dfd = lib.open(path, lib.O_RDONLY, 0)
+        if dfd == -1:
+            raise OSError(ffi.errno, os.strerror(ffi.errno))
+
+        try:
+            ret = listdirinternal(dfd, req, stat, skip)
+        finally:
+            try:
+                lib.close(dfd)
+            except BaseException:
+                pass # we ignore all the errors from closing, not
+                # much we can do about that
+        return ret
+else:
+    listdir = listdirpure
+
 if os.name != 'nt':
     posixfile = open
 
--- a/setup.py	Sun Jun 05 12:29:08 2016 +0900
+++ b/setup.py	Mon Jul 11 11:05:08 2016 +0200
@@ -320,6 +320,9 @@
         elif self.distribution.cffi:
             exts = []
             # cffi modules go here
+            if sys.platform == 'darwin':
+                import setup_osutil_cffi
+                exts.append(setup_osutil_cffi.ffi.distutils_extension())
             self.distribution.ext_modules = exts
         else:
             h = os.path.join(get_python_inc(), 'Python.h')
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/setup_osutil_cffi.py	Mon Jul 11 11:05:08 2016 +0200
@@ -0,0 +1,101 @@
+from __future__ import absolute_import
+
+import cffi
+
+ffi = cffi.FFI()
+ffi.set_source("_osutil_cffi", """
+#include <sys/attr.h>
+#include <sys/vnode.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <time.h>
+
+typedef struct val_attrs {
+    uint32_t          length;
+    attribute_set_t   returned;
+    attrreference_t   name_info;
+    fsobj_type_t      obj_type;
+    struct timespec   mtime;
+    uint32_t          accessmask;
+    off_t             datalength;
+} __attribute__((aligned(4), packed)) val_attrs_t;
+""", include_dirs=['mercurial'])
+ffi.cdef('''
+
+typedef uint32_t attrgroup_t;
+
+typedef struct attrlist {
+    uint16_t     bitmapcount; /* number of attr. bit sets in list */
+    uint16_t   reserved;    /* (to maintain 4-byte alignment) */
+    attrgroup_t commonattr;  /* common attribute group */
+    attrgroup_t volattr;     /* volume attribute group */
+    attrgroup_t dirattr;     /* directory attribute group */
+    attrgroup_t fileattr;    /* file attribute group */
+    attrgroup_t forkattr;    /* fork attribute group */
+    ...;
+};
+
+typedef struct attribute_set {
+    ...;
+} attribute_set_t;
+
+typedef struct attrreference {
+    int attr_dataoffset;
+    int attr_length;
+    ...;
+} attrreference_t;
+
+typedef struct val_attrs {
+    uint32_t          length;
+    attribute_set_t   returned;
+    attrreference_t   name_info;
+    uint32_t          obj_type;
+    struct timespec   mtime;
+    uint32_t          accessmask;
+    int               datalength;
+    ...;
+} val_attrs_t;
+
+/* the exact layout of the above struct will be figured out during build time */
+
+typedef int ... time_t;
+typedef int ... off_t;
+
+typedef struct timespec {
+    time_t tv_sec;
+    ...;
+};
+
+int getattrlist(const char* path, struct attrlist * attrList, void * attrBuf,
+                size_t attrBufSize, unsigned int options);
+
+int getattrlistbulk(int dirfd, struct attrlist * attrList, void * attrBuf,
+                    size_t attrBufSize, uint64_t options);
+
+#define ATTR_BIT_MAP_COUNT ...
+#define ATTR_CMN_NAME ...
+#define ATTR_CMN_OBJTYPE ...
+#define ATTR_CMN_MODTIME ...
+#define ATTR_CMN_ACCESSMASK ...
+#define ATTR_CMN_ERROR ...
+#define ATTR_CMN_RETURNED_ATTRS ...
+#define ATTR_FILE_DATALENGTH ...
+
+#define VREG ...
+#define VDIR ...
+#define VLNK ...
+#define VBLK ...
+#define VCHR ...
+#define VFIFO ...
+#define VSOCK ...
+
+#define S_IFMT ...
+
+int open(const char *path, int oflag, int perm);
+int close(int);
+
+#define O_RDONLY ...
+''')
+
+if __name__ == '__main__':
+    ffi.compile()