Hi Gregory. It's needed because fstatat is faster (uses less system time). As far as backport of scandir - scandir is written entirely in C, so it's not completely clear to me how you would backport that to use cffi. I don't believe dropping python2 support will be a viable option for quite a while, if you ask me
On Sun, Oct 16, 2016 at 6:55 PM, Gregory Szorc <gregory.sz...@gmail.com> wrote: > On Fri, Oct 14, 2016 at 12:02 AM, Maciej Fijalkowski <fij...@gmail.com> > wrote: >> >> # HG changeset patch >> # User Maciej Fijalkowski <fij...@gmail.com> >> # Date 1476428549 -7200 >> # Fri Oct 14 09:02:29 2016 +0200 >> # Node ID 4e80a66124279e235dec7ae8f58c0a14b0b137bf >> # Parent c770219dc4c253d7cd82519ce3c74438bb2829d3 >> osutil: implement listdir for linux >> >> diff --git a/mercurial/pure/osutil.py b/mercurial/pure/osutil.py >> --- a/mercurial/pure/osutil.py >> +++ b/mercurial/pure/osutil.py >> @@ -66,13 +66,20 @@ >> return result >> >> ffi = None >> -if modulepolicy not in policynocffi and sys.platform == 'darwin': >> +if modulepolicy not in policynocffi and ( >> + sys.platform == 'darwin' or sys.platform.startswith('linux')): >> try: >> from _osutil_cffi import ffi, lib >> except ImportError: >> if modulepolicy == 'cffi': # strict cffi import >> raise >> >> +class stat_res(object): >> + def __init__(self, st_mode, st_mtime, st_size): >> + self.st_mode = st_mode >> + self.st_mtime = st_mtime >> + self.st_size = st_size >> + >> if sys.platform == 'darwin' and ffi is not None: >> listdir_batch_size = 4096 >> # tweakable number, only affects performance, which chunks >> @@ -88,12 +95,6 @@ >> attrkinds[lib.VFIFO] = statmod.S_IFIFO >> attrkinds[lib.VSOCK] = statmod.S_IFSOCK >> >> - class stat_res(object): >> - def __init__(self, st_mode, st_mtime, st_size): >> - self.st_mode = st_mode >> - self.st_mtime = st_mtime >> - self.st_size = st_size >> - >> tv_sec_ofs = ffi.offsetof("struct timespec", "tv_sec") >> buf = ffi.new("char[]", listdir_batch_size) >> >> @@ -117,7 +118,7 @@ >> tp = attrkinds[cur.obj_type] >> if name == "." or name == "..": >> continue >> - if skip == name and tp == statmod.S_ISDIR: >> + if skip == name and tp == statmod.S_IFDIR: >> return [] >> if stat: >> mtime = cur.mtime.tv_sec >> @@ -146,12 +147,74 @@ >> try: >> ret = listdirinternal(dfd, req, stat, skip) >> finally: >> + lib.close(dfd) >> + return ret >> + >> +elif sys.platform.startswith('linux') and ffi is not None: >> + >> + _known_common_file_types = { >> + lib.DT_DIR: statmod.S_IFDIR, >> + lib.DT_CHR: statmod.S_IFCHR, >> + lib.DT_BLK: statmod.S_IFBLK, >> + lib.DT_REG: statmod.S_IFREG, >> + lib.DT_FIFO: statmod.S_IFIFO, >> + lib.DT_LNK: statmod.S_IFLNK, >> + lib.DT_SOCK: statmod.S_IFSOCK, >> + } >> + >> + def listdirinternal(dir, dirfd, stat, skip): >> + buf = ffi.new("struct stat *") >> + ret = [] >> + while True: >> + ffi.errno = 0 >> + dirent = lib.readdir(dir) >> + if not dirent: >> + break >> + dname = dirent.d_name >> + if dname[0] == "." and (dname[1] == "\x00" or >> + (dname[1] == "." and dname[2] == "\x00")): >> + continue >> + # >> + dtype = dirent.d_type >> + if stat or dtype not in _known_common_file_types: >> + if lib.fstatat(dirfd, dirent.d_name, buf, >> + lib.AT_SYMLINK_NOFOLLOW) != 0: >> + raise OSError(ffi.errno, os.strerror(ffi.errno)) >> + tp = statmod.S_IFMT(buf.st_mode) >> + else: >> + tp = _known_common_file_types[dtype] >> + # >> + name = ffi.string(dirent.d_name) >> + if skip == name and tp == statmod.S_IFDIR: >> + return [] >> + if stat: >> + ret.append((name, tp, stat_res( >> + st_mode = buf.st_mode, >> + st_mtime = buf.st_mtim.tv_sec, >> + st_size = buf.st_size))) >> + else: >> + ret.append((name, tp)) >> + >> + if ffi.errno != 0: >> + raise OSError(ffi.errno, os.strerror(ffi.errno)) >> + return ret >> + >> + def listdir(path, stat=False, skip=None): >> + dfd = os.open(path, os.O_RDONLY) >> + dir = lib.fdopendir(dfd) >> + if dir == ffi.NULL: >> try: >> - lib.close(dfd) >> + os.close(dfd) >> except BaseException: >> - pass # we ignore all the errors from closing, not >> - # much we can do about that >> + pass >> + raise OSError(ffi.errno, os.strerror(ffi.errno)) >> + >> + try: >> + ret = listdirinternal(dir, dfd, stat, skip) >> + finally: >> + lib.closedir(dir) >> return ret >> + >> else: >> listdir = listdirpure >> >> diff --git a/setup.py b/setup.py >> --- a/setup.py >> +++ b/setup.py >> @@ -323,7 +323,7 @@ >> exts = [setup_mpatch_cffi.ffi.distutils_extension(), >> setup_bdiff_cffi.ffi.distutils_extension()] >> # cffi modules go here >> - if sys.platform == 'darwin': >> + if sys.platform == 'darwin' or >> sys.platform.startswith('linux'): >> import setup_osutil_cffi >> exts.append(setup_osutil_cffi.ffi.distutils_extension()) >> self.distribution.ext_modules = exts >> diff --git a/setup_osutil_cffi.py b/setup_osutil_cffi.py >> --- a/setup_osutil_cffi.py >> +++ b/setup_osutil_cffi.py >> @@ -1,102 +1,159 @@ >> from __future__ import absolute_import >> >> import cffi >> +import sys >> >> -ffi = cffi.FFI() >> -ffi.set_source("_osutil_cffi", """ >> -#include <sys/attr.h> >> -#include <sys/vnode.h> >> -#include <unistd.h> >> -#include <fcntl.h> >> -#include <time.h> >> +if sys.platform == "darwin": >> + ffi = cffi.FFI() >> + ffi.set_source("_osutil_cffi", """ >> + #include <sys/attr.h> >> + #include <sys/vnode.h> >> + #include <unistd.h> >> + #include <fcntl.h> >> + #include <time.h> >> >> -typedef struct val_attrs { >> - uint32_t length; >> - attribute_set_t returned; >> - attrreference_t name_info; >> - fsobj_type_t obj_type; >> - struct timespec mtime; >> - uint32_t accessmask; >> - off_t datalength; >> -} __attribute__((aligned(4), packed)) val_attrs_t; >> -""", include_dirs=['mercurial']) >> -ffi.cdef(''' >> + typedef struct val_attrs { >> + uint32_t length; >> + attribute_set_t returned; >> + attrreference_t name_info; >> + fsobj_type_t obj_type; >> + struct timespec mtime; >> + uint32_t accessmask; >> + off_t datalength; >> + } __attribute__((aligned(4), packed)) val_attrs_t; >> + """, include_dirs=['mercurial']) >> + ffi.cdef(''' >> >> -typedef uint32_t attrgroup_t; >> + typedef uint32_t attrgroup_t; >> >> -typedef struct attrlist { >> - uint16_t bitmapcount; /* number of attr. bit sets in list */ >> - uint16_t reserved; /* (to maintain 4-byte alignment) */ >> - attrgroup_t commonattr; /* common attribute group */ >> - attrgroup_t volattr; /* volume attribute group */ >> - attrgroup_t dirattr; /* directory attribute group */ >> - attrgroup_t fileattr; /* file attribute group */ >> - attrgroup_t forkattr; /* fork attribute group */ >> - ...; >> -}; >> + typedef struct attrlist { >> + uint16_t bitmapcount; /* number of attr. bit sets in list */ >> + uint16_t reserved; /* (to maintain 4-byte alignment) */ >> + attrgroup_t commonattr; /* common attribute group */ >> + attrgroup_t volattr; /* volume attribute group */ >> + attrgroup_t dirattr; /* directory attribute group */ >> + attrgroup_t fileattr; /* file attribute group */ >> + attrgroup_t forkattr; /* fork attribute group */ >> + ...; >> + }; >> >> -typedef struct attribute_set { >> - ...; >> -} attribute_set_t; >> + typedef struct attribute_set { >> + ...; >> + } attribute_set_t; >> >> -typedef struct attrreference { >> - int attr_dataoffset; >> - int attr_length; >> - ...; >> -} attrreference_t; >> + typedef struct attrreference { >> + int attr_dataoffset; >> + int attr_length; >> + ...; >> + } attrreference_t; >> >> -typedef int ... off_t; >> + typedef int ... off_t; >> >> -typedef struct val_attrs { >> - uint32_t length; >> - attribute_set_t returned; >> - attrreference_t name_info; >> - uint32_t obj_type; >> - struct timespec mtime; >> - uint32_t accessmask; >> - off_t datalength; >> - ...; >> -} val_attrs_t; >> + typedef struct val_attrs { >> + uint32_t length; >> + attribute_set_t returned; >> + attrreference_t name_info; >> + uint32_t obj_type; >> + struct timespec mtime; >> + uint32_t accessmask; >> + off_t datalength; >> + ...; >> + } val_attrs_t; >> >> -/* the exact layout of the above struct will be figured out during build >> time */ >> + /* the exact layout of the above struct will be figured out during >> + build time */ >> >> -typedef int ... time_t; >> + typedef int ... time_t; >> >> -typedef struct timespec { >> - time_t tv_sec; >> - ...; >> -}; >> + typedef struct timespec { >> + time_t tv_sec; >> + ...; >> + }; >> >> -int getattrlist(const char* path, struct attrlist * attrList, void * >> attrBuf, >> - size_t attrBufSize, unsigned int options); >> + int getattrlist(const char* path, struct attrlist * attrList, >> + void * attrBuf, size_t attrBufSize, unsigned int >> options); >> >> -int getattrlistbulk(int dirfd, struct attrlist * attrList, void * >> attrBuf, >> - size_t attrBufSize, uint64_t options); >> + int getattrlistbulk(int dirfd, struct attrlist * attrList, void * >> attrBuf, >> + size_t attrBufSize, uint64_t options); >> >> -#define ATTR_BIT_MAP_COUNT ... >> -#define ATTR_CMN_NAME ... >> -#define ATTR_CMN_OBJTYPE ... >> -#define ATTR_CMN_MODTIME ... >> -#define ATTR_CMN_ACCESSMASK ... >> -#define ATTR_CMN_ERROR ... >> -#define ATTR_CMN_RETURNED_ATTRS ... >> -#define ATTR_FILE_DATALENGTH ... >> + #define ATTR_BIT_MAP_COUNT ... >> + #define ATTR_CMN_NAME ... >> + #define ATTR_CMN_OBJTYPE ... >> + #define ATTR_CMN_MODTIME ... >> + #define ATTR_CMN_ACCESSMASK ... >> + #define ATTR_CMN_ERROR ... >> + #define ATTR_CMN_RETURNED_ATTRS ... >> + #define ATTR_FILE_DATALENGTH ... >> >> -#define VREG ... >> -#define VDIR ... >> -#define VLNK ... >> -#define VBLK ... >> -#define VCHR ... >> -#define VFIFO ... >> -#define VSOCK ... >> + #define VREG ... >> + #define VDIR ... >> + #define VLNK ... >> + #define VBLK ... >> + #define VCHR ... >> + #define VFIFO ... >> + #define VSOCK ... >> >> -#define S_IFMT ... >> + #define S_IFMT ... >> >> -int open(const char *path, int oflag, int perm); >> -int close(int); >> + int open(const char *path, int oflag, int perm); >> + int close(int); >> >> -#define O_RDONLY ... >> -''') >> + #define O_RDONLY ... >> + ''') >> + >> +else: >> + >> + ffi = cffi.FFI() >> + >> + ffi.set_source("_osutil_cffi", """ >> + #define _GNU_SOURCE 1 >> + #include <sys/stat.h> >> + #include <sys/types.h> >> + #include <dirent.h> >> + #include <fcntl.h> >> + """) >> + >> + ffi.cdef(""" >> + typedef ... DIR; >> + >> + struct dirent { >> + unsigned char d_type; /* Type of file; not supported >> + by all filesystem types */ >> + char d_name[...]; /* Null-terminated filename */ >> + ...; >> + }; >> + >> + typedef int... mode_t; >> + typedef int... off_t; >> + typedef int... time_t; >> + >> + struct timespec { >> + time_t tv_sec; >> + ...; >> + }; >> + >> + struct stat { >> + mode_t st_mode; /* inode protection mode */ >> + struct timespec st_mtim; /* time of last data modification */ >> + off_t st_size; /* file size, in bytes */ >> + ...; >> + }; >> + >> + DIR *fdopendir(int fd); >> + struct dirent *readdir(DIR *dirp); >> + int closedir(DIR *dirp); >> + >> + #define AT_SYMLINK_NOFOLLOW ... >> + #define DT_DIR ... >> + #define DT_CHR ... >> + #define DT_BLK ... >> + #define DT_REG ... >> + #define DT_FIFO ... >> + #define DT_LNK ... >> + #define DT_SOCK ... >> + >> + int fstatat(int fd, const char *path, struct stat *buf, int flag); >> + """) >> >> if __name__ == '__main__': >> ffi.compile() > > > Few questions. > > First, why is this needed? I assume performance. But it isn't clear to me > what the benefit is. > > Second, do you think we should import a backport of scandir from Python 3.5? > I think that's essentially the same as what our custom listdir code is > doing. I'd rather we take a snapshot of upstream code, modify it to work on > Python 2.6/2.7, then call it a day until we drop Python 2 support in several > years. > _______________________________________________ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel