V2: The only difference is the removal of the delete_dir() function,
which was accidentally left over from previous attempts to implement
this syscall.

It looks like Go apps that need to iterate over the entries in a
directory use the getdents64 system call, which is documented at
https://man7.org/linux/man-pages/man2/getdents.2.html. Normally
this functionality is provided by libc functions like opendir(),
readdir(), etc., which in turn delegate to getdents64. Go is known
for bypassing libc in such cases.

So this patch implements the getdents64 syscall by adding a utility
function, sys_getdents64(), to fs/vfs/main.cc, which is then called by
syscall() in linux.cc. For details of how this function works, please
look at the comments.

This patch also adds a unit test to verify this syscall works.

Refs #1188

Signed-off-by: Waldemar Kozaczuk <jwkozac...@gmail.com>
---
 fs/vfs/main.cc         |  65 ++++++++++++++++++++++++
 linux.cc               |   4 ++
 modules/tests/Makefile |   2 +-
 tests/tst-getdents.cc  | 111 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 181 insertions(+), 1 deletion(-)
 create mode 100644 tests/tst-getdents.cc

diff --git a/fs/vfs/main.cc b/fs/vfs/main.cc
index 8e3d4e5e..bdedc6c6 100644
--- a/fs/vfs/main.cc
+++ b/fs/vfs/main.cc
@@ -790,6 +790,71 @@ int readdir64_r(DIR *dir, struct dirent64 *entry,
 extern "C" OSV_LIBC_API
 struct dirent *readdir64(DIR *dir) __attribute__((alias("readdir")));
 
+struct linux_dirent64 {
+    u64            d_ino;
+    s64            d_off;
+    unsigned short d_reclen;
+    unsigned char  d_type;
+    char           d_name[];
+};
+
+#undef getdents64
+extern "C"
+ssize_t sys_getdents64(int fd, void *dirp, size_t count)
+{
+    auto *dir = fdopendir(fd);
+    if (dir) {
+        // We have verified that fd points to a valid directory
+        // but we do NOT need the DIR handle so just delete it
+        delete dir;
+
+        struct file *fp;
+        int error = fget(fd, &fp);
+        if (error) {
+            errno = error;
+            return -1;
+        }
+
+        size_t bytes_read = 0;
+        off_t last_off = -1;
+        errno = 0;
+
+        // Iterate over as many entries as there is space in the buffer
+        // by directly calling sys_readdir()
+        struct dirent entry;
+        while ((error = sys_readdir(fp, &entry)) == 0) {
+            auto rec_len = offsetof(linux_dirent64, d_name) + strlen(entry.d_name) + 1;
+            if (rec_len <= count) {
+                auto *ldirent = static_cast<linux_dirent64*>(dirp + bytes_read);
+                ldirent->d_ino = entry.d_ino;
+                ldirent->d_off = entry.d_off;
+                ldirent->d_type = entry.d_type;
+                strcpy(ldirent->d_name, entry.d_name);
+                ldirent->d_reclen = rec_len;
+                count -= rec_len;
+                bytes_read += rec_len;
+                last_off = entry.d_off;
+            } else {
+                if (last_off >= 0)
+                    sys_seekdir(fp, last_off);
+                break;
+            }
+        }
+
+        fdrop(fp);
+
+        if (error && error != ENOENT) {
+            errno = error;
+            return -1;
+        } else {
+            errno = 0;
+            return bytes_read;
+        }
+    } else {
+        return -1;
+    }
+}
+
 OSV_LIBC_API
 void rewinddir(DIR *dirp)
 {
diff --git a/linux.cc b/linux.cc
index 85c08981..f60489e3 100644
--- a/linux.cc
+++ b/linux.cc
@@ -424,6 +424,9 @@ static int tgkill(int tgid, int tid, int sig)
     return -1;
 }
 
+#define __NR_sys_getdents64 __NR_getdents64
+extern "C" ssize_t sys_getdents64(int fd, void *dirp, size_t count);
+
 OSV_LIBC_API long syscall(long number, ...)
 {
     // Save FPU state and restore it at the end of this function
@@ -512,6 +515,7 @@ OSV_LIBC_API long syscall(long number, ...)
     SYSCALL2(statfs, const char *, struct statfs *);
     SYSCALL3(unlinkat, int, const char *, int);
     SYSCALL3(symlinkat, const char *, int, const char *);
+    SYSCALL3(sys_getdents64, int, void *, size_t);
     }
 
     debug_always("syscall(): unimplemented system call %d\n", number);
diff --git a/modules/tests/Makefile b/modules/tests/Makefile
index ca489341..e462ebc8 100644
--- a/modules/tests/Makefile
+++ b/modules/tests/Makefile
@@ -133,7 +133,7 @@ tests := tst-pthread.so misc-ramdisk.so tst-vblk.so tst-bsd-evh.so \
        tst-getopt.so tst-getopt-pie.so tst-non-pie.so tst-semaphore.so \
        tst-elf-init.so tst-realloc.so tst-setjmp.so \
        libtls.so libtls_gold.so tst-tls.so tst-tls-gold.so tst-tls-pie.so \
-       tst-sigaction.so tst-syscall.so tst-ifaddrs.so
+       tst-sigaction.so tst-syscall.so tst-ifaddrs.so tst-getdents.so
 #      libstatic-thread-variable.so tst-static-thread-variable.so \
 
 #TODO For now let us disable these tests for aarch64 until
diff --git a/tests/tst-getdents.cc b/tests/tst-getdents.cc
new file mode 100644
index 00000000..5803aaeb
--- /dev/null
+++ b/tests/tst-getdents.cc
@@ -0,0 +1,111 @@
+#include <dirent.h>     /* Defines DT_* constants */
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <cassert>
+
+#include <memory>
+#include <string>
+#include <vector>
+#include <algorithm>
+
+#define handle_error(msg) \
+    do { perror(msg); exit(EXIT_FAILURE); } while (0)
+
+struct test_dirent64 {
+    unsigned long  d_ino;
+    off_t          d_off;
+    unsigned char  d_type;
+    std::string    d_name;
+
+    bool operator ==(const test_dirent64 &b) const {
+       return d_ino == b.d_ino &&
+              d_off == b.d_off &&
+              d_type == b.d_type &&
+              d_name == b.d_name;
+    }
+};
+
+// This code is loosely based on the example found under https://man7.org/linux/man-pages/man2/getdents.2.html
+void test_getdents64(const char *dir_path, size_t buf_size, std::vector<test_dirent64> &dirents) {
+    struct linux_dirent64 {
+        unsigned long  d_ino;
+        off_t          d_off;
+        unsigned short d_reclen;
+        unsigned char  d_type;
+        char           d_name[];
+    };
+
+    int fd = open(dir_path, O_RDONLY | O_DIRECTORY);
+    if (fd == -1)
+        handle_error("open");
+
+    std::unique_ptr<char []> buf_ptr(new char[buf_size]);
+    char *buf = buf_ptr.get();
+
+    for (;;) {
+        long nread = syscall(SYS_getdents64, fd, buf, buf_size);
+        if (nread == -1)
+            handle_error("getdents64");
+
+        if (nread == 0)
+            break;
+
+        printf("--------------- nread=%ld ---------------\n", nread);
+        printf("inode#    file type  d_reclen  d_off   d_name\n");
+        for (long bpos = 0; bpos < nread;) {
+            auto *d = (struct linux_dirent64 *) (buf + bpos);
+            printf("%8ld  ", d->d_ino);
+
+            char d_type = d->d_type;
+            printf("%-10s ", (d_type == DT_REG) ?  "regular" :
+                             (d_type == DT_DIR) ?  "directory" :
+                             (d_type == DT_FIFO) ? "FIFO" :
+                             (d_type == DT_SOCK) ? "socket" :
+                             (d_type == DT_LNK) ?  "symlink" :
+                             (d_type == DT_BLK) ?  "block dev" :
+                             (d_type == DT_CHR) ?  "char dev" : "???");
+
+            printf("%4d %10jd   %s\n", d->d_reclen,
+                    (intmax_t) d->d_off, d->d_name);
+            bpos += d->d_reclen;
+
+            test_dirent64 dirent;
+            dirent.d_ino = d->d_ino;
+            dirent.d_off = d->d_off;
+            dirent.d_type = d_type;
+            dirent.d_name = d->d_name;
+            dirents.push_back(dirent);
+        }
+    }
+
+    close(fd);
+}
+
+#define LARGE_BUF_SIZE 1024
+#define SMALL_BUF_SIZE 128
+
+int main()
+{
+    // Verify that getdents64 works correctly against /proc directory and yields
+    // correct results
+    std::vector<test_dirent64> dirents_1;
+    test_getdents64("/proc", LARGE_BUF_SIZE, dirents_1);
+
+    assert(std::count_if(dirents_1.begin(), dirents_1.end(), [](test_dirent64 d) { return d.d_type == DT_REG; }) >= 3);
+    assert(std::count_if(dirents_1.begin(), dirents_1.end(), [](test_dirent64 d) { return d.d_type == DT_DIR; }) >= 5);
+
+    assert(std::find_if(dirents_1.begin(), dirents_1.end(), [](test_dirent64 d) { return d.d_name == ".."; }) != dirents_1.end());
+    assert(std::find_if(dirents_1.begin(), dirents_1.end(), [](test_dirent64 d) { return d.d_name == "cpuinfo"; }) != dirents_1.end());
+    assert(std::find_if(dirents_1.begin(), dirents_1.end(), [](test_dirent64 d) { return d.d_name == "sys"; }) != dirents_1.end());
+
+    // Verify that getdents64 works with smaller buffer and yields same results as above
+    std::vector<test_dirent64> dirents_2;
+    test_getdents64("/proc", SMALL_BUF_SIZE, dirents_2);
+
+    assert(std::equal(dirents_1.begin(), dirents_1.end(), dirents_2.begin()));
+}
-- 
2.34.1

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/20220520193155.146856-1-jwkozaczuk%40gmail.com.

Reply via email to