It looks like Go applications that need to iterate over the entries
in a directory use the getdents64 system call, which is documented
at https://man7.org/linux/man-pages/man2/getdents.2.html. Normally
this functionality is provided by libc functions such as opendir(),
readdir(), etc., which in turn delegate to getdents64. Go is known
for bypassing libc in such cases.

So this patch implements the getdents64 syscall by adding a utility
function to the VFS main.cc, which is then called by syscall() in linux.cc.
For details of how this function works, please look at the comments.

This patch also adds a unit test to verify this syscall works.

Refs #1188

Signed-off-by: Waldemar Kozaczuk <jwkozac...@gmail.com>
---
 fs/vfs/main.cc         |  68 +++++++++++++++++++++++++
 linux.cc               |   4 ++
 modules/tests/Makefile |   2 +-
 tests/tst-getdents.cc  | 111 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 184 insertions(+), 1 deletion(-)
 create mode 100644 tests/tst-getdents.cc

diff --git a/fs/vfs/main.cc b/fs/vfs/main.cc
index 1b0d7c11..f5db6be0 100644
--- a/fs/vfs/main.cc
+++ b/fs/vfs/main.cc
@@ -668,6 +668,10 @@ struct __dirstream
     int fd;
 };
 
+void _delete_dir(DIR *dir) { // Releases a DIR handle with delete. NOTE(review): no caller is visible in this patch.
+    delete dir;
+}
+
 OSV_LIBC_API
 DIR *opendir(const char *path)
 {
@@ -775,6 +779,70 @@ int readdir64_r(DIR *dir, struct dirent64 *entry,
 extern "C" OSV_LIBC_API
 struct dirent *readdir64(DIR *dir) __attribute__((alias("readdir")));
 
+struct linux_dirent64 { // Record layout that __get_dents_64() writes into the caller's buffer
+    u64            d_ino;    // copied from dirent::d_ino
+    s64            d_off;    // copied from dirent::d_off
+    unsigned short d_reclen; // size in bytes of this whole record
+    unsigned char  d_type;   // copied from dirent::d_type
+    char           d_name[]; // NUL-terminated entry name, copied with strcpy()
+};
+
+// Implements getdents64(2): packs entries of the directory open at fd into dirp
+// as linux_dirent64 records. Returns the number of bytes written, 0 at end of
+// directory, or -1 with errno set (EINVAL if count cannot hold even one entry).
+extern "C"
+ssize_t __get_dents_64(int fd, void *dirp, size_t count)
+{
+    // fdopendir() only verifies that fd is a directory; the DIR handle is not needed
+    auto *dir = fdopendir(fd);
+    if (!dir) {
+        return -1;
+    }
+    delete dir;
+
+    struct file *fp;
+    int error = fget(fd, &fp);
+    if (error) {
+        errno = error;
+        return -1;
+    }
+
+    // Byte-wise cursor: arithmetic on void* is a GNU extension, not standard C++
+    auto *out = static_cast<char*>(dirp);
+    size_t bytes_read = 0;
+    off_t last_off = -1;
+
+    // Copy as many entries as fit by calling sys_readdir() directly
+    struct dirent entry;
+    while ((error = sys_readdir(fp, &entry)) == 0) {
+        // Round up to 8 bytes so d_ino/d_off of the next record stay aligned
+        size_t rec_len = (offsetof(linux_dirent64, d_name) + strlen(entry.d_name) + 1 + 7) & ~size_t(7);
+        if (rec_len > count) {
+            if (last_off >= 0)
+                sys_seekdir(fp, last_off); // rewind so the next call returns this entry
+            if (bytes_read == 0)
+                error = EINVAL; // 0 would read as EOF; NOTE(review): this entry is already consumed
+            break;
+        }
+        auto *ldirent = reinterpret_cast<linux_dirent64*>(out + bytes_read);
+        ldirent->d_ino = entry.d_ino;
+        ldirent->d_off = entry.d_off;
+        ldirent->d_type = entry.d_type;
+        ldirent->d_reclen = rec_len;
+        strcpy(ldirent->d_name, entry.d_name);
+        count -= rec_len;
+        bytes_read += rec_len;
+        last_off = entry.d_off;
+    }
+    fdrop(fp);
+    if (error && error != ENOENT) {
+        errno = error;
+        return -1;
+    }
+    errno = 0;
+    return bytes_read;
+}
+
 OSV_LIBC_API
 void rewinddir(DIR *dirp)
 {
diff --git a/linux.cc b/linux.cc
index c9b6b7b6..235ba1cf 100644
--- a/linux.cc
+++ b/linux.cc
@@ -410,6 +410,9 @@ static int tgkill(int tgid, int tid, int sig)
     return -1;
 }
 
+#define __NR___get_dents_64 __NR_getdents64 // syscall number under the handler's name; presumably what SYSCALL3() looks up - TODO confirm
+extern "C" ssize_t __get_dents_64(int fd, void *dirp, size_t count); // defined in fs/vfs/main.cc
+
 OSV_LIBC_API long syscall(long number, ...)
 {
     // Save FPU state and restore it at the end of this function
@@ -495,6 +498,7 @@ OSV_LIBC_API long syscall(long number, ...)
     SYSCALL0(getuid);
     SYSCALL3(lseek, int, off_t, int);
     SYSCALL2(statfs, const char *, struct statfs *);
+    SYSCALL3(__get_dents_64, int, void *, size_t);
     }
 
     debug_always("syscall(): unimplemented system call %d\n", number);
diff --git a/modules/tests/Makefile b/modules/tests/Makefile
index ca489341..e462ebc8 100644
--- a/modules/tests/Makefile
+++ b/modules/tests/Makefile
@@ -133,7 +133,7 @@ tests := tst-pthread.so misc-ramdisk.so tst-vblk.so 
tst-bsd-evh.so \
        tst-getopt.so tst-getopt-pie.so tst-non-pie.so tst-semaphore.so \
        tst-elf-init.so tst-realloc.so tst-setjmp.so \
        libtls.so libtls_gold.so tst-tls.so tst-tls-gold.so tst-tls-pie.so \
-       tst-sigaction.so tst-syscall.so tst-ifaddrs.so
+       tst-sigaction.so tst-syscall.so tst-ifaddrs.so tst-getdents.so
 #      libstatic-thread-variable.so tst-static-thread-variable.so \
 
 #TODO For now let us disable these tests for aarch64 until
diff --git a/tests/tst-getdents.cc b/tests/tst-getdents.cc
new file mode 100644
index 00000000..5803aaeb
--- /dev/null
+++ b/tests/tst-getdents.cc
@@ -0,0 +1,111 @@
+#include <dirent.h>     /* Defines DT_* constants */
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <cassert>
+
+#include <memory>
+#include <string>
+#include <vector>
+#include <algorithm>
+
+#define handle_error(msg) \
+    do { perror(msg); exit(EXIT_FAILURE); } while (0) // print msg via perror(), then exit with EXIT_FAILURE
+
+struct test_dirent64 { // Owning copy of one getdents64 record (d_reclen is not kept), comparable with ==
+    unsigned long  d_ino;
+    off_t          d_off;
+    unsigned char  d_type;
+    std::string    d_name;
+
+    bool operator ==(const test_dirent64 &b) const { // true only when all four fields match
+       return d_ino == b.d_ino &&
+              d_off == b.d_off &&
+              d_type == b.d_type &&
+              d_name == b.d_name;
+    }
+};
+
+// Lists dir_path with the raw getdents64 syscall using a buf_size-byte buffer, prints
+// every record and appends a copy of it to dirents. Exits the process on failure.
+// Loosely based on the example at https://man7.org/linux/man-pages/man2/getdents.2.html
+void test_getdents64(const char *dir_path, size_t buf_size, std::vector<test_dirent64> &dirents) {
+    struct linux_dirent64 {
+        unsigned long  d_ino;
+        off_t          d_off;
+        unsigned short d_reclen;
+        unsigned char  d_type;
+        char           d_name[];
+    };
+
+    int fd = open(dir_path, O_RDONLY | O_DIRECTORY);
+    if (fd == -1)
+        handle_error("open");
+
+    std::unique_ptr<char []> buf_ptr(new char[buf_size]);
+    char *buf = buf_ptr.get();
+
+    for (;;) {
+        long nread = syscall(SYS_getdents64, fd, buf, buf_size);
+        if (nread == -1)
+            handle_error("getdents64");
+
+        if (nread == 0) // no more entries
+            break;
+
+        printf("--------------- nread=%ld ---------------\n", nread);
+        printf("inode#    file type  d_reclen  d_off   d_name\n");
+        for (long bpos = 0; bpos < nread;) {
+            auto *d = reinterpret_cast<linux_dirent64 *>(buf + bpos);
+            printf("%8lu  ", d->d_ino); // d_ino is unsigned long, so %lu rather than %ld
+            char d_type = d->d_type;
+            printf("%-10s ", (d_type == DT_REG) ?  "regular" :
+                             (d_type == DT_DIR) ?  "directory" :
+                             (d_type == DT_FIFO) ? "FIFO" :
+                             (d_type == DT_SOCK) ? "socket" :
+                             (d_type == DT_LNK) ?  "symlink" :
+                             (d_type == DT_BLK) ?  "block dev" :
+                             (d_type == DT_CHR) ?  "char dev" : "???");
+
+            printf("%4d %10jd   %s\n", d->d_reclen, static_cast<intmax_t>(d->d_off), d->d_name);
+            bpos += d->d_reclen; // d_reclen is the distance to the next record
+
+            test_dirent64 dirent;
+            dirent.d_ino = d->d_ino;
+            dirent.d_off = d->d_off;
+            dirent.d_type = d_type;
+            dirent.d_name = d->d_name;
+            dirents.push_back(dirent);
+        }
+    }
+
+    close(fd);
+}
+
+#define LARGE_BUF_SIZE 1024 // buffer size for the first (reference) pass
+#define SMALL_BUF_SIZE 128  // smaller buffer size for the second pass
+
+int main()
+{
+    // Verify that getdents64 works correctly against /proc directory and yields
+    // correct results
+    std::vector<test_dirent64> dirents_1;
+    test_getdents64("/proc", LARGE_BUF_SIZE, dirents_1);
+
+    assert(std::count_if(dirents_1.begin(), dirents_1.end(), [](const test_dirent64 &d) { return d.d_type == DT_REG; }) >= 3);
+    assert(std::count_if(dirents_1.begin(), dirents_1.end(), [](const test_dirent64 &d) { return d.d_type == DT_DIR; }) >= 5);
+
+    assert(std::find_if(dirents_1.begin(), dirents_1.end(), [](const test_dirent64 &d) { return d.d_name == ".."; }) != dirents_1.end());
+    assert(std::find_if(dirents_1.begin(), dirents_1.end(), [](const test_dirent64 &d) { return d.d_name == "cpuinfo"; }) != dirents_1.end());
+    assert(std::find_if(dirents_1.begin(), dirents_1.end(), [](const test_dirent64 &d) { return d.d_name == "sys"; }) != dirents_1.end());
+
+    // Verify that getdents64 works with smaller buffer and yields same results as above
+    std::vector<test_dirent64> dirents_2;
+    test_getdents64("/proc", SMALL_BUF_SIZE, dirents_2);
+
+    assert(dirents_1 == dirents_2); // also compares sizes; 3-iterator std::equal would overrun a shorter dirents_2
+}
-- 
2.34.1

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/20220519011810.107902-1-jwkozaczuk%40gmail.com.

Reply via email to