cyb70289 commented on pull request #11588:
URL: https://github.com/apache/arrow/pull/11588#issuecomment-958669125


   Thanks @niyue !
   I ran a similar test that randomly reads at most 1/4 of the pages of a 1G 
memory-mapped file. Without `madvise`, almost all of the 1G of data is in the 
page cache after the program finishes.
   With `madvise(random)`, only 22% is in the page cache, as expected.
   Though I'm still not sure of the usefulness of tuning this option manually. 
In practice, I think it's pretty hard to get it right.
   
   **Test code:**
   ```c
   // test.c
   
   #include <stdio.h>
   #include <stdlib.h>
   #include <sys/stat.h>
   #include <sys/mman.h>
   #include <fcntl.h>
   
   // fixed seed: every run draws the same sequence of offsets
   const unsigned seed = 42;
   // the number of reads is the 4096-byte page count divided by N
   const int N = 4;
   
   // Reads single bytes of p[0..sz) at pseudo-random offsets, one read per
   // N pages of 4096 bytes, so at most 1/N of the pages are touched.
   // Returns the sum of the bytes that were read.
   int test_random_read(const char *p, size_t sz)
   {
       srand(seed);
   
       const size_t reads = sz / 4096 / N;
       int total = 0;
       size_t done = 0;
       while (done < reads) {
           // scale rand() into [0, 1], then into an offset in [0, sz - 2]
           const double frac = (double)rand() / RAND_MAX;
           const size_t off = (size_t)(frac * (sz - 2));
           total += p[off];
           ++done;
       }
       return total;
   }
   
   // Maps ./test.bin read-only, applies POSIX_MADV_RANDOM when run without
   // extra arguments, performs the random reads and exits with their byte sum.
   int main(int argc, char *argv[])
   {
       // test.bin is filled with 1G random data
       const int file = open("./test.bin", O_RDONLY);
       if (file < 0) {
           abort();
       }
   
       // the size of the file is the length of the mapping
       struct stat info;
       if (fstat(file, &info) < 0) {
           abort();
       }
       const size_t length = info.st_size;
   
       char *base = mmap(NULL, length, PROT_READ, MAP_SHARED, file, 0);
       if (base == MAP_FAILED) {
           abort();
       }
       printf("%d pages mapped\n", (int)(length/4096));
   
       // results on my test box (ubuntu20.04, linux 5.8, 16G ram, x86_64):
       // - with madvise, 22% (1/N) file is in page cache when program finishes
       // - without madvise, 98% file is in page cache
       const int use_madvise = (argc == 1);
       if (!use_madvise) {
           printf("without madvise\n");
       } else {
           printf("with madvise(random)\n");
           if (posix_madvise(base, length, POSIX_MADV_RANDOM) != 0) {
               abort();
           }
       }
   
       // the sum of the bytes read becomes the exit status
       const int checksum = test_random_read(base, length);
   
       munmap(base, length);
       return checksum;
   }
   ```
   
   **Test steps:**
   
   - Build `pcstat` binary from https://github.com/tobert/pcstat, master branch.
   - Steps:
   ```bash
   # create 1G test file
   $ dd if=/dev/urandom of=test.bin bs=1M count=1K
   1024+0 records in
   1024+0 records out
   1073741824 bytes (1.1 GB, 1.0 GiB) copied, 15.5135 s, 69.2 MB/s
   $ sudo sync
   
   # make sure test.bin is not in page cache
   $ sudo sh -c 'echo 1 > /proc/sys/vm/drop_caches'
   $ pcstat test.bin
   +----------+----------------+------------+-----------+---------+
   | Name     | Size (bytes)   | Pages      | Cached    | Percent |
   |----------+----------------+------------+-----------+---------|
   | test.bin | 1073741824     | 262144     | 0         |   0.000 |
   +----------+----------------+------------+-----------+---------+
   
   # evaluate test program with madvise(random)
   $ gcc -O3 test.c && ./a.out
   262144 pages mapped
   with madvise(random)
   $ pcstat test.bin
   +----------+----------------+------------+-----------+---------+
   | Name     | Size (bytes)   | Pages      | Cached    | Percent |
   |----------+----------------+------------+-----------+---------|
   | test.bin | 1073741824     | 262144     | 57943     |  22.104 |
   +----------+----------------+------------+-----------+---------+
   
   # make sure test.bin is not in page cache
   $ sudo sh -c 'echo 1 > /proc/sys/vm/drop_caches'
   $ pcstat test.bin
   +----------+----------------+------------+-----------+---------+
   | Name     | Size (bytes)   | Pages      | Cached    | Percent |
   |----------+----------------+------------+-----------+---------|
   | test.bin | 1073741824     | 262144     | 0         |   0.000 |
   +----------+----------------+------------+-----------+---------+
   
   # evaluate test program without madvise
   $ gcc -O3 test.c && ./a.out nomadvise
   262144 pages mapped
   without madvise
   $ pcstat test.bin
   +----------+----------------+------------+-----------+---------+
   | Name     | Size (bytes)   | Pages      | Cached    | Percent |
   |----------+----------------+------------+-----------+---------|
   | test.bin | 1073741824     | 262144     | 258625    |  98.658 |
   +----------+----------------+------------+-----------+---------+
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to