From: Waldemar Kozaczuk <jwkozac...@gmail.com>
Committer: Waldemar Kozaczuk <jwkozac...@gmail.com>
Branch: master

rofs: optimize memory utilization by integrating with page cache

This patch optimizes memory utilization by integrating with page cache.
In essence it eliminates the second copy of file data in memory when mapping files 
using mmap().
For example, a simple Java application needs 9MB less memory to run.

The crux of the changes involves adding a new vnops function of type VOP_CACHE -
rofs_map_cached_page() - that ensures that the requested page of a file is loaded
from disk into the ROFS cache (by triggering a read from disk if missing) and
eventually registers the page with the page cache by calling 
pagecache::map_read_cached_page().

This partially addresses #979

Signed-off-by: Waldemar Kozaczuk <jwkozac...@gmail.com>

---
diff --git a/fs/rofs/rofs.hh b/fs/rofs/rofs.hh
--- a/fs/rofs/rofs.hh
+++ b/fs/rofs/rofs.hh
@@ -128,6 +128,8 @@ struct rofs_info {
 namespace rofs {
     int
     cache_read(struct rofs_inode *inode, struct device *device, struct 
rofs_super_block *sb, struct uio *uio);
+    int
+    cache_get_page_address(struct rofs_inode *inode, struct device *device, 
struct rofs_super_block *sb, struct uio *uio, void **addr);
 }
 
 int rofs_read_blocks(struct device *device, uint64_t starting_block, uint64_t 
blocks_count, void* buf);
diff --git a/fs/rofs/rofs_cache.cc b/fs/rofs/rofs_cache.cc
--- a/fs/rofs/rofs_cache.cc
+++ b/fs/rofs/rofs_cache.cc
@@ -10,8 +10,10 @@
 #include <list>
 #include <unordered_map>
 #include <include/osv/uio.h>
+#include <include/osv/contiguous_alloc.hh>
 #include <osv/debug.h>
 #include <osv/sched.hh>
+#include <sys/mman.h>
 
 /*
  * From cache perspective let us divide each file into sequence of contiguous 
32K segments.
@@ -56,20 +58,36 @@ class file_cache_segment {
         this->starting_block = _starting_block;
         this->block_count = _block_count;
         this->data_ready = false;   // Data has to be loaded from disk
-        this->data = malloc(_cache->sb->block_size * _block_count);
+        auto size = _cache->sb->block_size * _block_count;
+        // Only allocate contiguous page-aligned memory if size greater or 
equal a page
+        // to make sure page-cache mapping works properly
+        if (size >= mmu::page_size) {
+            this->data = memory::alloc_phys_contiguous_aligned(size, 
mmu::page_size);
+        } else {
+            this->data = malloc(size);
+        }
 #if defined(ROFS_DIAGNOSTICS_ENABLED)
         rofs_block_allocated += block_count;
 #endif
     }
 
     ~file_cache_segment() {
+        // Mirror the constructor's allocation strategy: segments of at least
+        // one page were allocated with alloc_phys_contiguous_aligned() (so
+        // they can be handed to the page cache), smaller ones with malloc();
+        // recompute the size the same way to pick the matching deallocator
         auto size = this->cache->sb->block_size * this->block_count;
+        if (size >= mmu::page_size) {
+            memory::free_phys_contiguous_aligned(this->data);
+        } else {
+            free(this->data);
+        }
     }
 
     uint64_t length() {
         return this->block_count * this->cache->sb->block_size;
     }
 
+    // Return the in-memory address of the cached data at the given byte
+    // offset within this segment. Only meaningful once is_data_ready()
+    // is true; the caller is responsible for bounds (offset < length()).
+    void* memory_address(off_t offset) {
+        // Cast before arithmetic: pointer arithmetic on void* is a GCC
+        // extension, not standard C++
+        return static_cast<char*>(this->data) + offset;
+    }
+
     bool is_data_ready() {
         return this->data_ready;
     }
@@ -93,12 +111,16 @@ class file_cache_segment {
             blocks_remaining++;
         }
         auto block_count_to_read = std::min(block_count, blocks_remaining);
-        print("[rofs] [%d] -> file_cache_segment::write() i-node: %d, starting 
block %d, reading [%d] blocks at disk offset [%d]\n",
+        print("[rofs] [%d] -> file_cache_segment::read_from_disk() i-node: %d, 
starting block %d, reading [%d] blocks at disk offset [%d]\n",
               sched::thread::current()->id(), cache->inode->inode_no, 
starting_block, block_count_to_read, block);
         auto error = rofs_read_blocks(device, block, block_count_to_read, 
data);
         this->data_ready = (error == 0);
         if (error) {
-            print("!!!!! Error reading from disk\n");
+            printf("!!!!! Error reading from disk\n");
+        } else {
+            if (bytes_remaining < this->length()) {
+                memset(data + bytes_remaining, 0, this->length() - 
bytes_remaining);
+            }
         }
         return error;
     }
@@ -190,8 +212,8 @@ plan_cache_transactions(struct file_cache *cache, struct 
uio *uio) {
             bytes_to_read -= transaction.bytes_to_read;
             transactions.push_back(transaction);
         }
-            //
-            // Miss -> read from disk
+        //
+        // Miss -> read from disk
         else {
             print("[rofs] [%d] -> rofs_cache_get_segment_operations i-node: 
%d, cache segment %d MISS at file offset %d\n",
                   sched::thread::current()->id(), cache->inode->inode_no, 
cache_segment_index, file_offset);
@@ -271,4 +293,43 @@ cache_read(struct rofs_inode *inode, struct device 
*device, struct rofs_super_bl
     return error;
 }
 
+// Ensure the page of the file at uio->uio_offset is present in the ROFS cache
+// in memory - loading it from disk if missing - and return the address of the
+// page through *addr.
+//
+// Returns 0 on success (*addr set to the cached page), or the disk-read error
+// code (*addr set to nullptr). The caller (rofs_map_cached_page) guarantees
+// the request is exactly one page-aligned page.
+int
+cache_get_page_address(struct rofs_inode *inode, struct device *device, struct rofs_super_block *sb, struct uio *uio, void **addr)
+{
+    // Find existing file cache for this i-node or create a new one
+    struct file_cache *cache = get_or_create_file_cache(inode, sb);
+
+    // Plan what is needed to satisfy the request: either the segment is
+    // already cached (copy/hit) or it has to be read from disk first.
+    // A single page-aligned page never straddles a 32K segment boundary,
+    // so exactly one transaction is expected (asserted below).
+    auto segment_transactions = plan_cache_transactions(cache, uio);
+    print("[rofs] [%d] rofs_get_page_address called for i-node [%d] at %d with %d ops\n",
+          sched::thread::current()->id(), inode->inode_no, uio->uio_offset, segment_transactions.size());
+
+    int error = 0;
+
+    assert(segment_transactions.size() == 1);
+    auto transaction = segment_transactions[0];
+#if defined(ROFS_DIAGNOSTICS_ENABLED)
+    rofs_cache_reads += 1;
+#endif
+    if (transaction.transaction_type == CacheTransactionType::READ_FROM_DISK) {
+        // Segment missing from cache, or present but empty because an
+        // earlier read failed - (re)read it from disk now
+        error = transaction.segment->read_from_disk(device);
+#if defined(ROFS_DIAGNOSTICS_ENABLED)
+        rofs_cache_misses += 1;
+#endif
+    }
+
+    if (!error)
+        *addr = transaction.segment->memory_address(transaction.segment_offset);
+    else
+        *addr = nullptr;
+
+    return error;
+}
+
 }
diff --git a/fs/rofs/rofs_vnops.cc b/fs/rofs/rofs_vnops.cc
--- a/fs/rofs/rofs_vnops.cc
+++ b/fs/rofs/rofs_vnops.cc
@@ -40,6 +40,7 @@
 #include <sys/types.h>
 #include <osv/device.h>
 #include <osv/sched.hh>
+#include <osv/pagecache.hh>
 
 #include "rofs.hh"
 
@@ -275,6 +276,41 @@ static int rofs_getattr(struct vnode *vnode, struct vattr 
*attr)
     return 0;
 }
 
+// VOP_CACHE handler for ROFS: make sure the single page of the file at the
+// page-aligned offset uio->uio_offset is resident in the ROFS cache, then
+// register that cache-owned page with the page cache so mmap() maps it
+// directly instead of keeping a second copy of the data.
+// Expects uio to describe exactly one page (uio_resid == page_size) at a
+// page-aligned offset; returns EISDIR/EINVAL for invalid requests and 0
+// (without mapping anything) for offsets at or past EOF.
+int rofs_map_cached_page(struct vnode *vnode, struct file* fp, struct uio 
*uio) {
+    struct rofs_info *rofs = (struct rofs_info *) vnode->v_mount->m_data;
+    struct rofs_super_block *sb = rofs->sb;
+    struct rofs_inode *inode = (struct rofs_inode *) vnode->v_data;
+    struct device *device = vnode->v_mount->m_dev;
+
+    if (vnode->v_type == VDIR)
+        return EISDIR;
+    /* Cant read anything but reg */
+    if (vnode->v_type != VREG)
+        return EINVAL;
+    /* Cant start reading before the first byte */
+    if (uio->uio_offset < 0)
+        return EINVAL;
+    /* Cant read after the end of the file */
+    if (uio->uio_offset >= (off_t)vnode->v_size)
+        return 0;
+    if (uio->uio_resid != mmu::page_size)
+        return EINVAL;
+    if (uio->uio_offset % mmu::page_size)
+        return EINVAL;
+
+    // Ask the ROFS cache for the in-memory address of the page, triggering
+    // a disk read if it is not cached yet
+    void *page_address;
+    int ret = rofs::cache_get_page_address(inode, device, sb, uio, 
&page_address);
+
+    // NOTE(review): iov_base here carries a pagecache::hashkey*, not a data
+    // buffer - confirm against the page-cache caller that fills in this uio
+    if (!ret) {
+        
pagecache::map_read_cached_page((pagecache::hashkey*)uio->uio_iov->iov_base, 
page_address);
+        uio->uio_resid = 0;
+    } else {
+        abort("ROFS cache failed!");
+    }
+
+    return ret;
+}
+
 #define rofs_write       ((vnop_write_t)vop_erofs)
 #define rofs_seek        ((vnop_seek_t)vop_nullop)
 #define rofs_ioctl       ((vnop_ioctl_t)vop_nullop)
@@ -287,7 +323,6 @@ static int rofs_getattr(struct vnode *vnode, struct vattr 
*attr)
 #define rofs_inactive    ((vnop_inactive_t)vop_nullop)
 #define rofs_truncate    ((vnop_truncate_t)vop_erofs)
 #define rofs_link        ((vnop_link_t)vop_erofs)
-#define rofs_arc         ((vnop_cache_t) nullptr)
 #define rofs_fallocate   ((vnop_fallocate_t)vop_erofs)
 #define rofs_fsync       ((vnop_fsync_t)vop_nullop)
 #define rofs_symlink     ((vnop_symlink_t)vop_erofs)
@@ -312,12 +347,13 @@ struct vnops rofs_vnops = {
     rofs_inactive,           /* inactive */
     rofs_truncate,           /* truncate - returns error when called*/
     rofs_link,               /* link - returns error when called*/
-    rofs_arc,                /* arc */ //TODO: Implement to allow memory 
re-use when mapping files
+    rofs_map_cached_page,
     rofs_fallocate,          /* fallocate - returns error when called*/
     rofs_readlink,           /* read link */
     rofs_symlink             /* symbolic link - returns error when called*/
 };
 
 extern "C" void rofs_disable_cache() {
     rofs_vnops.vop_read = rofs_read_without_cache;
+    // With the ROFS cache disabled there are no cache-owned pages to hand to
+    // the page cache, so the VOP_CACHE hook must be disabled as well
+    rofs_vnops.vop_cache = (vnop_cache_t) nullptr;
 }

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/000000000000bfe3fb05a484e629%40google.com.

Reply via email to