This patch mostly addresses issue #854. In essence, it changes malloc_large()
to use mmu::map_anon() if the requested memory is greater than or equal to the
"huge page" size (2MB) and does not have to be contiguous in physical memory.
It also makes malloc_large() fall back to map_anon()-based allocation for
requests > 4K and < 2MB that cannot be satisfied when memory is heavily
fragmented.
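
In rough terms, the decision this patch introduces can be sketched with the
small self-contained example below. The constant and the use_map_anon()
helper are purely illustrative (they are not OSv symbols); the real logic
lives in malloc_large() in the diff that follows.

    #include <cstddef>
    #include <cassert>

    // Illustrative constant; in OSv this comes from mmu:: (2MB huge pages)
    static constexpr size_t huge_page_size = 2 * 1024 * 1024;

    // Returns true when a large allocation should be served by mmu::map_anon()
    // instead of a contiguous page range from free_page_ranges:
    //  - immediately, when the request is at least huge-page sized and the
    //    caller does not need physically contiguous memory, or
    //  - as a fallback, when the page-range allocator could not find a large
    //    enough contiguous range (heavy fragmentation) and contiguity is optional.
    static bool use_map_anon(size_t size, bool contiguous, bool page_range_alloc_failed)
    {
        if (size >= huge_page_size && !contiguous) {
            return true;
        }
        return !contiguous && page_range_alloc_failed;
    }

    int main()
    {
        assert(use_map_anon(4 * 1024 * 1024, false, false));  // big, non-contiguous
        assert(!use_map_anon(4 * 1024 * 1024, true, false));  // must be contiguous
        assert(use_map_anon(64 * 1024, false, true));         // fragmented fallback
        assert(!use_map_anon(64 * 1024, false, false));       // normal path suffices
        return 0;
    }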

It is supposed to help OSv memory allocation behave better when memory
in free_page_ranges is heavily fragmented and large allocations (>= 2MB)
cannot be satisfied with a single contiguous page range. While working on
this patch it also became clear that applications that heavily use realloc(),
which allocates new chunks of memory and frees old ones, are quite likely to
encounter heavy memory fragmentation. Hopefully this patch helps in these
scenarios as well.
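
For illustration, a growth pattern like the one sketched below (an example
written for this commit message, not code from the patch) keeps requesting a
larger chunk and freeing the smaller one, which tends to leave progressively
larger holes behind in free_page_ranges:

    #include <cstdlib>
    #include <cstring>

    int main()
    {
        // Grow a buffer in steps; each realloc() above the small-allocation
        // threshold may allocate a new, larger chunk and free the old one,
        // leaving behind a hole that later, bigger requests cannot reuse.
        size_t size = 4096;
        void* buf = malloc(size);
        for (int i = 0; i < 10 && buf; i++) {
            size *= 2;
            void* bigger = realloc(buf, size);
            if (!bigger) {
                break;            // keep the old buffer on failure
            }
            buf = bigger;
            memset(buf, 0, size); // touch the memory so it is really used
        }
        free(buf);
        return 0;
    }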

In addition, object_size() and large_object_size() needed to be adjusted
to work with the new malloc_large() implementation. As a result, other
related issues are fixed by this patch as well.
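
The underlying reason is the header convention used by the new map_anon()-based
path: the mapping starts with a size_t holding the total mapped size, and the
caller receives a pointer offset past that header, so the usable size is the
stored size minus that offset. A minimal sketch of the arithmetic (the
mapped_object_size() helper is illustrative, not the function added by the
patch):

    #include <cstddef>
    #include <cstdint>

    // Mimics how the map_anon()-based allocations recover their size: the
    // mapping begins with a size_t header holding the total mapped size and
    // the object pointer handed to the caller is mapping_start + offset.
    static size_t mapped_object_size(void* object, size_t page_size = 4096)
    {
        uintptr_t addr = reinterpret_cast<uintptr_t>(object);
        // Find the start of the page-aligned mapping the object lives in
        uintptr_t mapping_start = (addr - 1) & ~(uintptr_t)(page_size - 1);
        size_t offset = addr - mapping_start;
        size_t total = *reinterpret_cast<size_t*>(mapping_start);
        return total - offset;  // usable bytes from the caller's pointer onward
    }

    int main()
    {
        // Fake a two-page "mapping": store the total size in the first size_t
        // and hand out a pointer 16 bytes past the start, as if offset == 16.
        alignas(4096) static char mapping[2 * 4096];
        *reinterpret_cast<size_t*>(mapping) = sizeof(mapping);
        void* object = mapping + 16;
        return mapped_object_size(object) == sizeof(mapping) - 16 ? 0 : 1;
    }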

Lastly, this patch adds a new test verifying realloc() and malloc_usable_size().

Fixes #784
Fixes #854
Fixes #1077

Signed-off-by: Waldemar Kozaczuk <jwkozac...@gmail.com>
---
 core/mempool.cc        | 81 ++++++++++++++++++++++++++++++------
 modules/tests/Makefile |  2 +-
 tests/tst-realloc.cc   | 94 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 163 insertions(+), 14 deletions(-)
 create mode 100644 tests/tst-realloc.cc

diff --git a/core/mempool.cc b/core/mempool.cc
index 11fd1456..50f5e877 100644
--- a/core/mempool.cc
+++ b/core/mempool.cc
@@ -546,7 +546,7 @@ public:
     page_range_allocator() : _deferred_free(nullptr) { }
 
     template<bool UseBitmap = true>
-    page_range* alloc(size_t size);
+    page_range* alloc(size_t size, bool contiguous = true);
     page_range* alloc_aligned(size_t size, size_t offset, size_t alignment,
                               bool fill = false);
     void free(page_range* pr);
@@ -678,7 +678,7 @@ void page_range_allocator::bitmap_allocator<T>::deallocate(T* p, size_t n)
 }
 
 template<bool UseBitmap>
-page_range* page_range_allocator::alloc(size_t size)
+page_range* page_range_allocator::alloc(size_t size, bool contiguous)
 {
     auto exact_order = ilog2_roundup(size / page_size);
     if (exact_order > max_order) {
@@ -692,13 +692,12 @@ page_range* page_range_allocator::alloc(size_t size)
 
     page_range* range = nullptr;
     if (!bitset) {
-        if (!exact_order || _free[exact_order - 1].empty()) {
+        if (!contiguous || !exact_order || _free[exact_order - 1].empty()) {
             return nullptr;
         }
-        // TODO: This linear search makes worst case complexity of the allocator
-        // O(n). It would be better to fall back to non-contiguous allocation
-        // and make worst case complexity depend on the size of requested memory
-        // block and the logarithm of the number of free huge page ranges.
+        // This linear search makes the worst case complexity of the allocator
+        // O(n). Unfortunately, we have no choice for contiguous allocations,
+        // so let us hope there is a large enough range.
         for (auto&& pr : _free[exact_order - 1]) {
             if (pr.size >= size) {
                 range = &pr;
@@ -823,7 +822,23 @@ void page_range_allocator::for_each(unsigned min_order, Func f)
     }
 }
 
-static void* malloc_large(size_t size, size_t alignment, bool block = true)
+static void* mapped_malloc_large(size_t size, size_t offset)
+{
+    // TODO: For now pre-populate the memory; in the future consider lazy population
+    void* obj = mmu::map_anon(nullptr, size, mmu::mmap_populate, mmu::perm_read | mmu::perm_write);
+    size_t* ret_header = static_cast<size_t*>(obj);
+    *ret_header = size;
+    return obj + offset;
+}
+
+static void mapped_free_large(void *object)
+{
+    object = align_down(object - 1, mmu::page_size);
+    size_t* ret_header = static_cast<size_t*>(object);
+    mmu::munmap(object, *ret_header);
+}
+
+static void* malloc_large(size_t size, size_t alignment, bool block = true, bool contiguous = true)
 {
     auto requested_size = size;
     size_t offset;
@@ -835,6 +850,14 @@ static void* malloc_large(size_t size, size_t alignment, bool block = true)
     size += offset;
     size = align_up(size, page_size);
 
+    // Use mmap if the requested memory is greater than or equal to
+    // the "huge page" size and does not need to be contiguous
+    if (size >= mmu::huge_page_size && !contiguous) {
+        void* obj = mapped_malloc_large(size, offset);
+        trace_memory_malloc_large(obj, requested_size, size, alignment);
+        return obj;
+    }
+
     while (true) {
         WITH_LOCK(free_page_ranges_lock) {
             reclaimer_thread.wait_for_minimum_memory();
@@ -842,7 +865,7 @@ static void* malloc_large(size_t size, size_t alignment, bool block = true)
             if (alignment > page_size) {
                 ret_header = free_page_ranges.alloc_aligned(size, page_size, alignment);
             } else {
-                ret_header = free_page_ranges.alloc(size);
+                ret_header = free_page_ranges.alloc(size, contiguous);
             }
             if (ret_header) {
                 on_alloc(size);
@@ -850,6 +873,11 @@ static void* malloc_large(size_t size, size_t alignment, bool block = true)
                 obj += offset;
                 trace_memory_malloc_large(obj, requested_size, size, alignment);
                 return obj;
+            } else if (!contiguous) {
+                // If we failed to get a contiguous memory allocation and
+                // the caller does not require one, fall back to map-based
+                // allocation, which we do after the loop below
+                break;
             }
             if (block)
                 reclaimer_thread.wait_for_memory(size);
@@ -857,6 +885,15 @@ static void* malloc_large(size_t size, size_t alignment, bool block = true)
                 return nullptr;
         }
     }
+
+    // We deliberately execute this code here because doing it
+    // in the WITH_LOCK section above would likely lead to a deadlock,
+    // as map_anon() would eventually pull memory from free_page_ranges
+    // to satisfy the request and, even worse, this method might get
+    // called recursively.
+    void* obj = mapped_malloc_large(size, offset);
+    trace_memory_malloc_large(obj, requested_size, size, alignment);
+    return obj;
 }
 
 void shrinker::deactivate_shrinker()
@@ -1070,11 +1107,18 @@ static void free_large(void* obj)
     free_page_range(static_cast<page_range*>(obj));
 }
 
-static unsigned large_object_size(void *obj)
+static size_t large_object_offset(void *&obj)
 {
+    void *original_obj = obj;
     obj = align_down(obj - 1, page_size);
+    return reinterpret_cast<uint64_t>(original_obj) - reinterpret_cast<uint64_t>(obj);
+}
+
+static size_t large_object_size(void *obj)
+{
+    size_t offset = large_object_offset(obj);
     auto header = static_cast<page_range*>(obj);
-    return header->size;
+    return header->size - offset;
 }
 
 namespace page_pool {
@@ -1692,7 +1736,7 @@ static inline void* std_malloc(size_t size, size_t alignment)
                                        memory::alloc_page());
         trace_memory_malloc_page(ret, size, mmu::page_size, alignment);
     } else {
-        ret = memory::malloc_large(size, alignment);
+        ret = memory::malloc_large(size, alignment, true, false);
     }
     memory::tracker_remember(ret, size);
     return ret;
@@ -1714,6 +1758,12 @@ void* calloc(size_t nmemb, size_t size)
 
 static size_t object_size(void *object)
 {
+    if (!mmu::is_linear_mapped(object, 0)) {
+        size_t offset = memory::large_object_offset(object);
+        size_t* ret_header = static_cast<size_t*>(object);
+        return *ret_header - offset;
+    }
+
     switch (mmu::get_mem_area(object)) {
     case mmu::mem_area::main:
         return memory::large_object_size(object);
@@ -1763,6 +1813,11 @@ void free(void* object)
         return;
     }
     memory::tracker_forget(object);
+
+    if (!mmu::is_linear_mapped(object, 0)) {
+        return memory::mapped_free_large(object);
+    }
+
     switch (mmu::get_mem_area(object)) {
     case mmu::mem_area::page:
         object = mmu::translate_mem_area(mmu::mem_area::page,
@@ -1972,7 +2027,7 @@ void* alloc_phys_contiguous_aligned(size_t size, size_t align, bool block)
     assert(is_power_of_two(align));
     // make use of the standard large allocator returning properly aligned
     // physically contiguous memory:
-    auto ret = malloc_large(size, align, block);
+    auto ret = malloc_large(size, align, block, true);
     assert (!(reinterpret_cast<uintptr_t>(ret) & (align - 1)));
     return ret;
 }
diff --git a/modules/tests/Makefile b/modules/tests/Makefile
index ce004339..10df022f 100644
--- a/modules/tests/Makefile
+++ b/modules/tests/Makefile
@@ -129,7 +129,7 @@ tests := tst-pthread.so misc-ramdisk.so tst-vblk.so tst-bsd-evh.so \
        tst-sigaltstack.so tst-fread.so tst-tcp-cork.so tst-tcp-v6.so \
        tst-calloc.so tst-crypt.so tst-non-fpic.so tst-small-malloc.so \
        tst-mmx-fpu.so tst-getopt.so tst-getopt-pie.so tst-non-pie.so tst-semaphore.so \
-       tst-elf-init.so
+       tst-elf-init.so tst-realloc.so
 #      libstatic-thread-variable.so tst-static-thread-variable.so \
 
 tests += testrunner.so
diff --git a/tests/tst-realloc.cc b/tests/tst-realloc.cc
new file mode 100644
index 00000000..49edfb7a
--- /dev/null
+++ b/tests/tst-realloc.cc
@@ -0,0 +1,94 @@
+/*
+* Copyright (C) 2020 Waldemar Kozaczuk
+*
+* This work is open source software, licensed under the terms of the
+* BSD license as described in the LICENSE file in the top-level directory.
+*/
+
+#include <stdlib.h>
+#include <string.h>
+#include <cassert>
+#include <iostream>
+
+extern "C" size_t malloc_usable_size (void *ptr);
+
+static void test_realloc(size_t original_size, size_t new_size)
+{
+    char data[11] = "0123456789";
+
+    void *original_buf = malloc(original_size);
+    assert(original_buf);
+
+    char *buf = static_cast<char*>(original_buf);
+    for (size_t i = 0; i < original_size; i++) {
+        buf[i] = data[i % 10];
+    }
+
+    void *new_buf = realloc(original_buf, new_size);
+    assert(new_buf);
+
+    auto expected_same_data_len = std::min(original_size, new_size);
+    buf = static_cast<char*>(new_buf);
+    for (size_t i = 0; i < expected_same_data_len; i++) {
+        assert(buf[i] == data[i % 10]);
+    }
+
+    free(new_buf);
+
+    std::cerr << "PASSED realloc() for original_size: " << original_size << ", 
new_size: " << new_size << std::endl;
+}
+
+static void test_usable_size(size_t size, size_t expected_usable_size)
+{
+    void* ptr = malloc(size);
+    assert(expected_usable_size == malloc_usable_size(ptr));
+    free(ptr);
+
+    std::cerr << "PASSED malloc_usable_size() for size: " << size << std::endl;
+}
+
+int main()
+{
+    test_realloc(1,2);
+    test_realloc(2,1);
+
+    test_realloc(4,7);
+    test_realloc(7,4);
+
+    test_realloc(63,128);
+    test_realloc(128,63);
+
+    test_realloc(4000,5000);
+    test_realloc(5000,4000);
+
+    test_realloc(4096,4096);
+
+    test_realloc(0x100000,0x100000);
+    test_realloc(0x100000,0x100900);
+    test_realloc(0x100900,0x100000);
+
+    test_realloc(0x200000,0x200000);
+    test_realloc(0x200000,0x300900);
+    test_realloc(0x300900,0x200000);
+
+    test_realloc(0x600900,0x600000);
+    test_realloc(0x400000,0x600000);
+    test_realloc(0x600000,0x400900);
+
+    void *buf = realloc(nullptr, 0);
+    assert(buf);
+    free(buf);
+
+    buf = malloc(16);
+    assert(!realloc(buf, 0));
+
+    test_usable_size(1, 8);
+    test_usable_size(8, 8);
+    test_usable_size(67, 128);
+    test_usable_size(0x4010, 0x4FC0);
+    test_usable_size(0x100000, 0x100FC0);
+    test_usable_size(0x200000, 0x200FC0);
+
+    std::cerr << "PASSED\n";
+    return 0;
+}
\ No newline at end of file
-- 
2.20.1
