This patch enhances the virtual memory mapping code to track both linear (linear_map) and non-linear (mmap()) virtual memory mappings.

It does so by adding a simple struct, vma_range, and a vma_range_set collection that stores all mappings. It then changes the mmu::find_hole() implementation to use vma_range_set instead of vma_list when searching for the next hole, which avoids the collisions described in issue #1135.

Fixes #1135

Signed-off-by: Waldemar Kozaczuk <jwkozac...@gmail.com>
---
 core/mmu.cc         | 76 +++++++++++++++++++++++++++++++++++++++++----
 include/osv/mmu.hh  | 31 ++++++++++++++++++
 include/osv/prio.hh |  1 +
 3 files changed, 102 insertions(+), 6 deletions(-)
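Illustrative note (not part of the patch): find_hole() now scans the gaps between consecutive entries of one ordered set of [start, end) ranges covering both mapping kinds, with sentinel ranges marking the edges of the allocatable area. Below is a minimal standalone C++ sketch of that search under simplified assumptions - the range struct, names, and addresses are hypothetical, and the patch's huge-page alignment path and vma_range_set_mutex read lock are omitted.

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <set>

struct range {             // simplified stand-in for vma_range: a [start, end) span
    uintptr_t start, end;
};

struct range_compare {     // order ranges by start address, as vma_range_compare does
    bool operator()(const range& a, const range& b) const { return a.start < b.start; }
};

// Heterogeneous comparator letting std::lower_bound compare a range against a
// raw address, mirroring vma_range_addr_compare in the patch.
struct addr_compare {
    bool operator()(const range& x, uintptr_t y) const { return x.start < y; }
    bool operator()(uintptr_t x, const range& y) const { return x < y.start; }
};

// Find a hole of `size` bytes at or after `start` between consecutive mapped
// ranges. Assumes the set holds zero-length sentinels at both edges of the
// allocatable area, like the lower/upper edge markers the patch inserts.
uintptr_t find_hole(const std::set<range, range_compare>& ranges,
                    uintptr_t start, uintptr_t size)
{
    // position p at the last range that begins at or before `start`
    auto p = std::lower_bound(ranges.begin(), ranges.end(), start, addr_compare());
    if (p != ranges.begin()) {
        --p;
    }
    for (auto n = std::next(p); n != ranges.end(); p = n++) {
        if (start >= p->end && start + size <= n->start) {
            return start;      // the requested address itself fits
        }
        if (p->end >= start && n->start - p->end >= size) {
            return p->end;     // shift the hole up to the end of p
        }
    }
    return 0;                  // no hole found
}

int main()
{
    std::set<range, range_compare> ranges{
        {0x0, 0x0},                       // lower edge sentinel
        {0x1000, 0x3000},
        {0x5000, 0x6000},
        {0x800000000000, 0x800000000000}, // upper edge sentinel
    };
    std::cout << std::hex << find_hole(ranges, 0x1000, 0x1000) << '\n'; // prints 3000
}

Because every mapping - linear or not - appears in the same sorted set, a hole chosen this way cannot collide with a linear mapping that vma_list never saw, which is exactly the failure mode of issue #1135.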
diff --git a/core/mmu.cc b/core/mmu.cc
index a479e5c4..80ce6b63 100644
--- a/core/mmu.cc
+++ b/core/mmu.cc
@@ -47,6 +47,17 @@ extern const char text_start[], text_end[];
 
 namespace mmu {
 
+struct vma_range_compare {
+    bool operator()(const vma_range& a, const vma_range& b) {
+        return a.start() < b.start();
+    }
+};
+
+//Set of all vma ranges - both linear and non-linear ones
+__attribute__((init_priority((int)init_prio::vma_range_set)))
+std::set<vma_range, vma_range_compare> vma_range_set;
+rwlock_t vma_range_set_mutex;
+
 struct linear_vma_compare {
     bool operator()(const linear_vma* a, const linear_vma* b) {
         return a->_virt_addr < b->_virt_addr;
@@ -66,6 +77,9 @@ public:
     }
 };
 
+constexpr uintptr_t lower_vma_limit = 0x0;
+constexpr uintptr_t upper_vma_limit = 0x800000000000;
+
 typedef boost::intrusive::set<vma,
                               bi::compare<vma_compare>,
                               bi::member_hook<vma,
@@ -78,9 +92,15 @@ struct vma_list_type : vma_list_base {
     vma_list_type() {
         // insert markers for the edges of allocatable area
         // simplifies searches
-        insert(*new anon_vma(addr_range(0, 0), 0, 0));
-        uintptr_t e = 0x800000000000;
-        insert(*new anon_vma(addr_range(e, e), 0, 0));
+        auto lower_edge = new anon_vma(addr_range(lower_vma_limit, lower_vma_limit), 0, 0);
+        insert(*lower_edge);
+        auto upper_edge = new anon_vma(addr_range(upper_vma_limit, upper_vma_limit), 0, 0);
+        insert(*upper_edge);
+
+        WITH_LOCK(vma_range_set_mutex.for_write()) {
+            vma_range_set.insert(vma_range(lower_edge));
+            vma_range_set.insert(vma_range(upper_edge));
+        }
     }
 };
 
@@ -954,27 +974,41 @@ static error protect(const void *addr, size_t size, unsigned int perm)
     return no_error();
 }
 
+class vma_range_addr_compare {
+public:
+    bool operator()(const vma_range& x, uintptr_t y) const { return x.start() < y; }
+    bool operator()(uintptr_t x, const vma_range& y) const { return x < y.start(); }
+};
+
 uintptr_t find_hole(uintptr_t start, uintptr_t size)
 {
     bool small = size < huge_page_size;
     uintptr_t good_enough = 0;
 
-    // FIXME: use lower_bound or something
-    auto p = vma_list.begin();
+    SCOPE_LOCK(vma_range_set_mutex.for_read());
+    //Find first vma range which starts before the start parameter or is the 1st one
+    auto p = std::lower_bound(vma_range_set.begin(), vma_range_set.end(), start, vma_range_addr_compare());
+    if (p != vma_range_set.begin()) {
+        --p;
+    }
     auto n = std::next(p);
-    while (n != vma_list.end()) {
+    while (n->start() <= upper_vma_limit) { //we only go up to the upper mmap vma limit
+        //See if desired hole fits between p and n vmas
         if (start >= p->end() && start + size <= n->start()) {
             return start;
         }
+        //See if shifting start to the end of p makes desired hole fit between p and n
         if (p->end() >= start && n->start() - p->end() >= size) {
             good_enough = p->end();
             if (small) {
                 return good_enough;
             }
+            //See if huge hole fits between p and n
            if (n->start() - align_up(good_enough, huge_page_size) >= size) {
                 return align_up(good_enough, huge_page_size);
             }
         }
+        //If nothing worked move next in the list
         p = n;
         ++n;
     }
@@ -999,6 +1033,9 @@ ulong evacuate(uintptr_t start, uintptr_t end)
             memory::stats::on_jvm_heap_free(size);
         }
         vma_list.erase(dead);
+        WITH_LOCK(vma_range_set_mutex.for_write()) {
+            vma_range_set.erase(vma_range(&dead));
+        }
         delete &dead;
     }
 }
@@ -1140,6 +1177,9 @@ uintptr_t allocate(vma *v, uintptr_t start, size_t size, bool search)
     v->set(start, start+size);
 
     vma_list.insert(*v);
+    WITH_LOCK(vma_range_set_mutex.for_write()) {
+        vma_range_set.insert(vma_range(v));
+    }
 
     return start;
 }
@@ -1493,6 +1533,9 @@ void anon_vma::split(uintptr_t edge)
     vma* n = new anon_vma(addr_range(edge, _range.end()), _perm, _flags);
     set(_range.start(), edge);
     vma_list.insert(*n);
+    WITH_LOCK(vma_range_set_mutex.for_write()) {
+        vma_range_set.insert(vma_range(n));
+    }
 }
 
 error anon_vma::sync(uintptr_t start, uintptr_t end)
@@ -1600,6 +1643,9 @@ jvm_balloon_vma::~jvm_balloon_vma()
     // for a dangling mapping representing a balloon that was already moved
     // out.
     vma_list.erase(*this);
+    WITH_LOCK(vma_range_set_mutex.for_write()) {
+        vma_range_set.erase(vma_range(this));
+    }
 
     assert(!(_real_flags & mmap_jvm_balloon));
     mmu::map_anon(addr(), size(), _real_flags, _real_perm);
@@ -1667,6 +1713,9 @@ ulong map_jvm(unsigned char* jvm_addr, size_t size, size_t align, balloon_ptr b)
             // Since we will change its position in the tree, for the sake of future
             // lookups we need to reinsert it.
             vma_list.erase(*jvma);
+            WITH_LOCK(vma_range_set_mutex.for_write()) {
+                vma_range_set.erase(vma_range(jvma));
+            }
             if (jvma->start() < start) {
                 assert(jvma->partial() >= (jvma->end() - start));
                 jvma->set(jvma->start(), start);
@@ -1675,11 +1724,17 @@ ulong map_jvm(unsigned char* jvm_addr, size_t size, size_t align, balloon_ptr b)
                 jvma->set(end, jvma->end());
             }
             vma_list.insert(*jvma);
+            WITH_LOCK(vma_range_set_mutex.for_write()) {
+                vma_range_set.insert(vma_range(jvma));
+            }
         } else {
             // Note how v and jvma are different. This is because this one,
             // we will delete.
             auto& v = *i--;
             vma_list.erase(v);
+            WITH_LOCK(vma_range_set_mutex.for_write()) {
+                vma_range_set.erase(vma_range(&v));
+            }
             // Finish the move. In practice, it will temporarily remap an
             // anon mapping here, but this should be rare. Let's not
             // complicate the code to optimize it. There are no
@@ -1692,6 +1747,9 @@ ulong map_jvm(unsigned char* jvm_addr, size_t size, size_t align, balloon_ptr b)
 
         evacuate(start, start + size);
         vma_list.insert(*vma);
+        WITH_LOCK(vma_range_set_mutex.for_write()) {
+            vma_range_set.insert(vma_range(vma));
+        }
         return vma->size();
     }
     return 0;
@@ -1753,6 +1811,9 @@ void file_vma::split(uintptr_t edge)
     vma *n = _file->mmap(addr_range(edge, _range.end()), _flags, _perm, off).release();
     set(_range.start(), edge);
     vma_list.insert(*n);
+    WITH_LOCK(vma_range_set_mutex.for_write()) {
+        vma_range_set.insert(vma_range(n));
+    }
 }
 
 error file_vma::sync(uintptr_t start, uintptr_t end)
@@ -1903,6 +1964,9 @@ void linear_map(void* _virt, phys addr, size_t size, const char* name,
     WITH_LOCK(linear_vma_set_mutex.for_write()) {
         linear_vma_set.insert(_vma);
     }
+    WITH_LOCK(vma_range_set_mutex.for_write()) {
+        vma_range_set.insert(vma_range(_vma));
+    }
 }
 
 void free_initial_memory_range(uintptr_t addr, size_t size)
diff --git a/include/osv/mmu.hh b/include/osv/mmu.hh
index f4bdaa84..60ad4a89 100644
--- a/include/osv/mmu.hh
+++ b/include/osv/mmu.hh
@@ -90,6 +90,37 @@ public:
     boost::intrusive::set_member_hook<> _vma_list_hook;
 };
 
+struct vma_range {
+    const void* _vma;
+    bool _is_linear;
+
+    vma_range(const linear_vma* v) {
+        _vma = v;
+        _is_linear = true;
+    }
+
+    vma_range(const vma* v) {
+        _vma = v;
+        _is_linear = false;
+    }
+
+    uintptr_t start() const {
+        if (_is_linear) {
+            return static_cast<const linear_vma*>(_vma)->v_start();
+        } else {
+            return static_cast<const vma*>(_vma)->start();
+        }
+    }
+
+    uintptr_t end() const {
+        if (_is_linear) {
+            return static_cast<const linear_vma*>(_vma)->v_end();
+        } else {
+            return static_cast<const vma*>(_vma)->end();
+        }
+    }
+};
+
 class anon_vma : public vma {
 public:
     anon_vma(addr_range range, unsigned perm, unsigned flags);
diff --git a/include/osv/prio.hh b/include/osv/prio.hh
index 23adc08f..6deb0afd 100644
--- a/include/osv/prio.hh
+++ b/include/osv/prio.hh
@@ -16,6 +16,7 @@ enum {
     cpus,
     fpranges,
     pt_root,
+    vma_range_set,
     linear_vma_set,
     mempool,
     routecache,
-- 
2.31.1