This patch modifies all relevant places where we statically know that
both interrupts and preemption should be enabled, so that each of them
unconditionally pre-faults the stack.

These include places in the code immediately before a:
- WITH_LOCK(preempt_lock) block
- WITH_LOCK(rcu_read_lock) block
- sched::preempt_disable() call
- WITH_LOCK(irq_lock) block (in one case)

In general, these are places that rely on the assumption that, most of
the time, preemption and interrupts are enabled. The functions/methods
below are called directly or indirectly by an application, and there is
no other kernel code higher up in that application's call stack that
also disables interrupts or preemption.
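
For illustration only (not part of the diff below), here is a minimal
sketch of the pattern this patch applies at each such call site. It
assumes the OSv kernel environment (WITH_LOCK, preempt_lock) plus the
arch::ensure_next_stack_page() helper and the CONF_lazy_stack /
CONF_lazy_stack_invariant build options introduced elsewhere in this
lazy-stack series; example_lockless_fast_path() is a made-up name:

    #include <cassert>
    #include <osv/sched.hh>

    static void example_lockless_fast_path()
    {
    #if CONF_lazy_stack_invariant
        // Validate the assumption: this path is only ever reached with
        // both preemption and interrupts enabled.
        assert(sched::preemptable() && arch::irq_enabled());
    #endif
    #if CONF_lazy_stack
        // Pre-fault the next stack page now, while a page fault can still
        // be handled; once preemption is disabled below, it could not be.
        arch::ensure_next_stack_page();
    #endif
        WITH_LOCK(preempt_lock) {
            // per-CPU work that must not fault on the stack
        }
    }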

One good example is the memory allocation code in mempool.cc, which
disables preemption in quite a few places. Many of those use the
WITH_LOCK/DROP_LOCK(preempt_lock) combination, which implies they were
intended to be called with preemption enabled; otherwise the code inside
DROP_LOCK would not work. Also, all of those end up calling some form of
the thread::wait() method, which asserts that both interrupts and
preemption are enabled.
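
A condensed sketch of that mempool.cc shape, for illustration only (in
the real code, preempt_lock is typically taken higher up the call stack
and migration_lock is also held around the DROP_LOCK block):

    page_batch* pb;
    WITH_LOCK(preempt_lock) {
        while (!(pb = try_alloc_page_batch())) {
            DROP_LOCK(preempt_lock) {
    #if CONF_lazy_stack_invariant
                // DROP_LOCK re-enables preemption, which is only correct
                // if it was enabled before preempt_lock was taken; that
                // is exactly the condition the new assert verifies.
                assert(sched::preemptable());
    #endif
                refill();   // may sleep and end up in thread::wait()
            }
        }
    }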

To validate this reasoning, we add the relevant invariant assertions
right before we pre-fault the stack.

Signed-off-by: Waldemar Kozaczuk <jwkozac...@gmail.com>
---
 bsd/porting/uma_stub.cc     | 12 +++++++++
 bsd/sys/net/routecache.hh   |  6 +++++
 bsd/sys/netinet/arpcache.hh |  7 +++++
 core/async.cc               | 18 +++++++++++++
 core/condvar.cc             |  7 +++++
 core/elf.cc                 | 12 +++++++++
 core/mempool.cc             | 50 ++++++++++++++++++++++++++++++++++-
 core/rcu.cc                 |  6 +++++
 core/sched.cc               | 52 +++++++++++++++++++++++++++++++++++++
 fs/vfs/kern_descrip.cc      |  6 +++++
 include/osv/elf.hh          |  7 +++++
 include/osv/percpu_xmit.hh  | 13 ++++++++++
 include/osv/sched.hh        |  7 +++++
 13 files changed, 202 insertions(+), 1 deletion(-)

diff --git a/bsd/porting/uma_stub.cc b/bsd/porting/uma_stub.cc
index cf320b13..a9d689c3 100644
--- a/bsd/porting/uma_stub.cc
+++ b/bsd/porting/uma_stub.cc
@@ -34,6 +34,12 @@ void * uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
 {
     void * ptr;
 
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     WITH_LOCK(preempt_lock) {
         ptr = (*zone->percpu_cache)->alloc();
     }
@@ -101,6 +107,12 @@ void uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
         zone->uz_dtor(item, zone->uz_size, udata);
     }
 
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     WITH_LOCK(preempt_lock) {
         if ((*zone->percpu_cache)->free(item)) {
             return;
diff --git a/bsd/sys/net/routecache.hh b/bsd/sys/net/routecache.hh
index bdcdf496..e2140f06 100644
--- a/bsd/sys/net/routecache.hh
+++ b/bsd/sys/net/routecache.hh
@@ -162,6 +162,12 @@ public:
         // route.cc).
         assert(fibnum == 0);
 
+#if CONF_lazy_stack_invariant
+        assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+        arch::ensure_next_stack_page();
+#endif
         WITH_LOCK(osv::rcu_read_lock) {
             auto *c = cache.read();
             auto entry = c->search(dst->sin_addr.s_addr);
diff --git a/bsd/sys/netinet/arpcache.hh b/bsd/sys/netinet/arpcache.hh
index 4556d40e..289a2f29 100644
--- a/bsd/sys/netinet/arpcache.hh
+++ b/bsd/sys/netinet/arpcache.hh
@@ -16,6 +16,7 @@
 #include <osv/rcu.hh>
 #include <osv/types.h>
 #include <osv/mutex.h>
+#include <osv/sched.hh>
 
 #include <boost/optional.hpp>
 #include <functional>
@@ -98,6 +99,12 @@ struct arp_cache {
 
     boost::optional<entry> lookup(const in_addr ip)
     {
+#if CONF_lazy_stack_invariant
+        assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+        arch::ensure_next_stack_page();
+#endif
         WITH_LOCK(osv::rcu_read_lock) {
             auto i = _entries.reader_find(ip, std::hash<in_addr>(), entry_compare());
             return boost::optional<entry>(!!i, *i);
diff --git a/core/async.cc b/core/async.cc
index 9f3d61d2..ada19fba 100644
--- a/core/async.cc
+++ b/core/async.cc
@@ -104,6 +104,12 @@ public:
 
     void insert(percpu_timer_task& task)
     {
+#if CONF_lazy_stack_invariant
+        assert(arch::irq_enabled() && sched::preemptable());
+#endif
+#if CONF_lazy_stack
+        arch::ensure_next_stack_page();
+#endif
         WITH_LOCK(preempt_lock) {
             trace_async_timer_task_insert(this, &task);
 
@@ -125,6 +131,12 @@ public:
 
     percpu_timer_task& borrow_task()
     {
+#if CONF_lazy_stack_invariant
+        assert(arch::irq_enabled() && sched::preemptable());
+#endif
+#if CONF_lazy_stack
+        arch::ensure_next_stack_page();
+#endif
         WITH_LOCK(preempt_lock) {
             auto task = released_timer_tasks.pop();
             if (task) {
@@ -142,6 +154,12 @@ public:
     {
         auto task = new one_shot_task(std::move(callback));
 
+#if CONF_lazy_stack_invariant
+        assert(arch::irq_enabled() && sched::preemptable());
+#endif
+#if CONF_lazy_stack
+        arch::ensure_next_stack_page();
+#endif
         WITH_LOCK(preempt_lock) {
             if (_queue.empty()) {
                 _thread->wake();
diff --git a/core/condvar.cc b/core/condvar.cc
index 4d01e7b2..9806b3d7 100644
--- a/core/condvar.cc
+++ b/core/condvar.cc
@@ -35,6 +35,13 @@ int condvar::wait(mutex* user_mutex, sched::timer* tmr)
     _user_mutex = user_mutex;
     // This preempt_disable() is just an optimization, to avoid context
     // switch between the two unlocks.
+#if CONF_lazy_stack_invariant
+    assert(arch::irq_enabled());
+    assert(sched::preemptable());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     sched::preempt_disable();
     user_mutex->unlock();
     _m.unlock();
diff --git a/core/elf.cc b/core/elf.cc
index c96a45ec..b28247d1 100644
--- a/core/elf.cc
+++ b/core/elf.cc
@@ -1654,6 +1654,12 @@ object *program::object_containing_addr(const void *addr)
 {
     object *ret = nullptr;
     module_delete_disable();
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     WITH_LOCK(osv::rcu_read_lock) {
          const auto &modules = _modules_rcu.read()->objects;
          for (object *module : modules) {
@@ -1819,6 +1825,12 @@ void program::free_dtv(object* obj)
 program::modules_list program::modules_get() const
 {
     modules_list ret;
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     WITH_LOCK(osv::rcu_read_lock) {
         auto modules = _modules_rcu.read();
         auto needed = modules->objects.size();
diff --git a/core/mempool.cc b/core/mempool.cc
index 74915b7c..df30c28e 100644
--- a/core/mempool.cc
+++ b/core/mempool.cc
@@ -216,6 +216,12 @@ TRACEPOINT(trace_pool_free_different_cpu, "this=%p, obj=%p, obj_cpu=%d", void*,
 void* pool::alloc()
 {
     void * ret = nullptr;
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     WITH_LOCK(preempt_lock) {
 
         // We enable preemption because add_page() may take a Mutex.
@@ -255,8 +261,14 @@ void pool::add_page()
 {
     // FIXME: this function allocated a page and set it up but on rare cases
     // we may add this page to the free list of a different cpu, due to the
-    // enablment of preemption
+    // enablement of preemption
     void* page = untracked_alloc_page();
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     WITH_LOCK(preempt_lock) {
         page_header* header = new (page) page_header;
         header->cpu_id = mempool_cpuid();
@@ -321,6 +333,12 @@ void pool::free(void* object)
 {
     trace_pool_free(this, object);
 
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     WITH_LOCK(preempt_lock) {
 
         free_object* obj = static_cast<free_object*>(object);
@@ -1260,6 +1278,9 @@ public:
         while (!(pb = try_alloc_page_batch())) {
             WITH_LOCK(migration_lock) {
                 DROP_LOCK(preempt_lock) {
+#if CONF_lazy_stack_invariant
+                    assert(sched::preemptable());
+#endif
                     refill();
                 }
             }
@@ -1272,6 +1293,9 @@ public:
         while (!try_free_page_batch(pb)) {
             WITH_LOCK(migration_lock) {
                 DROP_LOCK(preempt_lock) {
+#if CONF_lazy_stack_invariant
+                    assert(sched::preemptable());
+#endif
                     unfill();
                 }
             }
@@ -1377,6 +1401,12 @@ void l1::fill_thread()
 
 void l1::refill()
 {
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     SCOPE_LOCK(preempt_lock);
     auto& pbuf = get_l1();
     if (pbuf.nr + page_batch::nr_pages < pbuf.max / 2) {
@@ -1398,6 +1428,12 @@ void l1::refill()
 
 void l1::unfill()
 {
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     SCOPE_LOCK(preempt_lock);
     auto& pbuf = get_l1();
     if (pbuf.nr > page_batch::nr_pages + pbuf.max / 2) {
@@ -1411,6 +1447,12 @@ void l1::unfill()
 
 void* l1::alloc_page_local()
 {
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     SCOPE_LOCK(preempt_lock);
     auto& pbuf = get_l1();
     if (pbuf.nr < pbuf.watermark_lo) {
@@ -1424,6 +1466,12 @@ void* l1::alloc_page_local()
 
 bool l1::free_page_local(void* v)
 {
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     SCOPE_LOCK(preempt_lock);
     auto& pbuf = get_l1();
     if (pbuf.nr > pbuf.watermark_hi) {
diff --git a/core/rcu.cc b/core/rcu.cc
index b27fc481..441a161e 100644
--- a/core/rcu.cc
+++ b/core/rcu.cc
@@ -196,6 +196,12 @@ using namespace rcu;
 
 void rcu_defer(std::function<void ()>&& func)
 {
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     WITH_LOCK(preempt_lock) {
         auto p = &*percpu_callbacks;
         while (p->ncallbacks[p->buf] == p->callbacks[p->buf].size()) {
diff --git a/core/sched.cc b/core/sched.cc
index c721e156..1e109694 100644
--- a/core/sched.cc
+++ b/core/sched.cc
@@ -704,6 +704,12 @@ void thread::unpin()
     // to pin, unpin, or migrate the same thread, we need to run the actual
     // unpinning code on the same CPU as the target thread.
     if (this == current()) {
+#if CONF_lazy_stack_invariant
+        assert(arch::irq_enabled() && sched::preemptable());
+#endif
+#if CONF_lazy_stack
+        arch::ensure_next_stack_page();
+#endif
         WITH_LOCK(preempt_lock) {
             if (_pinned) {
                 _pinned = false;
@@ -882,6 +888,12 @@ static thread_runtime::duration total_app_time_exited(0);
 
 thread_runtime::duration thread::thread_clock() {
     if (this == current()) {
+#if CONF_lazy_stack_invariant
+        assert(arch::irq_enabled() && sched::preemptable());
+#endif
+#if CONF_lazy_stack
+        arch::ensure_next_stack_page();
+#endif
         WITH_LOCK (preempt_lock) {
             // Inside preempt_lock, we are running and the scheduler can't
             // intervene and change _total_cpu_time or _running_since
@@ -1158,6 +1170,13 @@ void thread::prepare_wait()
 {
     // After setting the thread's status to "waiting", we must not preempt it,
     // as it is no longer in "running" state and therefore will not return.
+#if CONF_lazy_stack_invariant
+    assert(arch::irq_enabled());
+    assert(sched::preemptable());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     preempt_disable();
     assert(_detached_state->st.load() == status::running);
     _detached_state->st.store(status::waiting);
@@ -1257,6 +1276,12 @@ void thread::wake_with_irq_disabled()
 void thread::wake_lock(mutex* mtx, wait_record* wr)
 {
     // must be called with mtx held
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     WITH_LOCK(rcu_read_lock) {
         auto st = _detached_state.get();
         // We want to send_lock() to this thread, but we want to be sure we're the only
@@ -1283,6 +1308,12 @@ void thread::wake_lock(mutex* mtx, wait_record* wr)
 
 bool thread::unsafe_stop()
 {
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     WITH_LOCK(rcu_read_lock) {
         auto st = _detached_state.get();
         auto expected = status::waiting;
@@ -1338,6 +1369,13 @@ void thread::complete()
     }
     // If this thread gets preempted after changing status it will never be
     // scheduled again to set terminating_thread. So must disable preemption.
+#if CONF_lazy_stack_invariant
+    assert(arch::irq_enabled());
+    assert(preemptable());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     preempt_disable();
     _detached_state->st.store(status::terminating);
     // We want to run destroy() here, but can't because it cause the stack we're
@@ -1462,6 +1500,13 @@ void thread::sleep_impl(timer &t)
 
 void thread_handle::wake()
 {
+#if CONF_lazy_stack_invariant
+    assert(arch::irq_enabled());
+    assert(sched::preemptable());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     WITH_LOCK(rcu_read_lock) {
         thread::detached_state* ds = _t.read();
         if (ds) {
@@ -1589,6 +1634,13 @@ void timer_base::set_with_irq_disabled(osv::clock::uptime::time_point time)
 void timer_base::set(osv::clock::uptime::time_point time)
 {
     trace_timer_set(this, time.time_since_epoch().count());
+#if CONF_lazy_stack_invariant
+    assert(arch::irq_enabled());
+    assert(sched::preemptable());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     irq_save_lock_type irq_lock;
     WITH_LOCK(irq_lock) {
         _state = state::armed;
diff --git a/fs/vfs/kern_descrip.cc b/fs/vfs/kern_descrip.cc
index ae04e5eb..8e71f64f 100644
--- a/fs/vfs/kern_descrip.cc
+++ b/fs/vfs/kern_descrip.cc
@@ -145,6 +145,12 @@ int fget(int fd, struct file **out_fp)
     if (fd < 0 || fd >= FDMAX)
         return EBADF;
 
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     WITH_LOCK(rcu_read_lock) {
         fp = gfdt[fd].read();
         if (fp == nullptr) {
diff --git a/include/osv/elf.hh b/include/osv/elf.hh
index 31702bf8..15852b3e 100644
--- a/include/osv/elf.hh
+++ b/include/osv/elf.hh
@@ -16,6 +16,7 @@
 #include <unordered_set>
 #include <unordered_map>
 #include <osv/types.h>
+#include <osv/sched.hh>
 #include <atomic>
 
 #include "arch-elf.hh"
@@ -718,6 +719,12 @@ object::lookup(const char* symbol)
 
 object *program::tls_object(ulong module)
 {
+#if CONF_lazy_stack_invariant
+    assert(sched::preemptable() && arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     SCOPE_LOCK(osv::rcu_read_lock);
     return (*(get_program()->_module_index_list_rcu.read()))[module];
 }
diff --git a/include/osv/percpu_xmit.hh b/include/osv/percpu_xmit.hh
index 4b44bf6a..e5775659 100644
--- a/include/osv/percpu_xmit.hh
+++ b/include/osv/percpu_xmit.hh
@@ -470,6 +470,13 @@ lock:
     void push_cpu(void* cooky) {
         bool success = false;
 
+#if CONF_lazy_stack_invariant
+        assert(arch::irq_enabled());
+        assert(sched::preemptable());
+#endif
+#if CONF_lazy_stack
+        arch::ensure_next_stack_page();
+#endif
         sched::preempt_disable();
 
         cpu_queue_type* local_cpuq = _cpuq->get();
@@ -509,6 +516,12 @@ lock:
                 return;
             }
 
+#if CONF_lazy_stack_invariant
+            assert(arch::irq_enabled());
+#endif
+#if CONF_lazy_stack
+            arch::ensure_next_stack_page();
+#endif
             sched::preempt_disable();
 
             // Refresh: we could have been moved to a different CPU
diff --git a/include/osv/sched.hh b/include/osv/sched.hh
index 4b5c8ad5..1691bed8 100644
--- a/include/osv/sched.hh
+++ b/include/osv/sched.hh
@@ -1336,6 +1336,13 @@ template <class Action>
 inline
 void thread::wake_with(Action action)
 {
+#if CONF_lazy_stack_invariant
+    assert(arch::irq_enabled());
+    assert(preemptable());
+#endif
+#if CONF_lazy_stack
+    arch::ensure_next_stack_page();
+#endif
     return do_wake_with(action, (1 << unsigned(status::waiting)));
 }
 
-- 
2.34.1
