This patch annotates the relevant call sites with invariant assert
expressions that validate the assumptions which let us do "nothing" in all
these cases. We also reorganize some code in the scheduler to help
differentiate between the cases when a given function/method is called with
interrupts or preemption disabled, from a kernel thread, or from an
interrupt handler.

The following methods are added to the scheduler code, with names describing
the state of interrupts or preemption, or the kernel caller (see the sketch
after the list):
- timer_base::set_with_irq_disabled(osv::clock::uptime::time_point time)
- timer_base::set_with_irq_disabled(std::chrono::duration<Rep, Period> duration)
- thread::wake_with_irq_disabled()
- thread::wake_with_irq_or_preemption_disabled(Action action)
- thread_handle::wake_from_kernel_or_with_irq_disabled()
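
For reference, the two simplest of these look as follows (taken from the
core/sched.cc hunks below); each differs from its plain counterpart only by
the invariant it asserts when CONF_lazy_stack_invariant is enabled:

    void thread::wake_with_irq_disabled()
    {
    #if CONF_lazy_stack_invariant
        // caller runs with interrupts disabled, e.g. an interrupt handler
        assert(!arch::irq_enabled());
    #endif
        WITH_LOCK(rcu_read_lock) {
            wake_impl(_detached_state.get());
        }
    }

    void thread_handle::wake_from_kernel_or_with_irq_disabled()
    {
    #if CONF_lazy_stack_invariant
        // caller is either a kernel thread or has interrupts disabled
        assert(!sched::thread::current()->is_app() || !arch::irq_enabled());
    #endif
        WITH_LOCK(rcu_read_lock) {
            thread::detached_state* ds = _t.read();
            if (ds) {
                thread::wake_impl(ds);
            }
        }
    }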

In general:
- we modify all interrupt handlers (those executed on the interrupt stack)
  to call one of the 3 new wake_...() methods (mostly wake_with_irq_disabled())
  to indicate that we neither need to nor should pre-fault the stack; most of
  these are in device driver code (see the caller-side sketch after this list)
- we modify all code executed on kernel threads that disables preemption or
  interrupts by adding the relevant invariant -
  assert(!sched::thread::current()->is_app()); we do not need to pre-fault
  because the stack is already populated
- we also modify code which is indirectly called from kernel threads, such as
  classifier::post_packet() in net channels
- finally, we also modify the scheduler code to use
  timer_base::set_with_irq_disabled(), mostly around preemption_timer, to
  indicate that we should not pre-fault the stack downstream
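
As a caller-side illustration of the two patterns above (a hypothetical
driver interrupt registration and worker function - the names irq_id, _irq,
_thread and example_worker_fn are placeholders, not part of this patch; the
real call sites are in the drivers/ and core/ hunks below):

    // Interrupt handler context: the lambda runs on the interrupt stack with
    // interrupts disabled, so the wake must not try to pre-fault the stack.
    _irq.reset(new gsi_edge_interrupt(irq_id,
            [&] { _thread->wake_with_irq_disabled(); }));

    // Kernel thread context: its stack is fully populated, so instead of
    // pre-faulting we only assert the invariant before disabling preemption.
    static void example_worker_fn()
    {
    #if CONF_lazy_stack_invariant
        assert(!sched::thread::current()->is_app());
    #endif
        WITH_LOCK(preempt_lock) {
            // ... work that must run with preemption disabled ...
        }
    }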

Signed-off-by: Waldemar Kozaczuk <jwkozac...@gmail.com>
---
 arch/aarch64/exceptions.cc |  3 ++
 arch/aarch64/interrupt.cc  |  3 ++
 arch/x64/exceptions.cc     |  3 ++
 arch/x64/mmu.cc            |  2 +-
 arch/x64/msi.cc            |  2 +-
 core/async.cc              |  6 +++
 core/epoll.cc              |  2 +-
 core/mempool.cc            | 12 ++++--
 core/net_channel.cc        |  8 +++-
 core/rcu.cc                |  5 ++-
 core/sched.cc              | 79 ++++++++++++++++++++++++++++++++++----
 drivers/acpi.cc            |  2 +-
 drivers/ahci.cc            |  2 +-
 drivers/ahci.hh            |  2 +-
 drivers/cadence-uart.cc    |  2 +-
 drivers/isa-serial.cc      |  2 +-
 drivers/kbd.cc             |  2 +-
 drivers/mmio-isa-serial.cc |  2 +-
 drivers/pl011.cc           |  2 +-
 drivers/virtio-blk.cc      |  6 +--
 drivers/virtio-fs.cc       |  4 +-
 drivers/virtio-net.cc      |  6 +--
 drivers/virtio-rng.cc      |  2 +-
 drivers/virtio-scsi.cc     |  2 +-
 drivers/virtio-vring.hh    |  2 +-
 drivers/vmw-pvscsi.cc      |  2 +-
 drivers/xenconsole.cc      |  2 +-
 include/osv/net_channel.hh |  5 ++-
 include/osv/sched.hh       | 16 ++++++++
 include/osv/xen_intr.hh    |  2 +-
 libc/signal.cc             |  3 ++
 libc/timerfd.cc            |  3 ++
 32 files changed, 157 insertions(+), 39 deletions(-)

diff --git a/arch/aarch64/exceptions.cc b/arch/aarch64/exceptions.cc
index cadbb3a2..5c2c59ab 100644
--- a/arch/aarch64/exceptions.cc
+++ b/arch/aarch64/exceptions.cc
@@ -122,6 +122,9 @@ void interrupt_table::unregister_interrupt(interrupt *interrupt)
 
 bool interrupt_table::invoke_interrupt(unsigned int id)
 {
+#if CONF_lazy_stack_invariant
+    assert(!arch::irq_enabled());
+#endif
     WITH_LOCK(osv::rcu_read_lock) {
         assert(id < this->nr_irqs);
         interrupt_desc *desc = this->irq_desc[id].read();
diff --git a/arch/aarch64/interrupt.cc b/arch/aarch64/interrupt.cc
index b8337e23..e26e10ee 100644
--- a/arch/aarch64/interrupt.cc
+++ b/arch/aarch64/interrupt.cc
@@ -22,6 +22,9 @@ sgi_interrupt::~sgi_interrupt()
 
 void sgi_interrupt::send(sched::cpu* cpu)
 {
+#if CONF_lazy_stack_invariant
+    assert(!arch::irq_enabled() || !sched::preemptable());
+#endif
     gic::gic->send_sgi(gic::sgi_filter::SGI_TARGET_LIST,
                        cpu->arch.smp_idx, get_id());
 }
diff --git a/arch/x64/exceptions.cc b/arch/x64/exceptions.cc
index 7c9eaf51..fbf6be65 100644
--- a/arch/x64/exceptions.cc
+++ b/arch/x64/exceptions.cc
@@ -220,6 +220,9 @@ void interrupt_descriptor_table::unregister_interrupt(gsi_level_interrupt *inter
 
 void interrupt_descriptor_table::invoke_interrupt(unsigned vector)
 {
+#if CONF_lazy_stack_invariant
+    assert(!arch::irq_enabled());
+#endif
     WITH_LOCK(osv::rcu_read_lock) {
         unsigned i, nr_shared;
         bool handled = false;
diff --git a/arch/x64/mmu.cc b/arch/x64/mmu.cc
index 1af268c0..675410d0 100644
--- a/arch/x64/mmu.cc
+++ b/arch/x64/mmu.cc
@@ -64,7 +64,7 @@ std::atomic<int> tlb_flush_pendingconfirms;
 inter_processor_interrupt tlb_flush_ipi{IPI_TLB_FLUSH, [] {
         mmu::flush_tlb_local();
         if (tlb_flush_pendingconfirms.fetch_add(-1) == 1) {
-            tlb_flush_waiter.wake();
+            tlb_flush_waiter.wake_from_kernel_or_with_irq_disabled();
         }
 }};
 
diff --git a/arch/x64/msi.cc b/arch/x64/msi.cc
index 9a28e3a5..cf0c3dc5 100644
--- a/arch/x64/msi.cc
+++ b/arch/x64/msi.cc
@@ -126,7 +126,7 @@ static inline void set_affinity_and_wake(
         v->msix_unmask_entries();
     }
 
-    t->wake();
+    t->wake_with_irq_disabled();
 }
 
 bool interrupt_manager::easy_register(std::initializer_list<msix_binding> bindings)
diff --git a/core/async.cc b/core/async.cc
index a592ca27..9f3d61d2 100644
--- a/core/async.cc
+++ b/core/async.cc
@@ -160,6 +160,9 @@ public:
                 return _timer.expired() || !_queue.empty();
             });
 
+#if CONF_lazy_stack_invariant
+            assert(!sched::thread::current()->is_app());
+#endif
             WITH_LOCK(preempt_lock) {
                 _timer.cancel();
 
@@ -224,6 +227,9 @@ private:
         }
 
         auto& master = *task.master;
+#if CONF_lazy_stack_invariant
+        assert(!sched::thread::current()->is_app());
+#endif
         DROP_LOCK(preempt_lock) {
             master.fire(task);
         }
diff --git a/core/epoll.cc b/core/epoll.cc
index f22ff449..d76fb305 100644
--- a/core/epoll.cc
+++ b/core/epoll.cc
@@ -245,7 +245,7 @@ public:
         if (!_activity_ring.push(key)) {
             _activity_ring_overflow.store(true, std::memory_order_relaxed);
         }
-        _activity_ring_owner.wake();
+        _activity_ring_owner.wake_from_kernel_or_with_irq_disabled();
     }
 };
 
diff --git a/core/mempool.cc b/core/mempool.cc
index 5dab1093..74915b7c 100644
--- a/core/mempool.cc
+++ b/core/mempool.cc
@@ -154,6 +154,9 @@ void pool::collect_garbage()
 
 static void garbage_collector_fn()
 {
+#if CONF_lazy_stack_invariant
+    assert(!sched::thread::current()->is_app());
+#endif
     WITH_LOCK(preempt_lock) {
         pool::collect_garbage();
     }
@@ -1352,9 +1355,12 @@ void l1::fill_thread()
     auto& pbuf = get_l1();
     for (;;) {
         sched::thread::wait_until([&] {
-                WITH_LOCK(preempt_lock) {
-                    return pbuf.nr < pbuf.watermark_lo || pbuf.nr > pbuf.watermark_hi;
-                }
+#if CONF_lazy_stack_invariant
+            assert(!sched::thread::current()->is_app());
+#endif
+            WITH_LOCK(preempt_lock) {
+                return pbuf.nr < pbuf.watermark_lo || pbuf.nr > pbuf.watermark_hi;
+            }
         });
         if (pbuf.nr < pbuf.watermark_lo) {
             while (pbuf.nr + page_batch::nr_pages < pbuf.max / 2) {
diff --git a/core/net_channel.cc b/core/net_channel.cc
index 67ba99e6..c511e322 100644
--- a/core/net_channel.cc
+++ b/core/net_channel.cc
@@ -40,13 +40,16 @@ void net_channel::process_queue()
 
 void net_channel::wake_pollers()
 {
+#if CONF_lazy_stack_invariant
+    assert(!sched::thread::current()->is_app());
+#endif
     WITH_LOCK(osv::rcu_read_lock) {
         auto pl = _pollers.read();
         if (pl) {
             for (pollreq* pr : *pl) {
                 // net_channel is self synchronizing
                 pr->_awake.store(true, std::memory_order_relaxed);
-                pr->_poll_thread.wake();
+                pr->_poll_thread.wake_from_kernel_or_with_irq_disabled();
             }
         }
         // can't call epoll_wake from rcu, so copy the data
@@ -128,6 +131,9 @@ void classifier::remove(ipv4_tcp_conn_id id)
 
 bool classifier::post_packet(mbuf* m)
 {
+#if CONF_lazy_stack_invariant
+    assert(!sched::thread::current()->is_app());
+#endif
     WITH_LOCK(osv::rcu_read_lock) {
         if (auto nc = classify_ipv4_tcp(m)) {
             log_packet_in(m, NETISR_ETHER);
diff --git a/core/rcu.cc b/core/rcu.cc
index 2bc7a75f..b27fc481 100644
--- a/core/rcu.cc
+++ b/core/rcu.cc
@@ -118,6 +118,9 @@ void cpu_quiescent_state_thread::do_work()
 {
     while (true) {
         bool toclean = false;
+#if CONF_lazy_stack_invariant
+        assert(!sched::thread::current()->is_app());
+#endif
         WITH_LOCK(preempt_lock) {
             auto p = &*percpu_callbacks;
             if (p->ncallbacks[p->buf]) {
@@ -242,7 +245,7 @@ void rcu_flush()
             rcu_defer([&] { s.post(); });
             // rcu_defer() might not wake the cleanup thread until enough deferred
             // callbacks have accumulated, so wake it up now.
-            percpu_quiescent_state_thread->wake();
+            percpu_quiescent_state_thread->wake_from_kernel_or_with_irq_disabled();
         }, sched::thread::attr().pin(c)));
         t->start();
         t->join();
diff --git a/core/sched.cc b/core/sched.cc
index a06a331f..c721e156 100644
--- a/core/sched.cc
+++ b/core/sched.cc
@@ -291,7 +291,7 @@ void cpu::reschedule_from_interrupt(bool called_from_yield,
                 preemption_timer.cancel();
                 auto delta = p->_runtime.time_until(t._runtime.get_local());
                 if (delta > 0) {
-                    preemption_timer.set(now + delta);
+                    preemption_timer.set_with_irq_disabled(now + delta);
                 }
 #ifdef __aarch64__
                 return switch_data;
@@ -352,11 +352,11 @@ void cpu::reschedule_from_interrupt(bool called_from_yield,
             auto& t = *runqueue.begin();
             auto delta = n->_runtime.time_until(t._runtime.get_local());
             if (delta > 0) {
-                preemption_timer.set(now + delta);
+                preemption_timer.set_with_irq_disabled(now + delta);
             }
         }
     } else {
-        preemption_timer.set(now + preempt_after);
+        preemption_timer.set_with_irq_disabled(now + preempt_after);
     }
 
     if (app_thread.load(std::memory_order_relaxed) != n->_app) { // don't write into a cache line if it can be avoided
@@ -444,6 +444,9 @@ void cpu::do_idle()
                 }
             }
         }
+#if CONF_lazy_stack_invariant
+        assert(!thread::current()->is_app());
+#endif
         std::unique_lock<irq_lock_type> guard(irq_lock);
         handle_incoming_wakeups();
         if (!runqueue.empty()) {
@@ -462,6 +465,9 @@ void cpu::idle()
     // The idle thread must not sleep, because the whole point is that the
     // scheduler can always find at least one runnable thread.
     // We set preempt_disable just to help us verify this.
+#if CONF_lazy_stack_invariant
+    assert(!thread::current()->is_app());
+#endif
     preempt_disable();
 
     if (id == 0) {
@@ -477,6 +483,9 @@ void cpu::idle()
 
 void cpu::handle_incoming_wakeups()
 {
+#if CONF_lazy_stack_invariant
+    assert(!arch::irq_enabled() || !thread::current()->is_app());
+#endif
     cpu_set queues_with_wakes{incoming_wakeups_mask.fetch_clear()};
     if (!queues_with_wakes) {
         return;
@@ -569,7 +578,7 @@ void thread::pin(cpu *target_cpu)
         t._detached_state->st.store(thread::status::waiting);
         // Note that wakeme is on the same CPU, and irq is disabled,
         // so it will not actually run until we stop running.
-        wakeme->wake_with([&] { do_wakeme = true; });
+        wakeme->wake_with_irq_or_preemption_disabled([&] { do_wakeme = true; });
 #ifdef __aarch64__
         reschedule_from_interrupt(source_cpu, false, thyst);
 #else
@@ -591,6 +600,9 @@ void thread::pin(thread *t, cpu *target_cpu)
     // helper thread to follow the target thread's CPU. We could have also
     // re-used an existing thread (e.g., the load balancer thread).
     thread_unique_ptr helper(thread::make_unique([&] {
+#if CONF_lazy_stack_invariant
+        assert(!thread::current()->is_app());
+#endif
         WITH_LOCK(irq_lock) {
             // This thread started on the same CPU as t, but by now t might
             // have moved. If that happened, we need to move too.
@@ -658,7 +670,7 @@ void thread::pin(thread *t, cpu *target_cpu)
                 // comment above).
                 if (t->_detached_state->st.load(std::memory_order_relaxed) == status::waking) {
                     t->_detached_state->st.store(status::waiting);
-                    t->wake();
+                    t->wake_with_irq_disabled();
                 }
                 break;
             case status::queued:
@@ -672,7 +684,7 @@ void thread::pin(thread *t, cpu *target_cpu)
                 t->remote_thread_local_var(current_cpu) = target_cpu;
                 // pretend the thread was waiting, so we can wake it
                 t->_detached_state->st.store(status::waiting);
-                t->wake();
+                t->wake_with_irq_disabled();
                 break;
             default:
                 // Thread is in an unexpected state (for example, already
@@ -702,6 +714,9 @@ void thread::unpin()
         return;
     }
     thread_unique_ptr helper(thread::make_unique([this] {
+#if CONF_lazy_stack_invariant
+        assert(!thread::current()->is_app());
+#endif
         WITH_LOCK(preempt_lock) {
             // helper thread started on the same CPU as "this", but by now
             // "this" might migrated. If that happened helper need to migrate.
@@ -741,6 +756,9 @@ void cpu::load_balance()
         if (min->load() >= (load() - 1)) {
             continue;
         }
+#if CONF_lazy_stack_invariant
+        assert(!thread::current()->is_app());
+#endif
         WITH_LOCK(irq_lock) {
             auto i = std::find_if(runqueue.rbegin(), runqueue.rend(),
                     [](thread& t) { return t._migration_lock_counter == 0; });
@@ -1175,7 +1193,7 @@ void thread::destroy()
             ds->st.store(status::terminated);
         } else {
             // The joiner won the race, and will wait. We need to wake it.
-            joiner->wake_with([&] { ds->st.store(status::terminated); });
+            joiner->wake_with_irq_or_preemption_disabled([&] { ds->st.store(status::terminated); });
         }
     }
 }
@@ -1200,6 +1218,9 @@ void thread::wake_impl(detached_state* st, unsigned allowed_initial_states_mask)
         unsigned c = cpu::current()->id;
         // we can now use st->t here, since the thread cannot terminate while
         // it's waking, but not afterwards, when it may be running
+#if CONF_lazy_stack_invariant
+        assert(!sched::preemptable());
+#endif
         irq_save_lock_type irq_lock;
         WITH_LOCK(irq_lock) {
             tcpu->incoming_wakeups[c].push_back(*st->t);
@@ -1223,6 +1244,16 @@ void thread::wake()
     }
 }
 
+void thread::wake_with_irq_disabled()
+{
+#if CONF_lazy_stack_invariant
+    assert(!arch::irq_enabled());
+#endif
+    WITH_LOCK(rcu_read_lock) {
+        wake_impl(_detached_state.get());
+    }
+}
+
 void thread::wake_lock(mutex* mtx, wait_record* wr)
 {
     // must be called with mtx held
@@ -1395,7 +1426,7 @@ void thread::set_cleanup(std::function<void ()> cleanup)
 
 void thread::timer_fired()
 {
-    wake();
+    wake_with_irq_disabled();
 }
 
 unsigned int thread::id() const
@@ -1439,6 +1470,19 @@ void thread_handle::wake()
     }
 }
 
+void thread_handle::wake_from_kernel_or_with_irq_disabled()
+{
+#if CONF_lazy_stack_invariant
+    assert(!sched::thread::current()->is_app() || !arch::irq_enabled());
+#endif
+    WITH_LOCK(rcu_read_lock) {
+        thread::detached_state* ds = _t.read();
+        if (ds) {
+            thread::wake_impl(ds);
+        }
+    }
+}
+
 timer_list::callback_dispatch::callback_dispatch()
 {
     clock_event->set_callback(this);
@@ -1526,6 +1570,22 @@ void timer_base::expire()
     _t.timer_fired();
 }
 
+void timer_base::set_with_irq_disabled(osv::clock::uptime::time_point time)
+{
+#if CONF_lazy_stack_invariant
+    assert(!arch::irq_enabled());
+#endif
+    trace_timer_set(this, time.time_since_epoch().count());
+    _state = state::armed;
+    _time = time;
+
+    auto& timers = cpu::current()->timers;
+    _t._active_timers.push_back(*this);
+    if (timers._list.insert(*this)) {
+        timers.rearm();
+    }
+};
+
 void timer_base::set(osv::clock::uptime::time_point time)
 {
     trace_timer_set(this, time.time_since_epoch().count());
@@ -1566,6 +1626,9 @@ void timer_base::reset(osv::clock::uptime::time_point time)
 
     auto& timers = cpu::current()->timers;
 
+#if CONF_lazy_stack_invariant
+    assert(!thread::current()->is_app() || !sched::preemptable());
+#endif
     irq_save_lock_type irq_lock;
     WITH_LOCK(irq_lock) {
         if (_state == state::armed) {
diff --git a/drivers/acpi.cc b/drivers/acpi.cc
index af4140a4..cfc3cf27 100644
--- a/drivers/acpi.cc
+++ b/drivers/acpi.cc
@@ -234,7 +234,7 @@ public:
         , _stopped(false)
         , _counter(0)
         , _thread(sched::thread::make([this] { process_interrupts(); }))
-        , _intr(gsi, [this] { _counter.fetch_add(1); _thread->wake(); })
+        , _intr(gsi, [this] { _counter.fetch_add(1); _thread->wake_with_irq_disabled(); })
     {
         _thread->start();
     }
diff --git a/drivers/ahci.cc b/drivers/ahci.cc
index 1b94124c..45388396 100644
--- a/drivers/ahci.cc
+++ b/drivers/ahci.cc
@@ -276,7 +276,7 @@ void port::req_done()
             _slot_free++;
 
             // Wakeup the thread waiting for a free slot
-            _cmd_send_waiter.wake();
+            _cmd_send_waiter.wake_from_kernel_or_with_irq_disabled();
         }
     }
 }
diff --git a/drivers/ahci.hh b/drivers/ahci.hh
index 087fd9dd..fde7abbc 100644
--- a/drivers/ahci.hh
+++ b/drivers/ahci.hh
@@ -242,7 +242,7 @@ public:
     void enable_irq();
     void wait_device_ready();
     void wait_ci_ready(u8 slot);
-    void wakeup() { _irq_thread->wake(); }
+    void wakeup() { _irq_thread->wake_with_irq_disabled(); }
     bool linkup() { return _linkup; }
 
     u32 port2hba(u32 port_reg)
diff --git a/drivers/cadence-uart.cc b/drivers/cadence-uart.cc
index 52fc84ff..00c17d0e 100644
--- a/drivers/cadence-uart.cc
+++ b/drivers/cadence-uart.cc
@@ -109,7 +109,7 @@ void Cadence_Console::irq_handler()
     // IRQ must be cleared after character is read from FIFO
     uart->cisr = cisr;
 
-    _thread->wake();
+    _thread->wake_with_irq_disabled();
 }
 
 void Cadence_Console::dev_start() {
diff --git a/drivers/isa-serial.cc b/drivers/isa-serial.cc
index 5e2b8bbf..96353033 100644
--- a/drivers/isa-serial.cc
+++ b/drivers/isa-serial.cc
@@ -23,7 +23,7 @@ void isa_serial_console::early_init()
 }
 
 void isa_serial_console::dev_start() {
-    _irq.reset(new gsi_edge_interrupt(4, [&] { _thread->wake(); }));
+    _irq.reset(new gsi_edge_interrupt(4, [&] { _thread->wake_with_irq_disabled(); }));
     enable_interrupt();
 }
 
diff --git a/drivers/kbd.cc b/drivers/kbd.cc
index f50eef8c..2e978c43 100644
--- a/drivers/kbd.cc
+++ b/drivers/kbd.cc
@@ -81,7 +81,7 @@ static uint32_t ctlmap[256] =
 };
 
 Keyboard::Keyboard(sched::thread* poll_thread)
-    : _irq(1, [=] { poll_thread->wake(); })
+    : _irq(1, [=] { poll_thread->wake_with_irq_disabled(); })
 {
   shiftcode[0x1D] = MOD_CTL;
   shiftcode[0x2A] = MOD_SHIFT;
diff --git a/drivers/mmio-isa-serial.cc b/drivers/mmio-isa-serial.cc
index 8bce1814..9633db0b 100644
--- a/drivers/mmio-isa-serial.cc
+++ b/drivers/mmio-isa-serial.cc
@@ -74,7 +74,7 @@ void mmio_isa_serial_console::clean_cmdline(char *cmdline)
 void mmio_isa_serial_console::dev_start() {
     _irq.reset(new spi_interrupt(gic::irq_type::IRQ_TYPE_EDGE, irqid,
                                  [&] { return true; },
-                                 [&] { _thread->wake(); }));
+                                 [&] { _thread->wake_with_irq_disabled(); }));
     enable_interrupt();
 }
 
diff --git a/drivers/pl011.cc b/drivers/pl011.cc
index daaf7d93..30c36a09 100644
--- a/drivers/pl011.cc
+++ b/drivers/pl011.cc
@@ -66,7 +66,7 @@ bool PL011_Console::ack_irq() {
 }
 
 void PL011_Console::irq_handler() {
-    _thread->wake();
+    _thread->wake_with_irq_disabled();
 }
 
 void PL011_Console::dev_start() {
diff --git a/drivers/virtio-blk.cc b/drivers/virtio-blk.cc
index 643ca275..b643c991 100644
--- a/drivers/virtio-blk.cc
+++ b/drivers/virtio-blk.cc
@@ -143,7 +143,7 @@ blk::blk(virtio_device& virtio_dev)
         return new pci_interrupt(
             pci_dev,
             [=] { return this->ack_irq(); },
-            [=] { t->wake(); });
+            [=] { t->wake_with_irq_disabled(); });
     };
 #endif
 
@@ -153,14 +153,14 @@ blk::blk(virtio_device& virtio_dev)
                 gic::irq_type::IRQ_TYPE_EDGE,
                 _dev.get_irq(),
                 [=] { return this->ack_irq(); },
-                [=] { t->wake(); });
+                [=] { t->wake_with_irq_disabled(); });
     };
 #else
 #if CONF_drivers_mmio
     int_factory.create_gsi_edge_interrupt = [this,t]() {
         return new gsi_edge_interrupt(
                 _dev.get_irq(),
-                [=] { if (this->ack_irq()) t->wake(); });
+                [=] { if (this->ack_irq()) t->wake_with_irq_disabled(); });
     };
 #endif
 #endif
diff --git a/drivers/virtio-fs.cc b/drivers/virtio-fs.cc
index e87d0ce1..bbbafc53 100644
--- a/drivers/virtio-fs.cc
+++ b/drivers/virtio-fs.cc
@@ -114,7 +114,7 @@ fs::fs(virtio_device& virtio_dev)
         return new pci_interrupt(
             pci_dev,
             [=] { return this->ack_irq(); },
-            [=] { t->wake(); });
+            [=] { t->wake_with_irq_disabled(); });
     };
 #endif
 
@@ -123,7 +123,7 @@ fs::fs(virtio_device& virtio_dev)
     int_factory.create_gsi_edge_interrupt = [this, t]() {
         return new gsi_edge_interrupt(
             _dev.get_irq(),
-            [=] { if (this->ack_irq()) t->wake(); });
+            [=] { if (this->ack_irq()) t->wake_with_irq_disabled(); });
     };
 #endif
 #endif
diff --git a/drivers/virtio-net.cc b/drivers/virtio-net.cc
index d2ab7c76..6053ce81 100644
--- a/drivers/virtio-net.cc
+++ b/drivers/virtio-net.cc
@@ -316,7 +316,7 @@ net::net(virtio_device& dev)
         return new pci_interrupt(
             pci_dev,
             [=] { return this->ack_irq(); },
-            [=] { poll_task->wake(); });
+            [=] { poll_task->wake_with_irq_disabled(); });
     };
 #endif
 
@@ -326,14 +326,14 @@ net::net(virtio_device& dev)
             gic::irq_type::IRQ_TYPE_EDGE,
             _dev.get_irq(),
             [=] { return this->ack_irq(); },
-            [=] { poll_task->wake(); });
+            [=] { poll_task->wake_with_irq_disabled(); });
     };
 #else
 #if CONF_drivers_mmio
     int_factory.create_gsi_edge_interrupt = [this,poll_task]() {
         return new gsi_edge_interrupt(
             _dev.get_irq(),
-            [=] { if (this->ack_irq()) poll_task->wake(); });
+            [=] { if (this->ack_irq()) poll_task->wake_with_irq_disabled(); });
     };
 #endif
 #endif
diff --git a/drivers/virtio-rng.cc b/drivers/virtio-rng.cc
index c276af04..b12b648f 100644
--- a/drivers/virtio-rng.cc
+++ b/drivers/virtio-rng.cc
@@ -89,7 +89,7 @@ size_t rng::get_random_bytes(char* buf, size_t size)
 
 void rng::handle_irq()
 {
-    _thread->wake();
+    _thread->wake_with_irq_disabled();
 }
 
 bool rng::ack_irq()
diff --git a/drivers/virtio-scsi.cc b/drivers/virtio-scsi.cc
index a9617f07..45015b60 100644
--- a/drivers/virtio-scsi.cc
+++ b/drivers/virtio-scsi.cc
@@ -173,7 +173,7 @@ scsi::scsi(virtio_device& dev)
         return new pci_interrupt(
                 pci_dev,
                 [=] { return this->ack_irq(); },
-                [=] { t->wake(); });
+                [=] { t->wake_with_irq_disabled(); });
     };
     _dev.register_interrupt(int_factory);
 
diff --git a/drivers/virtio-vring.hh b/drivers/virtio-vring.hh
index 00ffa3dd..29921987 100644
--- a/drivers/virtio-vring.hh
+++ b/drivers/virtio-vring.hh
@@ -234,7 +234,7 @@ class virtio_driver;
 
         void wakeup_waiter()
         {
-            _waiter.wake();
+            _waiter.wake_from_kernel_or_with_irq_disabled();
         }
 
 
diff --git a/drivers/vmw-pvscsi.cc b/drivers/vmw-pvscsi.cc
index 6150f2f8..1ef0839c 100644
--- a/drivers/vmw-pvscsi.cc
+++ b/drivers/vmw-pvscsi.cc
@@ -277,7 +277,7 @@ void pvscsi::req_done()
             barrier();
             s->cmp_cons_idx++;
 
-            _waiter.wake();
+            _waiter.wake_from_kernel_or_with_irq_disabled();
         }
     }
 }
diff --git a/drivers/xenconsole.cc b/drivers/xenconsole.cc
index f180cd67..63105ffe 100644
--- a/drivers/xenconsole.cc
+++ b/drivers/xenconsole.cc
@@ -29,7 +29,7 @@ XEN_Console::XEN_Console()
 
 void XEN_Console::handle_intr()
 {
-    _thread->wake();
+    _thread->wake_with_irq_disabled();
 }
 
 void XEN_Console::write(const char *str, size_t len) {
diff --git a/include/osv/net_channel.hh b/include/osv/net_channel.hh
index 11cc09cb..ea91d3d9 100644
--- a/include/osv/net_channel.hh
+++ b/include/osv/net_channel.hh
@@ -46,7 +46,10 @@ public:
     bool push(mbuf* m) { return _queue.push(m); }
     // consumer: wake the consumer (best used after multiple push()s)
     void wake() {
-        _waiting_thread.wake();
+#if CONF_lazy_stack_invariant
+        assert(!sched::thread::current()->is_app());
+#endif
+        _waiting_thread.wake_from_kernel_or_with_irq_disabled();
         if (_pollers || !_epollers.empty()) {
             wake_pollers();
         }
diff --git a/include/osv/sched.hh b/include/osv/sched.hh
index 2976b956..4b5c8ad5 100644
--- a/include/osv/sched.hh
+++ b/include/osv/sched.hh
@@ -186,6 +186,7 @@ public:
     explicit timer_base(client& t);
     ~timer_base();
     void set(osv::clock::uptime::time_point time);
+    void set_with_irq_disabled(osv::clock::uptime::time_point time);
     void reset(osv::clock::uptime::time_point time);
     // Set a timer using absolute wall-clock time.
     // CAVEAT EMPTOR: Internally timers are kept using the monotonic (uptime)
@@ -203,6 +204,10 @@ public:
     void set(std::chrono::duration<Rep, Period> duration) {
         set(osv::clock::uptime::now() + duration);
     }
+    template <class Rep, class Period>
+    void set_with_irq_disabled(std::chrono::duration<Rep, Period> duration) {
+        set_with_irq_disabled(osv::clock::uptime::now() + duration);
+    }
     osv::clock::uptime::time_point get_timeout() {
         return _time;
     }
@@ -468,6 +473,7 @@ public:
     static void wait_for(mutex& mtx, waitable&&... waitables);
 
     void wake();
+    void wake_with_irq_disabled();
     cpu* get_cpu() const {
         return _detached_state.get()->_cpu;
     }
@@ -481,6 +487,8 @@ public:
     bool interrupted();
     void interrupted(bool f);
     template <class Action>
+    inline void wake_with_irq_or_preemption_disabled(Action action);
+    template <class Action>
     inline void wake_with(Action action);
     // for mutex internal use
     template <class Action>
@@ -817,6 +825,7 @@ public:
     }
     void reset(thread& t) { _t.assign(t._detached_state.get()); }
     void wake();
+    void wake_from_kernel_or_with_irq_disabled();
     void clear() { _t.assign(nullptr); }
     operator bool() const { return _t; }
     bool operator==(const thread_handle& x) const {
@@ -1316,6 +1325,13 @@ void thread::sleep(std::chrono::duration<Rep, Period> duration)
     sleep_impl(t);
 }
 
+template <class Action>
+inline
+void thread::wake_with_irq_or_preemption_disabled(Action action)
+{
+    return do_wake_with(action, (1 << unsigned(status::waiting)));
+}
+
 template <class Action>
 inline
 void thread::wake_with(Action action)
diff --git a/include/osv/xen_intr.hh b/include/osv/xen_intr.hh
index 1c8c6ca7..0c7eb3de 100644
--- a/include/osv/xen_intr.hh
+++ b/include/osv/xen_intr.hh
@@ -15,7 +15,7 @@ namespace xen {
 class xen_irq {
 public:
     explicit xen_irq(interrupt *intr);
-    void wake() { (*_thread)->wake(); }
+    void wake() { (*_thread)->wake_with_irq_disabled(); }
     static void register_irq(int vector, driver_intr_t handler, void *arg);
     static void unregister_irq(int vector);
 private:
diff --git a/libc/signal.cc b/libc/signal.cc
index b530a5b8..95f6d66e 100644
--- a/libc/signal.cc
+++ b/libc/signal.cc
@@ -502,6 +502,9 @@ void itimer::work()
                         _owner_thread->interrupted(true);
                     }
                 } else {
+#if CONF_lazy_stack_invariant
+                    assert(!sched::thread::current()->is_app());
+#endif
                     tmr.cancel();
                 }
             } else {
diff --git a/libc/timerfd.cc b/libc/timerfd.cc
index a5118a0e..dffa492d 100644
--- a/libc/timerfd.cc
+++ b/libc/timerfd.cc
@@ -116,6 +116,9 @@ void timerfd::wakeup_thread_func()
                     _blocked_reader.wake_one();
                     poll_wake(this, POLLIN);
                 } else {
+#if CONF_lazy_stack_invariant
+                    assert(!sched::thread::current()->is_app());
+#endif
                     tmr.cancel();
                 }
             } else {
-- 
2.34.1

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/20220831042433.140243-2-jwkozaczuk%40gmail.com.

Reply via email to