From: Nadav Har'El <n...@scylladb.com>
Committer: Nadav Har'El <n...@scylladb.com>
Branch: master

gcc 7: replacement for "new" for types of special alignment

OSv has various types which have special alignment requirements - e.g.,
some objects are aligned to cache-line size for performance, some objects
are aligned because of processor requirements (e.g., fpu state needs to
be 64-byte aligned) or ABI requirements (stacks need to be 16-byte aligned).

While C++11 correctly aligns these object on the stack, when created on
the heap with "new", it actually ignores the requested alignment!
The reason is that "new" calls "operator new()" which cannot specify the
desired alignment. We're lucky that in practice missing the alignment
didn't matter: In some cases the alignment was just supposed to be a
performance optimization that is not mandatory. In other cases like
the thread object, because it is large, it was already allocated using
entire pages and ended up getting a 4096 byte alignment, more than enough.

But Gcc starting in release 7, warns about using "new" on a type with a
non-standard alignment requirement. What we do in this patch is to create
a template function aligned_new<T>(...) which can be used instead of new,
and uses aligned_alloc() to implement the properly aligned allocation.

When we switch to C++17 in the future, its "new" fully supports any alignment
(and to do that, it has a version of "operator new" taking an alignment -
see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2012/n3396.htm)
so we will be able to revert this patch, and go back to using regular new.

Signed-off-by: Nadav Har'El <n...@scylladb.com>

---
diff --git a/arch/x64/smp.cc b/arch/x64/smp.cc
--- a/arch/x64/smp.cc
+++ b/arch/x64/smp.cc
@@ -21,6 +21,7 @@ extern "C" {
 #include <osv/barrier.hh>
 #include <osv/prio.hh>
 #include "osv/percpu.hh"
+#include <osv/aligned_new.hh>

 extern "C" { void smp_main(void); }

@@ -76,7 +77,7 @@ void smp_init()
     parse_madt();
     sched::current_cpu = sched::cpus[0];
     for (auto c : sched::cpus) {
- c->incoming_wakeups = new sched::cpu::incoming_wakeup_queue[sched::cpus.size()]; + c->incoming_wakeups = aligned_array_new<sched::cpu::incoming_wakeup_queue>(sched::cpus.size());
     }
     smpboot_cr0 = read_cr0();
     smpboot_cr4 = read_cr4();
diff --git a/bsd/sys/netinet/tcp_input.cc b/bsd/sys/netinet/tcp_input.cc
--- a/bsd/sys/netinet/tcp_input.cc
+++ b/bsd/sys/netinet/tcp_input.cc
@@ -90,6 +90,7 @@
 #include <machine/in_cksum.h>
 #include <osv/poll.h>
 #include <osv/net_trace.hh>
+#include <osv/aligned_new.hh>

TRACEPOINT(trace_tcp_input_ack, "%p: We've got ACK: %u", void*, unsigned int);

@@ -3226,7 +3227,7 @@ static ipv4_tcp_conn_id tcp_connection_id(tcpcb* tp)
 void
 tcp_setup_net_channel(tcpcb* tp, struct ifnet* intf)
 {
- auto nc = new net_channel([=] (mbuf *m) { tcp_net_channel_packet(tp, m); }); + auto nc = aligned_new<net_channel>([=] (mbuf *m) { tcp_net_channel_packet(tp, m); });
        tp->nc = nc;
        tp->nc_intf = intf;
        intf->add_net_channel(nc, tcp_connection_id(tp));
diff --git a/core/async.cc b/core/async.cc
--- a/core/async.cc
+++ b/core/async.cc
@@ -18,6 +18,7 @@
 #include <boost/intrusive/set.hpp>
 #include <boost/intrusive/parent_from_member.hpp>
 #include <osv/timer-set.hh>
+#include <osv/aligned_new.hh>

 namespace async {

@@ -247,7 +248,7 @@ class async_worker {
 static PERCPU(async_worker*, _percpu_worker);

 static sched::cpu::notifier _notifier([] () {
-    *_percpu_worker = new async_worker(sched::cpu::current());
+    *_percpu_worker = aligned_new<async_worker>(sched::cpu::current());
 });

 static inline async_worker& get_worker()
diff --git a/drivers/virtio-net.cc b/drivers/virtio-net.cc
--- a/drivers/virtio-net.cc
+++ b/drivers/virtio-net.cc
@@ -861,7 +861,7 @@ hw_driver* net::probe(hw_device* dev)
             if (opt_maxnic && maxnic-- <= 0) {
                 return nullptr;
             } else {
-                return new net(*pci_dev);
+                return aligned_new<net>(*pci_dev);
             }
         }
     }
diff --git a/drivers/vmxnet3.cc b/drivers/vmxnet3.cc
--- a/drivers/vmxnet3.cc
+++ b/drivers/vmxnet3.cc
@@ -39,6 +39,7 @@

 #include <osv/sched.hh>
 #include <osv/trace.hh>
+#include <osv/aligned_new.hh>

 #include "drivers/clock.hh"
 #include "drivers/clockevent.hh"
@@ -428,7 +429,7 @@ hw_driver* vmxnet3::probe(hw_device* dev)
                 if (opt_maxnic && maxnic-- <= 0) {
                     return nullptr;
                 } else {
-                    return new vmxnet3(*pci_dev);
+                    return aligned_new<vmxnet3>(*pci_dev);
                 }
             }
         }
diff --git a/include/osv/aligned_new.hh b/include/osv/aligned_new.hh
--- a/include/osv/aligned_new.hh
+++ b/include/osv/aligned_new.hh
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ *
+ * This work is open source software, licensed under the terms of the
+ * BSD license as described in the LICENSE file in the top-level directory.
+ */
+
+#ifndef ALIGNED_NEW_HH
+#define ALIGNED_NEW_HH
+
+/**
+ * In C++11, "new T()" is not guaranteed to fulfill unusual alignment
+ * requirements that T might have. The problem is that there is no way to
+ * tell "operator new" (which "new T()" calls to allocate memory) to use
+ * a particular alignment. C++17 fixed both oversights, but we cannot use
+ * C++17 yet in OSv which only assumes C++11.
+ * So our workaround is a template aligned_new<T> which behaves like a
+ * "new" expression, but uses the C11 aligned_alloc() instead of C++'s
+ * operator new to allocate memory. We assume, and this is currently the
+ * case in OSv, that operator new and malloc() do the same thing anyway
+ * and it is legal to call "operator delete" on memory returned by
+ * aligned_alloc().
+ *
+ * In the future, when C++17 becomes common, we can switch to using ordinary
+ * new instead of aligned_new.
+ */
+
+template<typename T, typename... Args>
+T* aligned_new(Args&&... args) {
+    void *p = aligned_alloc(alignof(T), sizeof(T));
+    assert(p);
+    return new(p) T(std::forward<Args>(args)...);
+}
+
+// Similar function for allocating an array of objects. But here we have
+// a problem: While an object created with aligned_new<>() can be deleted by +// an ordinary "delete" (as explained above), here, an array allocated by an +// aligned_array_new() CANNOT be deleted by a "delete[]" expression! This is
+// because we do not know the internal layout of "new[]" of this compiler
+// to allow a delete[] to work out of the box (delete[] needs to be able
+// to figure out the number of individual objects in the array which it
+// needs to destruct). So we have a separate aligned_array_delete().
+template<typename T>
+T* aligned_array_new(size_t len) {
+    // Allocate one extra item in the beginning, for length.
+    static_assert(sizeof(T) > sizeof(size_t));
+    void *p = aligned_alloc(alignof(T), sizeof(T) * (len+1));
+    assert(p);
+    *(size_t *)p = len;
+    T* ret = (T*) (p + sizeof(T));
+    for (unsigned i = 0; i < len; i++) {
+        p += sizeof(T);
+        new(p) T();
+    }
+    return ret;
+}
+
+template<typename T>
+void aligned_array_delete(T* p) {
+    static_assert(sizeof(T) > sizeof(size_t));
+    size_t len = *(size_t*)p;
+    for (unsigned i = 0; i < len ; i++) {
+        ++p;
+        *p->~T();
+    }
+    free(p);
+}
+#endif /* ALIGNED_NEW_HH */
diff --git a/include/osv/percpu_xmit.hh b/include/osv/percpu_xmit.hh
--- a/include/osv/percpu_xmit.hh
+++ b/include/osv/percpu_xmit.hh
@@ -14,6 +14,7 @@

 #include <osv/clock.hh>
 #include <osv/migration-lock.hh>
+#include <osv/aligned_new.hh>

 #include <bsd/sys/sys/mbuf.h>

@@ -210,7 +211,7 @@ public:

         std::string worker_name_base(name + "-");
         for (auto c : sched::cpus) {
-            _cpuq.for_cpu(c)->reset(new cpu_queue_type);
+            _cpuq.for_cpu(c)->reset(aligned_new<cpu_queue_type>());
             _all_cpuqs.push_back(_cpuq.for_cpu(c)->get());

             _worker.for_cpu(c)->me =
diff --git a/include/osv/sched.hh b/include/osv/sched.hh
--- a/include/osv/sched.hh
+++ b/include/osv/sched.hh
@@ -406,7 +406,18 @@ public:
     // view of the application's page table (see issue #790).
     template <typename... Args>
     static thread* make(Args&&... args) {
-        return new thread(std::forward<Args>(args)...);
+        // As explained in <osv/aligned_new.hh>, in C++11 we cannot use
+        // new() on an object which has non-default alignment requirements.
+        // We cannot use the tool aligned_new<thread> from that header
+        // because of the private constructor, so need to repeat the trick.
+        // Note that avoiding new() is is not *really* important because
+        // sizeof(thread) very large (over 20 KB) and would get a 4096-byte
+        // alignment anyway, even if we allocated it with normal new.
+        void *p = aligned_alloc(alignof(thread), sizeof(thread));
+        if (!p) {
+            return nullptr;
+        }
+        return new(p) thread(std::forward<Args>(args)...);
     }
 private:
     explicit thread(std::function<void ()> func, attr attributes = attr(),

--
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
For more options, visit https://groups.google.com/d/optout.

Reply via email to