Module Name:    src
Committed By:   mjf
Date:           Tue Jun  1 22:13:30 UTC 2010

Modified Files:
        src/sys/arch/amd64/conf: kern.ldscript kern.ldscript.2MB
            kern.ldscript.Xen
        src/sys/arch/i386/conf: kern.ldscript
        src/sys/arch/sparc64/conf: kern.ldscript kern32.ldscript
        src/sys/modules/xldscripts: kmodule
        src/sys/net: if_gre.h
        src/sys/sys: cdefs_elf.h

Log Message:
Add __cacheline_aligned and __read_mostly annotations.

These annotations help to mitigate false sharing on multiprocessor
systems.

Variables annotated with __cacheline_aligned are placed into the
.data.cacheline_aligned section in the kernel. Each item in this
section is aligned on a cacheline boundary - this avoids false
sharing. Highly contended global locks are a good candidate for
__cacheline_aligned annotation.

Variables annotated with __read_mostly are packed together tightly
into a .data.read_mostly section in the kernel. The idea here is that
we can pack infrequently modified data items into a cacheline and
avoid having to purge the cache, which would happen if read mostly
data and write mostly data shared a cacheline. Initialisation variables
are a prime candidate for __read_mostly annotations.


To generate a diff of this commit:
cvs rdiff -u -r1.4 -r1.5 src/sys/arch/amd64/conf/kern.ldscript
cvs rdiff -u -r1.2 -r1.3 src/sys/arch/amd64/conf/kern.ldscript.2MB \
    src/sys/arch/amd64/conf/kern.ldscript.Xen
cvs rdiff -u -r1.7 -r1.8 src/sys/arch/i386/conf/kern.ldscript
cvs rdiff -u -r1.10 -r1.11 src/sys/arch/sparc64/conf/kern.ldscript
cvs rdiff -u -r1.9 -r1.10 src/sys/arch/sparc64/conf/kern32.ldscript
cvs rdiff -u -r1.1 -r1.2 src/sys/modules/xldscripts/kmodule
cvs rdiff -u -r1.39 -r1.40 src/sys/net/if_gre.h
cvs rdiff -u -r1.30 -r1.31 src/sys/sys/cdefs_elf.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/amd64/conf/kern.ldscript
diff -u src/sys/arch/amd64/conf/kern.ldscript:1.4 src/sys/arch/amd64/conf/kern.ldscript:1.5
--- src/sys/arch/amd64/conf/kern.ldscript:1.4	Thu Oct 18 15:28:33 2007
+++ src/sys/arch/amd64/conf/kern.ldscript	Tue Jun  1 22:13:30 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern.ldscript,v 1.4 2007/10/18 15:28:33 yamt Exp $	*/
+/*	$NetBSD: kern.ldscript,v 1.5 2010/06/01 22:13:30 mjf Exp $	*/
 
 OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64",
 	      "elf64-x86-64")
@@ -32,8 +32,22 @@
    AT (LOADADDR(.text) + (ADDR(.data) - ADDR(.text)))
    {
      *(.data)
-     *(.data.*)
    }
+
+   . = ALIGN(64);	/* COHERENCY_UNIT */
+   .data.cacheline_aligned :
+   AT (LOADADDR(.text) + (ADDR(.data.cacheline_aligned) - ADDR(.text)))
+   {
+     *(.data.cacheline_aligned)
+   }
+   . = ALIGN(64);	/* COHERENCY_UNIT */
+   .data.read_mostly :
+   AT (LOADADDR(.text) + (ADDR(.data.read_mostly) - ADDR(.text)))
+   {
+     *(.data.read_mostly)
+   }
+   . = ALIGN(64);	/* COHERENCY_UNIT */
+
    _edata = . ;
    PROVIDE (edata = .) ;
    __bss_start = . ;

Index: src/sys/arch/amd64/conf/kern.ldscript.2MB
diff -u src/sys/arch/amd64/conf/kern.ldscript.2MB:1.2 src/sys/arch/amd64/conf/kern.ldscript.2MB:1.3
--- src/sys/arch/amd64/conf/kern.ldscript.2MB:1.2	Thu Oct 18 15:28:33 2007
+++ src/sys/arch/amd64/conf/kern.ldscript.2MB	Tue Jun  1 22:13:30 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern.ldscript.2MB,v 1.2 2007/10/18 15:28:33 yamt Exp $	*/
+/*	$NetBSD: kern.ldscript.2MB,v 1.3 2010/06/01 22:13:30 mjf Exp $	*/
 
 OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64",
 	      "elf64-x86-64")
@@ -33,8 +33,20 @@
    AT (LOADADDR(.text) + (ADDR(.data) - ADDR(.text)))
    {
      *(.data)
-     *(.data.*)
    }
+   . = ALIGN(64);	/* COHERENCY_UNIT */
+   .data.cacheline_aligned :
+   AT (LOADADDR(.text) + (ADDR(.data.cacheline_aligned) - ADDR(.text)))
+   {
+     *(.data.cacheline_aligned)
+   }
+   . = ALIGN(64);	/* COHERENCY_UNIT */
+   .data.read_mostly :
+   AT (LOADADDR(.text) + (ADDR(.data.read_mostly) - ADDR(.text)))
+   {
+     *(.data.read_mostly)
+   }
+   . = ALIGN(64);	/* COHERENCY_UNIT */
    _edata = . ;
    PROVIDE (edata = .) ;
    __bss_start = . ;
Index: src/sys/arch/amd64/conf/kern.ldscript.Xen
diff -u src/sys/arch/amd64/conf/kern.ldscript.Xen:1.2 src/sys/arch/amd64/conf/kern.ldscript.Xen:1.3
--- src/sys/arch/amd64/conf/kern.ldscript.Xen:1.2	Thu Nov 22 16:16:44 2007
+++ src/sys/arch/amd64/conf/kern.ldscript.Xen	Tue Jun  1 22:13:30 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern.ldscript.Xen,v 1.2 2007/11/22 16:16:44 bouyer Exp $	*/
+/*	$NetBSD: kern.ldscript.Xen,v 1.3 2010/06/01 22:13:30 mjf Exp $	*/
 
 OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64",
 	      "elf64-x86-64")
@@ -26,8 +26,20 @@
    AT (LOADADDR(.text) + (ADDR(.data) - ADDR(.text)))
    {
      *(.data)
-     *(.data.*)
    }
+   . = ALIGN(64);	/* COHERENCY_UNIT */
+   .data.cacheline_aligned :
+   AT (LOADADDR(.text) + (ADDR(.data.cacheline_aligned) - ADDR(.text)))
+   {
+     *(.data.cacheline_aligned)
+   }
+   . = ALIGN(64);	/* COHERENCY_UNIT */
+   .data.read_mostly :
+   AT (LOADADDR(.text) + (ADDR(.data.read_mostly) - ADDR(.text)))
+   {
+     *(.data.read_mostly)
+   }
+   . = ALIGN(64);	/* COHERENCY_UNIT */
    _edata = . ;
    PROVIDE (edata = .) ;
    __bss_start = . ;

Index: src/sys/arch/i386/conf/kern.ldscript
diff -u src/sys/arch/i386/conf/kern.ldscript:1.7 src/sys/arch/i386/conf/kern.ldscript:1.8
--- src/sys/arch/i386/conf/kern.ldscript:1.7	Thu Oct 18 15:28:34 2007
+++ src/sys/arch/i386/conf/kern.ldscript	Tue Jun  1 22:13:30 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern.ldscript,v 1.7 2007/10/18 15:28:34 yamt Exp $	*/
+/*	$NetBSD: kern.ldscript,v 1.8 2010/06/01 22:13:30 mjf Exp $	*/
 
 OUTPUT_FORMAT("elf32-i386", "elf32-i386",
 	      "elf32-i386")
@@ -32,8 +32,21 @@
    AT (LOADADDR(.text) + (ADDR(.data) - ADDR(.text)))
    {
      *(.data)
-     *(.data.*)
    }
+   . = ALIGN(64);	/* COHERENCY_UNIT */
+   .data.cacheline_aligned :
+   AT (LOADADDR(.text) + (ADDR(.data.cacheline_aligned) - ADDR(.text)))
+   {
+     *(.data.cacheline_aligned)
+   }
+
+   . = ALIGN(64);	/* COHERENCY_UNIT */
+   .data.read_mostly :
+   AT (LOADADDR(.text) + (ADDR(.data.read_mostly) - ADDR(.text)))
+   {
+     *(.data.read_mostly)
+   }
+   . = ALIGN(64);	/* COHERENCY_UNIT */
    _edata = . ;
    PROVIDE (edata = .) ;
    __bss_start = . ;

Index: src/sys/arch/sparc64/conf/kern.ldscript
diff -u src/sys/arch/sparc64/conf/kern.ldscript:1.10 src/sys/arch/sparc64/conf/kern.ldscript:1.11
--- src/sys/arch/sparc64/conf/kern.ldscript:1.10	Wed Oct 17 19:57:28 2007
+++ src/sys/arch/sparc64/conf/kern.ldscript	Tue Jun  1 22:13:30 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern.ldscript,v 1.10 2007/10/17 19:57:28 garbled Exp $	*/
+/*	$NetBSD: kern.ldscript,v 1.11 2010/06/01 22:13:30 mjf Exp $	*/
 
 /*
  * Kernel linker script for NetBSD/sparc64.  This script is based on
@@ -74,6 +74,11 @@
     CONSTRUCTORS
   }
   .data1   : { *(.data1) }
+  . = ALIGN(64);	/* COHERENCY_UNIT */
+  .data.cacheline_aligned	: { *(.data.cacheline_aligned) }
+  . = ALIGN(64);	/* COHERENCY_UNIT */
+  .data.read_mostly		: { *(.data.read_mostly) }
+  . = ALIGN(64);	/* COHERENCY_UNIT */
   .ctors         :
   {
     *(.ctors)

Index: src/sys/arch/sparc64/conf/kern32.ldscript
diff -u src/sys/arch/sparc64/conf/kern32.ldscript:1.9 src/sys/arch/sparc64/conf/kern32.ldscript:1.10
--- src/sys/arch/sparc64/conf/kern32.ldscript:1.9	Wed Oct 17 19:57:28 2007
+++ src/sys/arch/sparc64/conf/kern32.ldscript	Tue Jun  1 22:13:30 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern32.ldscript,v 1.9 2007/10/17 19:57:28 garbled Exp $	*/
+/*	$NetBSD: kern32.ldscript,v 1.10 2010/06/01 22:13:30 mjf Exp $	*/
 
 /*
  * Kernel linker script for NetBSD/sparc.  This script is based on
@@ -75,6 +75,11 @@
     CONSTRUCTORS
   }
   .data1   : { *(.data1) }
+  . = ALIGN(64);	/* COHERENCY_UNIT */
+  .data.cacheline_aligned : { *(.data.cacheline_aligned) }
+  . = ALIGN(64);	/* COHERENCY_UNIT */
+  .data.read_mostly : { *(.data.read_mostly) }
+  . = ALIGN(64);	/* COHERENCY_UNIT */
   .ctors         :
   {
     *(.ctors)

Index: src/sys/modules/xldscripts/kmodule
diff -u src/sys/modules/xldscripts/kmodule:1.1 src/sys/modules/xldscripts/kmodule:1.2
--- src/sys/modules/xldscripts/kmodule:1.1	Tue Nov 10 14:47:52 2009
+++ src/sys/modules/xldscripts/kmodule	Tue Jun  1 22:13:30 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: kmodule,v 1.1 2009/11/10 14:47:52 skrll Exp $	*/
+/*	$NetBSD: kmodule,v 1.2 2010/06/01 22:13:30 mjf Exp $	*/
 
 SECTIONS
 {
@@ -6,11 +6,15 @@
    .data 0 :
    {
      *(.data)
-     *(.data.*)
      *(.bss)
      *(.bss.*)
      *(COMMON)
    }
+   . = ALIGN(64);	/* COHERENCY_UNIT */
+   .data.cacheline_aligned	: { *(.data.cacheline_aligned) }
+   . = ALIGN(64);	/* COHERENCY_UNIT */
+   .data.read_mostly		: { *(.data.read_mostly) }
+   . = ALIGN(64);	/* COHERENCY_UNIT */
    /* Pre-loaded modules do not need the following. */
    /DISCARD/ :
    {

Index: src/sys/net/if_gre.h
diff -u src/sys/net/if_gre.h:1.39 src/sys/net/if_gre.h:1.40
--- src/sys/net/if_gre.h:1.39	Mon Sep  8 23:36:55 2008
+++ src/sys/net/if_gre.h	Tue Jun  1 22:13:30 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: if_gre.h,v 1.39 2008/09/08 23:36:55 gmcgarry Exp $ */
+/*	$NetBSD: if_gre.h,v 1.40 2010/06/01 22:13:30 mjf Exp $ */
 
 /*
  * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc.
@@ -63,12 +63,10 @@
 	, GRE_S_DIE
 };
 
-#define	__cacheline_aligned	__aligned(CACHE_LINE_SIZE)
-
 struct gre_bufq {
 	volatile int	bq_prodidx;
 	volatile int	bq_considx;
-	size_t		bq_len __cacheline_aligned;
+	size_t		bq_len __aligned(CACHE_LINE_SIZE);
 	size_t		bq_lenmask;
 	volatile int	bq_drops;
 	struct mbuf	**bq_buf;

Index: src/sys/sys/cdefs_elf.h
diff -u src/sys/sys/cdefs_elf.h:1.30 src/sys/sys/cdefs_elf.h:1.31
--- src/sys/sys/cdefs_elf.h:1.30	Mon Jul 21 15:22:19 2008
+++ src/sys/sys/cdefs_elf.h	Tue Jun  1 22:13:30 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: cdefs_elf.h,v 1.30 2008/07/21 15:22:19 lukem Exp $	*/
+/*	$NetBSD: cdefs_elf.h,v 1.31 2010/06/01 22:13:30 mjf Exp $	*/
 
 /*
  * Copyright (c) 1995, 1996 Carnegie-Mellon University.
@@ -30,6 +30,10 @@
 #ifndef _SYS_CDEFS_ELF_H_
 #define	_SYS_CDEFS_ELF_H_
 
+#ifdef _KERNEL_OPT
+#include "opt_multiprocessor.h"
+#endif
+
 #ifdef __LEADING_UNDERSCORE
 #define	_C_LABEL(x)	__CONCAT(_,x)
 #define _C_LABEL_STRING(x)	"_"x
@@ -156,4 +160,38 @@
 #define	__link_set_count(set)						\
 	(__link_set_end(set) - __link_set_start(set))
 
+/*
+ * On multiprocessor systems we can gain an improvement in performance
+ * by being mindful of which cachelines data is placed in.
+ *
+ * __read_mostly:
+ *
+ *	It makes sense to ensure that rarely modified data is not
+ *	placed in the same cacheline as frequently modified data.
+ *	To mitigate the phenomenon known as "false-sharing" we
+ *	can annotate rarely modified variables with __read_mostly.
+ *	All such variables are placed into the .data.read_mostly
+ *	section in the kernel ELF.
+ *
+ *	Prime candidates for __read_mostly annotation are variables
+ *	which are hardly ever modified and which are used in code
+ *	hot-paths, e.g. pmap_initialized.
+ *
+ * __cacheline_aligned:
+ *
+ *	Some data structures (mainly locks) benefit from being aligned
+ *	on a cacheline boundary, and having a cacheline to themselves.
+ *	This way, the modification of other data items cannot adversely
+ *	affect the lock and vice versa.
+ *
+ *	Any variables annotated with __cacheline_aligned will be
+ *	placed into the .data.cacheline_aligned ELF section.
+ */
+#define	__read_mostly						\
+    __attribute__((__section__(".data.read_mostly")))
+
+#define	__cacheline_aligned					\
+    __attribute__((__aligned__(COHERENCY_UNIT),			\
+		 __section__(".data.cacheline_aligned")))
+
 #endif /* !_SYS_CDEFS_ELF_H_ */

Reply via email to