Stefan Lambrev wrote:

I'll use hwpmc and LOCK_PROFILING again to see what's going on.
And I will try the same benchmark on a quad-core processor, as the number of cores/CPUs now matters :)

Here are the promised results - http://89.186.204.158/lock_profiling-8.txt

Thanks. There is further work needed on the route locking, and also you are hitting limitations of the em driver (or possibly hardware; if you only have a single transmit queue then outbound packets from multiple CPUs have to be serialized in the driver no matter what). Hopefully there will be further improvements in the coming months, and these changes will also migrate into CVS.

If you want to start hacking things to see how much further progress is feasible, you can apply the attached hack that nulls out all route locking :) This should be OK as long as your routes are not changing, although you might get some spam on the console (if this is excessive, comment out the printfs also ;-). It may not help much though, all the contention will probably just fall through onto the ethernet driver.

Btw, I got a kernel panic the first time I ran sysctl debug.lock.prof.stats

Yeah, it is a bit broken in 8.0 even in CVS. Also make sure not to reset it while the CPUs are loaded :)

I'm still trying to get hwpmc working with my CPUs and the new kernel.
Do you have any patches Kris?
Is it supposed to work with your sources on my CPU?
I can fetch your latest src/lib/libpmc from p4 if this will help :)

It works on my systems...try with libpmc from my branch, make sure to install the new includes first and then rebuild and reinstall libpmc and pmcstat. I have attached a patch against the CVS libpmc which might be easier than checking it out from p4...it relies on kernel changes also though, which are in the kernel you already have but not in CVS.

Kris
--- //depot/vendor/freebsd/src/lib/libpmc/libpmc.c      2007/12/07 14:42:05
+++ //depot/user/kris/contention/lib/libpmc/libpmc.c    2007/12/28 20:32:24
@@ -46,16 +46,14 @@
 #if defined(__i386__)
 static int k7_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
     struct pmc_op_pmcallocate *_pmc_config);
+static int p5_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
+    struct pmc_op_pmcallocate *_pmc_config);
 #endif
 #if defined(__amd64__) || defined(__i386__)
 static int k8_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
     struct pmc_op_pmcallocate *_pmc_config);
 static int p4_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
     struct pmc_op_pmcallocate *_pmc_config);
-#endif
-#if defined(__i386__)
-static int p5_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
-    struct pmc_op_pmcallocate *_pmc_config);
 static int p6_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
     struct pmc_op_pmcallocate *_pmc_config);
 #endif
@@ -1282,26 +1280,6 @@
        return (0);
 }
 
-#endif
-
-#if defined(__i386__)
-
-/*
- * Pentium style PMCs
- */
-
-static struct pmc_event_alias p5_aliases[] = {
-       EV_ALIAS("cycles", "tsc"),
-       EV_ALIAS(NULL, NULL)
-};
-
-static int
-p5_allocate_pmc(enum pmc_event pe, char *ctrspec,
-    struct pmc_op_pmcallocate *pmc_config)
-{
-       return (-1 || pe || ctrspec || pmc_config); /* shut up gcc */
-}
-
 /*
  * Pentium Pro style PMCs.  These PMCs are found in Pentium II, Pentium III,
  * and Pentium M CPUs.
@@ -1629,9 +1607,30 @@
        return (0);
 }
 
+
 #endif
 
+#if defined(__i386__)
+
 /*
+ * Pentium style PMCs
+ */
+
+static struct pmc_event_alias p5_aliases[] = {
+       EV_ALIAS("cycles", "tsc"),
+       EV_ALIAS(NULL, NULL)
+};
+
+static int
+p5_allocate_pmc(enum pmc_event pe, char *ctrspec,
+    struct pmc_op_pmcallocate *pmc_config)
+{
+       return -1 || pe || ctrspec || pmc_config; /* shut up gcc */
+}
+
+#endif
+
+/*
  * API entry points
  */
 
@@ -1940,6 +1939,8 @@
                pmc_mdep_event_aliases = p5_aliases;
                pmc_mdep_allocate_pmc = p5_allocate_pmc;
                break;
+#endif
+#if defined(__amd64__) || defined(__i386__)
        case PMC_CPU_INTEL_P6:          /* P6 ... Pentium M CPUs have */
        case PMC_CPU_INTEL_PII:         /* similar PMCs. */
        case PMC_CPU_INTEL_PIII:
@@ -1947,8 +1948,6 @@
                pmc_mdep_event_aliases = p6_aliases;
                pmc_mdep_allocate_pmc = p6_allocate_pmc;
                break;
-#endif
-#if defined(__amd64__) || defined(__i386__)
        case PMC_CPU_INTEL_PIV:
                pmc_mdep_event_aliases = p4_aliases;
                pmc_mdep_allocate_pmc = p4_allocate_pmc;
==== //depot/user/kris/net/net/route.c#2 - /zoo/kris/net/net/route.c ====
@@ -1153,7 +1153,6 @@
        struct radix_node_head *rnh = rt_tables[dst->sa_family];
        int dlen = SA_SIZE(dst), glen = SA_SIZE(gate);
 
-again:
        RT_LOCK_ASSERT(rt);
 
        /*
@@ -1187,15 +1186,6 @@
                        return (EADDRINUSE); /* failure */
                }
                /*
-                * Try to reacquire the lock on rt, and if it fails,
-                * clean state and restart from scratch.
-                */
-               if (!RT_TRYLOCK(rt)) {
-                       RTFREE_LOCKED(gwrt);
-                       RT_LOCK(rt);
-                       goto again;
-               }
-               /*
                 * If there is already a gwroute, then drop it. If we
                 * are asked to replace route with itself, then do
                 * not leak its refcounter.
==== //depot/user/kris/net/net/route.h#2 - /zoo/kris/net/net/route.h ====
@@ -288,6 +288,7 @@
 
 #define        RT_LOCK_INIT(_rt) \
        rw_init_flags(&(_rt)->rt_lock, "rtentry", RW_DUPOK)
+#if 0
 #define       RT_LOCK(_rt)            rw_wlock(&(_rt)->rt_lock)
 #define       RT_TRYLOCK(_rt)          rw_try_wlock(&(_rt)->rt_lock)
 #define       RT_UNLOCK(_rt)          rw_wunlock(&(_rt)->rt_lock)
@@ -297,6 +298,16 @@
 #define       RT_LOCK_DESTROY(_rt)    rw_destroy(&(_rt)->rt_lock)
 #define       RT_LOCK_ASSERT(_rt)     rw_assert(&(_rt)->rt_lock, RA_LOCKED)
 #define       RT_UNLOCK_ASSERT(_rt)   rw_assert(&(_rt)->rt_lock, RA_UNLOCKED)
+#endif
+#define       RT_LOCK(_rt)  
+#define       RT_TRYLOCK(_rt)
+#define       RT_UNLOCK(_rt)          
+#define       RT_LOCK_SHARED(_rt)     
+#define       RT_UNLOCK_SHARED(_rt)   
+#define       RT_LOCK_DOWNGRADE(_rt)  
+#define       RT_LOCK_DESTROY(_rt)   
+#define       RT_LOCK_ASSERT(_rt)   
+#define       RT_UNLOCK_ASSERT(_rt)   
 
 #define        RT_ADDREF(_rt)  do {                                    \
        RT_LOCK_ASSERT(_rt);                                    \
_______________________________________________
freebsd-performance@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-performance
To unsubscribe, send any mail to "[EMAIL PROTECTED]"

Reply via email to