On Tue, Aug 23, 2016 at 06:13:43PM -0700, Jason Low wrote:

> I tested this patch on an 8 socket system with the high_systime AIM7
> workload with diskfs. The patch provided big performance improvements in
> terms of throughput in the highly contended cases.
> 
> -------------------------------------------------
> |  users      | avg throughput | avg throughput |
> |             | without patch  | with patch     |
> -------------------------------------------------
> | 10 - 90     |   13,943 JPM   |   14,432 JPM   |
> -------------------------------------------------
> | 100 - 900   |   75,475 JPM   |  102,922 JPM   |
> -------------------------------------------------
> | 1000 - 1900 |   77,299 JPM   |  115,271 JPM   |
> -------------------------------------------------
> 
> Unfortunately, at 2000 users, the modified kernel locked up.
> 
> # INFO: task reaim:<#> blocked for more than 120 seconds.
> 
> So something appears to be buggy.

Right, so like I said, I think I found the reason for the lockup, and Waiman
appears to have found the reason for your insane performance increase.

Running AIM7 takes ludicrous amounts of time though, so I hacked it up
like below.

That changes two things: it uses log10(rl->runnum) as the scale factor and
allows overriding chld_alrm. I run it with -O60, which gets semi-decent
runtimes.



---
diff --git a/osdl-aim-7/src/driver.c b/osdl-aim-7/src/driver.c
index 306e23b..03be655 100644
--- a/osdl-aim-7/src/driver.c
+++ b/osdl-aim-7/src/driver.c
@@ -98,6 +98,8 @@ struct runloop_input *rl_vars;
 struct disk_data *my_disk;
 struct _aimList *global_list;
 
+int alarm_timeout = 0;
+
 int flag = 0;
 /* for getopt */
 int opt_num = 0;
@@ -222,13 +224,14 @@ int main(int argc, char **argv)
                        {"config", 1, NULL, 'c'},
                        {"nosync", 0, NULL, 'y'},  /* Remove the sync'y 
behavior */
                        {"guesspeak", 0, NULL, 'g'}, /* terrible, but we've 
exhausted the alphabet */
+                       {"timeout", 1, NULL, 'O'},
                        {0, 0, 0, 0}
                };
 
-               c = getopt_long(argc, argv, 
"bvs:e:i:j:d::f:l:p:r:c:Z:z:mqothxyg",
+               c = getopt_long(argc, argv, 
"bvs:e:i:j:d::f:l:p:r:c:Z:z:O:mqothxyg",
                                long_options, &option_index);
 #elif hpux
-               c = getopt(argc, argv, "bvs:e:i:j:d::f:l:p:r:c:Z:z:mqothxyg");
+               c = getopt(argc, argv, "bvs:e:i:j:d::f:l:p:r:c:Z:z:O:mqothxyg");
 #endif
 
                if (c == -1)
@@ -325,6 +328,9 @@ int main(int argc, char **argv)
                        print_usage();
                        exit(1);
                        break;
+               case 'O':
+                       alarm_timeout = atoi(optarg);
+                       break;
 /* MARCIA - DAN z: pass config file, Z: pass tool/script name (default 
perf_tools.sh) */
                case 'Z':
                        tool_name = optarg;
@@ -909,7 +915,7 @@ int runloop(struct _aimList *tlist, struct runloop_input 
*rl)
                long start_tick;
                long delta = 0;
                int chld_alrm = 0;
-
+               int timo;
 
                close(umbilical[0]);
                /* Step 1: seed random number generators
@@ -945,7 +951,15 @@ int runloop(struct _aimList *tlist, struct runloop_input 
*rl)
                        chld_alrm = 10;
                }
                /* now we set a timeout alarm */
-               alarm(rl->runnum * chld_alrm);
+
+               if (alarm_timeout > 0)
+                       chld_alrm = alarm_timeout;
+
+               timo = (unsigned int)(log10((double)rl->runnum) * chld_alrm);
+
+               fprintf(stderr, "alarm: %d = log10(%d) * %d\n", timo, 
rl->runnum, chld_alrm);
+
+               alarm(timo);
                /*
                 * Step 4: Set up mechanism for random 
                 * selection of directory for writes during tests

Reply via email to