[ewg] [PATCH] Perftest: optimize ConnectX gen2 inline default

2008-07-28 Thread Oren Meron
--- a/write_bw.c
+++ b/write_bw.c
@@ -864,9 +864,13 @@ int main(int argc, char *argv[])
fprintf(stderr, "Failed to query device props");
return 1;
}
-   if ((device_attribute.vendor_part_id == 25418) && 
(!inline_given_in_cmd)) {
+   if ((device_attribute.vendor_part_id == 25408 ||
+   device_attribute.vendor_part_id == 25418 ||
+   device_attribute.vendor_part_id == 26408 ||
+   device_attribute.vendor_part_id == 26418 ||
+   device_attribute.vendor_part_id == 26428) && 
(!inline_given_in_cmd)) {
user_param.inline_size = 1;
-   }
+}
printf("Inline data is used up to %d bytes message\n", 
user_param.inline_size);
 
ctx = pp_init_ctx(ib_dev, size, user_param.tx_depth, ib_port, 
&user_param);
diff --git a/write_bw_postlist.c b/write_bw_postlist.c
index 87903c0..aa8b4c3 100755 (executable)

--- a/write_bw_postlist.c
+++ b/write_bw_postlist.c
@@ -855,9 +855,13 @@ int main(int argc, char *argv[])
fprintf(stderr, "Failed to query device props");
return 1;
}
-   if ((device_attribute.vendor_part_id == 25418) && 
(!inline_given_in_cmd)) {
+   if ((device_attribute.vendor_part_id == 25408 ||
+   device_attribute.vendor_part_id == 25418 ||
+   device_attribute.vendor_part_id == 26408 ||
+   device_attribute.vendor_part_id == 26418 ||
+   device_attribute.vendor_part_id == 26428) && 
(!inline_given_in_cmd)) {
user_param.inline_size = 1;
-   }
+}
printf("Inline data is used up to %d bytes message\n", 
user_param.inline_size);
 
ctx = pp_init_ctx(ib_dev, size, user_param.tx_depth, ib_port, 
&user_param);

--- a/send_bw.c
+++ b/send_bw.c
@@ -1129,7 +1129,11 @@ int main(int argc, char *argv[])
fprintf(stderr, "Failed to query device props");
return 1;
}
-   if ((device_attribute.vendor_part_id == 25418) && 
(!inline_given_in_cmd)) {
+   if ((device_attribute.vendor_part_id == 25408 ||
+   device_attribute.vendor_part_id == 25418 ||
+   device_attribute.vendor_part_id == 26408 ||
+   device_attribute.vendor_part_id == 26418 ||
+   device_attribute.vendor_part_id == 26428) && 
(!inline_given_in_cmd)) {
user_param.inline_size = 1;
}
printf("Inline data is used up to %d bytes message\n", 
user_param.inline_size);
___
ewg mailing list
ewg@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg


[ewg] [PATCH] Perftest: update README

2008-03-31 Thread Oren Meron
Perftest: update README

Signed-off-by: Oren Meron <[EMAIL PROTECTED]>



--- a/README
+++ b/README
@@ -1,7 +1,7 @@
 Open Fabrics Enterprise Distribution (OFED)
 Performance Tests README for OFED 1.3
  
- February 2008
+ March 2008
 
 
 
@@ -57,20 +57,22 @@ Architectures tested:   i686, x86_64, ia64
 3. Test Descriptions
 ===
 
-rdma_lat.c latency test with RDMA write transactions
-rdma_bw.c  streaming bandwidth test with RDMA write transactions
 
 
 The following tests are mainly useful for hardware/software benchmarking.
-They are not intended as actual usage examples.
 
-send_lat.c latency test with send transactions
-send_bw.c  bandwidth test with send transactions
 write_lat.clatency test with RDMA write transactions
 write_bw.c bandwidth test with RDMA write transactions
+send_lat.c latency test with send transactions
+send_bw.c  bandwidth test with send transactions
 read_lat.c latency test with RDMA read transactions
 read_bw.c  bandwidth test with RDMA read transactions
 
+
+Legacy tests: (To be removed in the next release)
+rdma_lat.c latency test with RDMA write transactions
+rdma_bw.c  streaming bandwidth test with RDMA write transactions
+
 The executable name of each test starts with the general prefix "ib_";
 for example, ib_write_lat.
 
@@ -88,13 +90,32 @@ Server: ./ 
 Client:./  
 
o   is IPv4 or IPv6 address. You can use the 
IPoIB
+diags_release_notes.txt 
+mpi-selector_release_notes.txt
+rdma_cm_release_notes.txt
+MSTFLINT_README.txt   
+open_mpi_release_notes.txtRDS_README.txt
+ib-bonding.txt  
+mthca_release_notes.txt  
+opensm_release_notes.txt  
+rds_release_notes.txt
+ibutils_release_notes.txt*  
+mvapich_release_notes.txt 
+PERF_TEST_README.txt  
+sdp_release_notes.txt
+ipoib_release_notes.txt 
+srp_release_notes.txt
+QoS_in_OFED.txt   
+SRPT_README.txt
+mlx4_release_notes.txt  
+QoS_management_in_OpenSM.
address if IPoIB is configured.
o  --help lists the available 
 
   *** IMPORTANT NOTE: The SAME OPTIONS must be passed to both server and 
client.
 
 
-Common Options to tests:
+Common Options to all tests:
   -p, --port=listen on/connect to port  (default: 
18515)
   -m, --mtu=  mtu size (default: 1024)
   -d, --ib-dev=   use IB device  (default: first device 
found)
@@ -102,8 +123,6 @@ Common Options to tests:
   -s, --size=size of message to exchange (default: 1)
   -a, --allrun sizes from 2 till 2^23
   -t, --tx-depth= size of tx queue (default: 50)
-  -g, --mcgsend messages to multicast group 0xc001
-   (only available in send-UD)
   -n, --iters=  number of exchanges (at least 100, default: 
1000)
   -C, --report-cycles  report times in cpu cycle units
(default: microseconds)
@@ -112,17 +131,14 @@ Common Options to tests:
   -U, --report-unsorted(implies -H) print out unsorted results
(default: sorted)
   -V, --versiondisplay version number
-  -I, --inline_size= max size of message to be sent in inline mode
-   (default 400)
-  -N, --no peak-bw cancel peak-bw calculation
-   (default: peak-bw. only available in 
write_bw)
 
   *** IMPORTANT NOTE: You need to be running a Subnet Manager on the switch or
  on one of the nodes in your fabric.
 
 Example:
-Run "ib_rdma_lat -C" on the server side.
-Then run "ib_rdma_lat -C " on the client.
+Run "ib_write_lat -a" on the server side.
+Then run "ib_write_lat -a " on the client side.
+
+ib_write_lat will exit on both server and client after printing results.
 
-ib_rdma_lat will exit on both server and client after printing results.
___
ewg mailing list
ewg@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg


[ewg] [PATCH] Perftest: Replace write_bw_postlist file by correct one

2008-03-31 Thread Oren Meron
Perftest: Replace write_bw_postlist file by correct one

Signed-off-by: Oren Meron <[EMAIL PROTECTED]>

--- a/write_bw_postlist.c
+++ b/write_bw_postlist.c
@@ -1,7 +1,6 @@
 /*
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
- * Copyright (c) 2005 Hewlett Packard, Inc (Grant Grundler)
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -35,13 +34,14 @@
  */
 
 #if HAVE_CONFIG_H
-#include 
+#  include 
 #endif /* HAVE_CONFIG_H */
 
 #include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -60,8 +60,9 @@
 #define VERSION 1.0
 #define ALL 1
 #define MAX_INLINE 400
-static int page_size;
-cycles_t*tstamp;
+#define RC 0
+#define UC 1
+
 struct user_parameters {
const char  *servername;
int connection_type;
@@ -69,28 +70,31 @@ struct user_parameters {
int all; /* run all msg size */
int iters;
int tx_depth;
-   int inline_size;
+int numofqps;
+int maxpostsofqpiniteration;
+int inline_size;
 };
-struct report_options {
-   int unsorted;
-   int histogram;
-   int cycles;   /* report delta's in cycles, not microsec's */
+struct extended_qp {
+  struct ibv_qp   *qp;
+  int  scnt, ccnt ;
 };
+static int page_size;
 
-
+cycles_t   *tposted;
+cycles_t   *tcompleted;
 struct pingpong_context {
struct ibv_context *context;
struct ibv_pd  *pd;
struct ibv_mr  *mr;
struct ibv_cq  *cq;
-   struct ibv_qp  *qp;
+   struct ibv_qp  **qp;
void   *buf;
-   volatile char  *post_buf;
-   volatile char  *poll_buf;
-   int size;
+   unsignedsize;
int tx_depth;
-   struct ibv_sge list;
-   struct ibv_send_wr wr;
+   struct ibv_sge  list;
+struct ibv_send_wr  wr;
+int *scnt;
+int *ccnt ;
 };
 
 struct pingpong_dest {
@@ -112,74 +116,10 @@ static uint16_t pp_get_local_lid(struct pingpong_context 
*ctx, int port)
return attr.lid;
 }
 
-static struct ibv_device *pp_find_dev(const char *ib_devname) {
-   struct ibv_device **dev_list;
-   struct ibv_device *ib_dev = NULL;
-
-   dev_list = ibv_get_device_list(NULL);
-
-   if (!ib_devname) {
-   ib_dev = dev_list[0];
-   if (!ib_dev)
-   fprintf(stderr, "No IB devices found\n");
-   } else {
-   for (; (ib_dev = *dev_list); ++dev_list)
-   if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
-   break;
-   if (!ib_dev)
-   fprintf(stderr, "IB device %s not found\n", ib_devname);
-   }
-   return ib_dev;
-}
-
-#define KEY_MSG_SIZE (sizeof ":00:00::")
-#define KEY_PRINT_FMT "%04x:%06x:%06x:%08x:%016Lx"
-
-static int pp_write_keys(int sockfd, const struct pingpong_dest *my_dest)
-{
-   char msg[KEY_MSG_SIZE];
-
-   sprintf(msg, KEY_PRINT_FMT, my_dest->lid, my_dest->qpn,
-   my_dest->psn, my_dest->rkey, my_dest->vaddr);
-
-   if (write(sockfd, msg, sizeof msg) != sizeof msg) {
-   perror("client write");
-   fprintf(stderr, "Couldn't send local address\n");
-   return -1;
-   }
-
-   return 0;
-}
-
-static int pp_read_keys(int sockfd, const struct pingpong_dest *my_dest,
-   struct pingpong_dest *rem_dest)
-{
-   int parsed;
-   char msg[KEY_MSG_SIZE];
-
-   if (read(sockfd, msg, sizeof msg) != sizeof msg) {
-   perror("pp_read_keys");
-   fprintf(stderr, "Couldn't read remote address\n");
-   return -1;
-   }
-
-   parsed = sscanf(msg, KEY_PRINT_FMT, &rem_dest->lid, &rem_dest->qpn,
-   &rem_dest->psn, &rem_dest->rkey, &rem_dest->vaddr);
-
-   if (parsed != 5) {
-   fprintf(stderr, "Couldn't parse line <%.*s>\n",
-   (int)sizeof msg, msg);
-   return -1;
-   }
-
-   return 0;
-}
-
 static int pp_client_connect(const char *servername, int port)
 {
struct addrinfo *res, *t;
-   struct addrinfo hints = 
-   {
+   struct addrinfo hints = {
.ai_family   = AF_UNSPEC,
.ai_socktype = SOCK_STREAM
};
@@ -216,16 +156,46 @@ static int pp_client_connect(const char *servername, int 
port)
return sockfd;
 }
 
-static int pp_client_exch_dest(int sockfd, const struct pingpong_dest *my_dest,

[ewg] [PATCH] Perftest: send_bw: Added option to cancel peak-bw calculation

2008-03-26 Thread Oren Meron
Perftest: send_bw: Added option to cancel peak-bw calculation in report

Signed-off-by: Oren Meron <[EMAIL PROTECTED]>


--- a/send_bw.c
+++ b/send_bw.c
@@ -629,10 +629,11 @@ static void usage(const char *argv0)
printf("  -b, --bidirectional measure bidirectional bandwidth 
(default unidirectional)\n");
printf("  -V, --version   display version number\n");
printf("  -e, --eventssleep on CQ events (default 
poll)\n");
+   printf("  -N, --no peak-bw  cancel peak-bw calculation (default 
with peak-bw)\n");
 }
 
 static void print_report(unsigned int iters, unsigned size, int duplex,
-cycles_t *tposted, cycles_t *tcompleted)
+cycles_t *tposted, cycles_t *tcompleted, int noPeak)
 {
double cycles_to_units;
unsigned long tsize;/* Transferred size, in megabytes */
@@ -644,23 +645,25 @@ static void print_report(unsigned int iters, unsigned 
size, int duplex,
 
opt_delta = tcompleted[opt_posted] - tposted[opt_completed];
 
-   /* Find the peak bandwidth */
-   for (i = 0; i < iters; ++i)
-   for (j = i; j < iters; ++j) {
-   t = (tcompleted[j] - tposted[i]) / (j - i + 1);
-   if (t < opt_delta) {
-   opt_delta  = t;
-   opt_posted = i;
-   opt_completed = j;
+   if (!noPeak) {
+   /* Find the peak bandwidth, unless asked not to in command line 
*/
+   for (i = 0; i < iters; ++i)
+   for (j = i; j < iters; ++j) {
+   t = (tcompleted[j] - tposted[i]) / (j - i + 1);
+   if (t < opt_delta) {
+   opt_delta  = t;
+   opt_posted = i;
+   opt_completed = j;
+   }
}
-   }
+   }
 
cycles_to_units = get_cpu_mhz() * 100;
 
tsize = duplex ? 2 : 1;
tsize = tsize * size;
printf("%7d%d%7.2f   %7.2f\n",
-  size,iters,tsize * cycles_to_units / opt_delta / 0x10,
+  size,iters,!(noPeak) * tsize * cycles_to_units / opt_delta / 
0x10,
   tsize * iters * cycles_to_units /(tcompleted[iters - 1] - 
tposted[0]) / 0x10);
 }
 int run_iter_bi(struct pingpong_context *ctx, struct user_parameters 
*user_param,
@@ -933,6 +936,7 @@ int main(int argc, char *argv[])
int sockfd;
int  i = 0;
int  size_max_pow = 24;
+   int  noPeak = 0;/*noPeak == 0: regular peak-bw 
calculation done*/
int  inline_given_in_cmd = 0;
struct ibv_context   *context;
/* init default values to user's parameters */
@@ -964,10 +968,11 @@ int main(int argc, char *argv[])
{ .name = "version",.has_arg = 0, .val = 'V' },
{ .name = "events", .has_arg = 0, .val = 'e' },
{ .name = "mcg",.has_arg = 0, .val = 'g' },
+   { .name = "noPeak", .has_arg = 0, .val = 'N' },
{ 0 }
};
 
-   c = getopt_long(argc, argv, "p:d:i:m:c:s:n:t:I:r:ebaVg", 
long_options, NULL);
+   c = getopt_long(argc, argv, "p:d:i:m:c:s:n:t:I:r:ebaVgN", 
long_options, NULL);
if (c == -1)
break;
 
@@ -1053,6 +1058,10 @@ int main(int argc, char *argv[])
user_param.duplex = 1;
break;
 
+   case 'N':
+   noPeak = 1;
+   break;
+
default:
usage(argv[0]);
return 1;
@@ -1238,7 +1247,7 @@ int main(int argc, char *argv[])
return 17;
}
if (user_param.servername) {
-   print_report(user_param.iters, size, 
user_param.duplex, tposted, tcompleted);
+   print_report(user_param.iters, size, 
user_param.duplex, tposted, tcompleted, noPeak);
/* sync again for the sake of UC/UC */
rem_dest = pp_client_exch_dest(sockfd, 
&my_dest);
} else
@@ -1255,7 +1264,7 @@ int main(int argc, char *argv[])
}
 
if (user_param.servername)
-

[ewg] [PATCH] Perftest: Added support to 4K MTU

2008-03-24 Thread Oren Meron
Perftest: Added support for 4K MTU

Signed-off-by: Oren Meron <[EMAIL PROTECTED]>


--- a/read_bw.c
+++ b/read_bw.c
@@ -410,7 +410,7 @@ static int pp_connect_ctx(struct pingpong_context *ctx, int 
port, int my_psn,
attr.path_mtu   = IBV_MTU_2048;
break;
case 4096 :
-   attr.path_mtu   = IBV_MTU_4096;4kmtu
+   attr.path_mtu   = IBV_MTU_4096;
break;
}
printf("Mtu : %d\n", user_parm->mtu);
@@ -463,7 +463,7 @@ static void usage(const char *argv0)
printf("  -p, --port=  listen on/connect to port  
(default 18515)\n");
printf("  -d, --ib-dev= use IB device  (default first 
device found)\n");
printf("  -i, --ib-port=   use port  of IB device (default 
1)\n");
-   printf("  -m, --mtu=mtu size (256 - 4096. default for 
hermon is 2048)\n");4kmtu
+   printf("  -m, --mtu=mtu size (256 - 4096. default for 
hermon is 2048)\n");
printf("  -o, --outs=   num of outstanding read/atom(default 
4)\n");
printf("  -s, --size=  size of message to exchange (default 
65536)\n");
printf("  -a, --all  Run sizes from 2 till 2^23\n");
diff --git a/read_lat.c b/read_lat.c
index d4cc844..8119f57 100755 (executable)

--- a/read_lat.c
+++ b/read_lat.c
@@ -433,7 +433,7 @@ static int pp_connect_ctx(struct pingpong_context *ctx, int 
port, int my_psn,
attr.path_mtu   = IBV_MTU_2048;
break;
case 4096 :
-   attr.path_mtu   = IBV_MTU_4096;4kmtu
+   attr.path_mtu   = IBV_MTU_4096;
break;
}
printf("Mtu : %d\n", user_parm->mtu);
@@ -554,7 +554,7 @@ static void usage(const char *argv0)
printf("Options:\n");
printf("  -p, --port=listen on/connect to port  
(default 18515)\n");
printf("  -c, --connection= connection type RC/UC (default 
RC)\n");
-   printf("  -m, --mtu=  mtu size (256 - 4096. default 
for hermon is 2048)\n");4kmtu
+   printf("  -m, --mtu=  mtu size (256 - 4096. default 
for hermon is 2048)\n");
printf("  -d, --ib-dev=   use IB device  (default 
first device found)\n");
printf("  -i, --ib-port= use port  of IB device 
(default 1)\n");
printf("  -s, --size=size of message to exchange 
(default 1)\n");
diff --git a/send_bw.c b/send_bw.c
index a80fc41..e5c26c3 100755 (executable)

--- a/send_bw.c
+++ b/send_bw.c
@@ -490,7 +490,7 @@ static int pp_connect_ctx(struct pingpong_context *ctx, int 
port, int my_psn,
attr.path_mtu   = IBV_MTU_2048;
break;
case 4096 :
-   attr.path_mtu   = IBV_MTU_4096;4kmtu
+   attr.path_mtu   = IBV_MTU_4096;
break;
}
printf("Mtu : %d\n", user_parm->mtu);
@@ -618,7 +618,7 @@ static void usage(const char *argv0)
printf("  -d, --ib-dev=  use IB device  (default 
first device found)\n");
printf("  -i, --ib-port=use port  of IB device 
(default 1)\n");
printf("  -c, --connection= connection type RC/UC/UD (default 
RC)\n");
-   printf("  -m, --mtu= mtu size (256 - 4096. default for 
hermon is 2048)\n");4kmtu
+   printf("  -m, --mtu= mtu size (256 - 4096. default for 
hermon is 2048)\n");
printf("  -s, --size=   size of message to exchange 
(default 65536)\n");
printf("  -a, --all   Run sizes from 2 till 2^23\n");
printf("  -t, --tx-depth=size of tx queue (default 
300)\n");
diff --git a/send_lat.c b/send_lat.c
index 667cd13..b2796d6 100755 (executable)

--- a/send_lat.c
+++ b/send_lat.c
@@ -507,7 +507,7 @@ static int pp_connect_ctx(struct pingpong_context *ctx, int 
port, int my_psn,
attr.path_mtu   = IBV_MTU_2048;
break;
case 4096 :
-   attr.path_mtu   = IBV_MTU_4096;4kmtu
+   attr.path_mtu   = IBV_MTU_4096;
break;
}
printf("Mtu : %d\n", user_parm->mtu);
@@ -700,7 +700,7 @@ static void usage(const char *argv0)
printf("Options:\n");
printf("  -p, --port=listen on/connect to port  
(default 18515)\n");
printf("  -c, --connection=  connection type RC/UC/UD 
(default RC)\n");
-   printf("  -m,

[ewg] [PATCH] perftest README update

2008-02-25 Thread Oren Meron
Update perftest README

Signed-off-by: Oren Meron <[EMAIL PROTECTED]>

--- a/README
+++ b/README
@@ -1,5 +1,5 @@
 Open Fabrics Enterprise Distribution (OFED)
-Performance Tests README for OFED 1.2
+Performance Tests README for OFED 1.3
  
  February 2008
 
@@ -9,7 +9,7 @@
 Table of Contents
 ===
 1. Overview
-2. Notes on Testing Methodology
+2. Notes on Testing Method
 3. Test Descriptions
 4. Running Tests
 
@@ -18,30 +18,30 @@ Table of Contents
 ===
 This is a collection of tests written over uverbs intended for use as a
 performance micro-benchmark. As an example, the tests can be used for
-HW or SW tuning and/or functional testing.
+hardware or software tuning and/or functional testing.
 
-Please post results/observations to the openib-general mailing list.
+Please post results and observations to the openib-general mailing list.
 See "Contact Us" at http://openib.org/mailman/listinfo/openib-general and
 http://www.openib.org.
 
 
 ===
-2. Notes on Testing Methodology
+2. Notes on Testing Method
 ===
-- The benchmark used the CPU cycle counter to get time stamps without context
-  switch.  Some CPU architectures (e.g., Intel's 80486 or older PPC) do NOT
-  have such capability.
+- The benchmark uses the CPU cycle counter to get time stamps without a context
+  switch. Some CPU architectures (e.g., Intel's 80486 or older PPC) do NOT have
+  such capability.
 
 - The benchmark measures round-trip time but reports half of that as one-way
   latency. This means that it may not be sufficiently accurate for asymmetrical
   configurations.
 
-- Min/Median/Max result is reported.
-  The median (vs average) is less sensitive to extreme scores.
-  Typically, the "Max" value is the first value measured.
+- Min/Median/Max results are reported.
+  The Median (vs average) is less sensitive to extreme scores.
+  Typically, the Max value is the first value measured.
 
-- Larger samples help marginally only. The default (1000) is pretty good.
-  Note that an array of cycles_t (typically unsigned long) is allocated
+- Larger samples only help marginally. The default (1000) is very satisfactory.
+  Note that an array of cycles_t (typically an unsigned long) is allocated
   once to collect samples and again to store the difference between them.
   Really big sample sizes (e.g., 1 million) might expose other problems
   with the program.
@@ -53,30 +53,31 @@ http://www.openib.org.
 Architectures tested:  i686, x86_64, ia64
 
 
-
 ===
-4. Test Descriptions
+3. Test Descriptions
 ===
 
 rdma_lat.c latency test with RDMA write transactions
-rdma_bw.c  streaming BW test with RDMA write transactions
+rdma_bw.c  streaming bandwidth test with RDMA write transactions
 
 
-The following tests are mainly useful for HW/SW benchmarking.
+The following tests are mainly useful for hardware/software benchmarking.
 They are not intended as actual usage examples.
 
 send_lat.c latency test with send transactions
-send_bw.c  BW test with send transactions
+send_bw.c  bandwidth test with send transactions
 write_lat.clatency test with RDMA write transactions
-write_bw.c BW test with RDMA write transactions
+write_bw.c bandwidth test with RDMA write transactions
 read_lat.c latency test with RDMA read transactions
-read_bw.c  BW test with RDMA read transactions
+read_bw.c  bandwidth test with RDMA read transactions
+
+The executable name of each test starts with the general prefix "ib_";
+for example, ib_write_lat.
 
-The executable name of each test starts with the general prefix "ib_",
-e.g., ib_write_lat.
 
-Running Tests
--
+===
+4. Running Tests
+===
 
 Prerequisites: 
kernel 2.6
@@ -101,14 +102,20 @@ Common Options to tests:
   -s, --size=size of message to exchange (default: 1)
   -a, --allrun sizes from 2 till 2^23
   -t, --tx-depth= size of tx queue (default: 50)
-  -g, --mcgsend messages to multicast group 0xc001 (only 
available in send-UD)
+  -g, --mcgsend messages to multicast group 0xc001
+   (only available in send-UD)
   -n, --iters=  number of exchanges (at least 100, default: 
1000)
-  -C, --report-

[ewg] [PATCH] update perftest README

2008-02-19 Thread Oren Meron
Update README

Signed-off-by: Oren Meron <[EMAIL PROTECTED]>

--- a/README
+++ b/README
@@ -1,37 +1,52 @@
-This directory includes gen2 uverbs micro-benchmarks.
+Open Fabrics Enterprise Distribution (OFED)
+Performance Tests README for OFED 1.2
+ 
+ February 2008
 
-The tests are intended as:
-   1) simple, efficient usage examples.
-   Please see the COPYING file if you intend to copy it literally.
 
-   2) a useful benchmark
-   e.g. for HW or SW tuning and/or functional testing.
-   Please post results/observations to the openib-general mailing
-   list. See http://openib.org/mailman/listinfo/openib-general
-   and http://www.openib.org "Contact Us" link for contact details.
 
+===
+Table of Contents
+===
+1. Overview
+2. Notes on Testing Methodology
+3. Test Descriptions
+4. Running Tests
 
-Testing methodology

+===
+1. Overview
+===
+This is a collection of tests written over uverbs intended for use as a
+performance micro-benchmark. As an example, the tests can be used for
+HW or SW tuning and/or functional testing.
 
-- uses CPU cycle counter to get time stamps without context switch.
-  Some CPU architectures do NOT have such capability. e.g. Intel 80486
-  or older PPC.
+Please post results/observations to the openib-general mailing list.
+See "Contact Us" at http://openib.org/mailman/listinfo/openib-general and
+http://www.openib.org.
 
-- measures round-trip time but reports half of that as one-way latency.
-  ie. May not be sufficiently accurate for asymmetrical configurations.
+
+===
+2. Notes on Testing Methodology
+===
+- The benchmark used the CPU cycle counter to get time stamps without context
+  switch.  Some CPU architectures (e.g., Intel's 80486 or older PPC) do NOT
+  have such capability.
+
+- The benchmark measures round-trip time but reports half of that as one-way
+  latency. This means that it may not be sufficiently accurate for asymmetrical
+  configurations.
 
 - Min/Median/Max result is reported.
   The median (vs average) is less sensitive to extreme scores.
-  Typically the "Max" value is the first value measured.
+  Typically, the "Max" value is the first value measured.
 
-- larger samples only marginally help. The default (1000) is pretty good.
+- Larger samples help marginally only. The default (1000) is pretty good.
   Note that an array of cycles_t (typically unsigned long) is allocated
   once to collect samples and again to store the difference between them.
-  Really big sample sizes (e.g. 1 million) might expose other problems
+  Really big sample sizes (e.g., 1 million) might expose other problems
   with the program.
 
-- "-H" option will dump the histogram for additional statistical analysis.
+- The "-H" option will dump the histogram for additional statistical analysis.
   See xgraph, ygraph, r-base (http://www.r-project.org/), pspp, or other 
   statistical math programs.
 
@@ -39,79 +54,68 @@ Architectures tested:   i686, x86_64, ia64
 
 
 
-Test Descriptions
--
+===
+4. Test Descriptions
+===
 
-rdma_lat.c - latency test with RDMA write transactions
-rdma_bw.c - streaming BW test with RDMA write transactions
+rdma_lat.c latency test with RDMA write transactions
+rdma_bw.c  streaming BW test with RDMA write transactions
 
 
 The following tests are mainly useful for HW/SW benchmarking.
 They are not intended as actual usage examples.
--
-
-send_lat.c - latency test with send transactions
-send_bw.c - BW test with send transactions
-write_lat.c - latency test with RDMA write transactions
-write_bw.c - BW test with RDMA write transactions
-read_lat.c - latency test with RDMA read transactions
-read_bw.c - BW test with RDMA read transactions
-
-Test's executable name starts with the general prefix ib_ (e.g. ib_write_lat).
 
-Build Tests

+send_lat.c latency test with send transactions
+send_bw.c  BW test with send transactions
+write_lat.clatency test with RDMA write transactions
+write_bw.c BW test with RDMA write transactions
+read_lat.c latency test with RDMA read transactions
+read_bw.c  BW test with RDMA read transactions
 
-"make" to build 

[ewg] [PATCH] rdma_lat: Add option to support devices with different inline max values

2008-02-13 Thread Oren Meron
--- a/rdma_lat.c
+++ b/rdma_lat.c
@@ -60,6 +60,7 @@
 #define PINGPONG_RDMA_WRID 3
 #define MAX_INLINE 400
 
+static int inline_size = MAX_INLINE;
 static int page_size;
 static pid_t pid;
 
@@ -603,7 +604,7 @@ static struct pingpong_context *pp_init_ctx(void *ptr, 
struct pp_data *data)
.max_recv_wr  = 1,
.max_send_sge = 1,
.max_recv_sge = 1,
-   .max_inline_data = MAX_INLINE
+   .max_inline_data = inline_size,
},
.qp_type = IBV_QPT_RC
};
@@ -737,7 +738,12 @@ static int pp_open_port(struct pingpong_context *ctx, 
struct pp_data *data )
return -1;
}
 
-   write(data->sockfd, "done", sizeof "done");
+   if (write(data->sockfd, "done", sizeof "done") != sizeof "done"){
+   perror("write");
+   fprintf(stderr, "Couldn't write to socket\n");
+   return 1;
+   }
+
close(data->sockfd);

return 0;
@@ -915,6 +921,7 @@ static void usage(const char *argv0)
printf("  -s, --size=  size of message to exchange (default 
1)\n");
printf("  -t, --tx-depth=   size of tx queue (default 50)\n");
printf("  -n, --iters=number of exchanges (at least 2, 
default 1000)\n");
+   printf("  -I, --inline_size=  max size of message to be sent in 
inline mode (default 400)\n");
printf("  -C, --report-cyclesreport times in cpu cycle units 
(default microseconds)\n");
printf("  -H, --report-histogram print out all results (default print 
summary only)\n");
printf("  -U, --report-unsorted  (implies -H) print out unsorted 
results (default sorted)\n");
@@ -1036,6 +1043,7 @@ int main(int argc, char *argv[])
{ .name = "size",   .has_arg = 1, .val = 's' },
{ .name = "iters",  .has_arg = 1, .val = 'n' },
{ .name = "tx-depth",   .has_arg = 1, .val = 't' },
+   { .name = "inline_size", .has_arg = 1, .val = 'I' },
{ .name = "report-cycles",  .has_arg = 0, .val = 'C' },
{ .name = "report-histogram",.has_arg = 0, .val = 'H' },
{ .name = "report-unsorted",.has_arg = 0, .val = 'U' },
@@ -1043,7 +1051,7 @@ int main(int argc, char *argv[])
{ 0 }
};
 
-   c = getopt_long(argc, argv, "p:d:i:s:n:t:CHUc", long_options, 
NULL);
+   c = getopt_long(argc, argv, "p:d:i:s:n:t:I:CHUc", long_options, 
NULL);
if (c == -1)
break;
 
@@ -1084,7 +1092,10 @@ int main(int argc, char *argv[])
usage(argv[0]);
return 5;
}
+   break;
 
+   case 'I':
+   inline_size = strtol(optarg, NULL, 0);
break;
 
case 'C':
@@ -1192,7 +1203,7 @@ int main(int argc, char *argv[])
ctx->wr.sg_list= &ctx->list;
ctx->wr.num_sge= 1;
ctx->wr.opcode = IBV_WR_RDMA_WRITE;
-   if (ctx->size > MAX_INLINE || ctx->size == 0) {
+   if (ctx->size > inline_size || ctx->size == 0) {
ctx->wr.send_flags = IBV_SEND_SIGNALED;
} else {
ctx->wr.send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE;
___
ewg mailing list
ewg@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg


[ewg] [PATCH] cancel inline default for Hermon

2008-01-31 Thread Oren Meron
perftest: cancel inline default for Hermon to optimize BW performance in send 
and write

Signed-off-by: Oren Meron <[EMAIL PROTECTED]>

--- a/send_bw.c
+++ b/send_bw.c
@@ -922,6 +922,7 @@ int main(int argc, char *argv[])
struct pingpong_dest my_dest;
struct pingpong_dest*rem_dest;
struct user_parameters  user_param;
+   struct ibv_device_attr device_attribute;
char*ib_devname = NULL;
int  port = 18515;
int  ib_port = 1;
@@ -929,6 +930,8 @@ int main(int argc, char *argv[])
int sockfd;
int  i = 0;
int  size_max_pow = 24;
+   int  inline_given_in_cmd = 0;
+   struct ibv_context   *context;
/* init default values to user's parameters */
memset(&user_param, 0, sizeof(struct user_parameters));
user_param.mtu = 0;
@@ -937,7 +940,7 @@ int main(int argc, char *argv[])
user_param.servername = NULL;
user_param.use_event = 0;
user_param.duplex = 0;
-user_param.inline_size = MAX_INLINE;
+   user_param.inline_size = MAX_INLINE;
/* Parameter parsing. */
while (1) {
int c;
@@ -1022,6 +1025,7 @@ int main(int argc, char *argv[])
 
case 'I':
user_param.inline_size = strtol(optarg, NULL, 0);
+   inline_given_in_cmd =1;
if (user_param.inline_size > MAX_INLINE) {
usage(argv[0]);
return 7;
@@ -1069,7 +1073,6 @@ int main(int argc, char *argv[])
else
printf("Send BW Test\n");
 
-   printf("Inline data is used up to %d bytes message\n", 
user_param.inline_size);
if (user_param.connection_type == RC)
printf("Connection type : RC\n");
else if (user_param.connection_type == UC)
@@ -1109,6 +1112,16 @@ int main(int argc, char *argv[])
}
}
 
+   context = ibv_open_device(ib_dev);
+   if (ibv_query_device(context, &device_attribute)) {
+   fprintf(stderr, "Failed to query device props");
+   return 1;
+   }
+   if ((device_attribute.vendor_part_id == 25418) && 
(!inline_given_in_cmd)) {
+   user_param.inline_size = 1;
+   }
+   printf("Inline data is used up to %d bytes message\n", 
user_param.inline_size);
+
ctx = pp_init_ctx(ib_dev, size, user_param.tx_depth, 
user_param.rx_depth,
  ib_port, &user_param);
if (!ctx)
diff --git a/write_bw.c b/write_bw.c
index 83a8af4..7c518aa 100644 (file)

--- a/write_bw.c
+++ b/write_bw.c
@@ -666,6 +666,7 @@ int main(int argc, char *argv[])
struct pingpong_dest *my_dest;
struct pingpong_dest**rem_dest;
struct user_parameters  user_param;
+   struct ibv_device_attr device_attribute;
char*ib_devname = NULL;
int  port = 18515;
int  ib_port = 1;
@@ -674,6 +675,8 @@ int main(int argc, char *argv[])
int  duplex = 0;
int  i = 0;
int  noPeak = 0;/*noPeak == 0: regular peak-bw 
calculation done*/
+   int  inline_given_in_cmd = 0;
+   struct ibv_context   *context;
 
/* init default values to user's parameters */
memset(&user_param, 0, sizeof(struct user_parameters));
@@ -767,6 +770,7 @@ int main(int argc, char *argv[])
 
case 'I':
user_param.inline_size = strtol(optarg, NULL, 0);
+   inline_given_in_cmd =1;
if (user_param.inline_size > MAX_INLINE) {
usage(argv[0]);
return 7;
@@ -810,7 +814,6 @@ int main(int argc, char *argv[])
  printf("RDMA_Write BW Test\n");
}

-   printf("Inline data is used up to %d bytes message\n", 
user_param.inline_size);
printf("Number of qp's running %d\n",user_param.numofqps);
if (user_param.connection_type==RC) {
printf("Connection type : RC\n");
@@ -853,6 +856,16 @@ int main(int argc, char *argv[])
}
}
 
+   context = ibv_open_device(ib_dev);
+   if (ibv_query_device(context, &device_attribute)) {
+   fprintf(stderr, "Failed to query device props");
+   return 1;
+   }
+   if ((device_attribute.vendor_part_id == 25418) && 
(!inline_given_in_cmd)) {
+   user_param.inline_size = 1;

[ewg] [PATCH] perftest rdma_lat.c bug fix

2008-01-14 Thread Oren Meron
Perftest: fix a bug in rdma_lat.c. Messages up to 400 bytes will be
sent inline.

Signed-off-by: Oren Meron <[EMAIL PROTECTED]>

--- a/rdma_lat.c
  
+++ b/rdma_lat.c
  
@@ -58,6

+58,7
  @@

#include "get_clock.h" 
#define PINGPONG_RDMA_WRID 3 
+#define MAX_INLINE 400 
static int page_size; 
static pid_t pid; 
@@ -602,7

+603,7
  @@
static struct pingpong_context *pp_init_ctx(void *ptr, struct pp_data
*data) 
.max_recv_wr = 1, 
.max_send_sge = 1, 
.max_recv_sge = 1, 
- .max_inline_data = 0 
+ .max_inline_data = MAX_INLINE 
}, 
.qp_type = IBV_QPT_RC 
}; 
@@ -640,7

+641,7
  @@
static struct pingpong_context *pp_init_ctx(void *ptr, struct pp_data
*data) 
} 
} 
- return ctx; 
+ return ctx; 
} 
static int pp_connect_ctx(struct pingpong_context *ctx, struct
pp_data *data) 
@@ -1191,7

+1192,11
  @@
int main(int argc, char *argv[]) 
ctx->wr.sg_list = &ctx->list; 
ctx->wr.num_sge = 1; 
ctx->wr.opcode = IBV_WR_RDMA_WRITE; 
- ctx->wr.send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE; 
+ if (ctx->size > MAX_INLINE || ctx->size == 0) { 
+ ctx->wr.send_flags = IBV_SEND_SIGNALED; 
+ } else { 
+ ctx->wr.send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE; 
+ } 
    ctx->wr.next = NULL; 
scnt = 0;


Oren   Meron
Performance

___
ewg mailing list
ewg@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg