Re: [lng-odp] [PATCHv4] example:generator:move verbose from worker to control
+static void print_global_stats(int num_workers) +{ + uint64_t start, now, diff; + uint64_t pkts, pkts_prev = 0, pps, maximum_pps = 0; + int verbose_interval = 20, worker_count; + odp_thrmask_t thrd_mask; + + start = odp_time_cycles(); + while (1) { + now = odp_time_cycles(); + diff = odp_time_diff_cycles(start, now); + if (odp_time_cycles_to_ns(diff) +verbose_interval * ODP_TIME_SEC) { + continue; + } There's no exit condition check in this loop above, so you'll wait up to verbose_interval extra seconds unnecessarily, for example when running with -n 100 the workers will finish quickly. + + start = odp_time_cycles(); + + worker_count = odp_thrmask_worker(thrd_mask); There's a potential race here as the worker thread counts get incremented after the thread has started (rather than during the odph_linux_pthread_create() call), and it's pretty likely this code will be reached by the main thread before the workers have started. Should wait until worker count has reached num_workers before entering the loop. verbose_interval * ODP_TIME_SEC internally a sleep time before reaching this check. Even if we introduce the worker_count == num_workers, we might have to do a default sleep anyway to make sure we have workers up and running otherwise we break the loop. do we still need a separate check for this ? + if (worker_count num_workers) + break; -- Stuart. ___ lng-odp mailing list lng-odp@lists.linaro.org https://lists.linaro.org/mailman/listinfo/lng-odp
Re: [lng-odp] [PATCHv4] example:generator:move verbose from worker to control
On Thu, Aug 20, 2015 at 01:39:29PM +0200, Krishna Garapati wrote: +static void print_global_stats(int num_workers) +{ + uint64_t start, now, diff; + uint64_t pkts, pkts_prev = 0, pps, maximum_pps = 0; + int verbose_interval = 20, worker_count; + odp_thrmask_t thrd_mask; + + start = odp_time_cycles(); + while (1) { + now = odp_time_cycles(); + diff = odp_time_diff_cycles(start, now); + if (odp_time_cycles_to_ns(diff) +verbose_interval * ODP_TIME_SEC) { + continue; + } There's no exit condition check in this loop above, so you'll wait up to verbose_interval extra seconds unnecessarily, for example when running with -n 100 the workers will finish quickly. + + start = odp_time_cycles(); + + worker_count = odp_thrmask_worker(thrd_mask); There's a potential race here as the worker thread counts get incremented after the thread has started (rather than during the odph_linux_pthread_create() call), and it's pretty likely this code will be reached by the main thread before the workers have started. Should wait until worker count has reached num_workers before entering the loop. verbose_interval * ODP_TIME_SEC internally a sleep time before reaching this check. Even if we introduce the worker_count == num_workers, we might have to do a default sleep anyway to make sure we have workers up and running otherwise we break the loop. do we still need a separate check for this ? Sleeps/delays are generally a bad way to resolve race conditions, since the race is still there but you're just avoiding it. If at some point in the future we were to make verbose_interval configurable then it may be exposed again. It's better to have an explicit check that the precondition (workers started) has been met. Also as I said in the previous comment the existing 20 second delay before entering the rest of the loop causes other problems. I was thinking of something like; while (odp_thrmask_workers(thr_mask) num_workers) { } while (odp_thrmask_workers(thr_mask) == num_workers) { .. } -- Stuart. ___ lng-odp mailing list lng-odp@lists.linaro.org https://lists.linaro.org/mailman/listinfo/lng-odp
Re: [lng-odp] [PATCHv4] example:generator:move verbose from worker to control
On Fri, Aug 07, 2015 at 11:00:17AM +0200, Balakrishna.Garapati wrote: Signed-off-by: Balakrishna.Garapati balakrishna.garap...@linaro.org --- Made updates to print recv stats based on mode example/generator/odp_generator.c | 81 --- 1 file changed, 67 insertions(+), 14 deletions(-) diff --git a/example/generator/odp_generator.c b/example/generator/odp_generator.c index bdee222..5d1c54a 100644 --- a/example/generator/odp_generator.c +++ b/example/generator/odp_generator.c @@ -101,6 +101,7 @@ static void usage(char *progname); static int scan_ip(char *buf, unsigned int *paddr); static int scan_mac(char *in, odph_ethaddr_t *des); static void tv_sub(struct timeval *recvtime, struct timeval *sendtime); +static void print_global_stats(int num_workers); /** * Sleep for the specified amount of milliseconds @@ -371,7 +372,6 @@ static odp_pktio_t create_pktio(const char *dev, odp_pool_t pool) static void *gen_send_thread(void *arg) { int thr; - uint64_t start, now, diff; odp_pktio_t pktio; thread_args_t *thr_args; odp_queue_t outq_def; @@ -393,7 +393,6 @@ static void *gen_send_thread(void *arg) return NULL; } - start = odp_time_cycles(); printf( [%02i] created mode: SEND\n, thr); for (;;) { int err; @@ -434,15 +433,6 @@ static void *gen_send_thread(void *arg) = (unsigned int)args-appl.number) { break; } - - now = odp_time_cycles(); - diff = odp_time_diff_cycles(start, now); - if (odp_time_cycles_to_ns(diff) 20 * ODP_TIME_SEC) { - start = odp_time_cycles(); - printf( [%02i] total send: %ju\n, - thr, odp_atomic_load_u64(counters.seq)); - fflush(stdout); - } } /* receive number of reply pks until timeout */ @@ -502,16 +492,16 @@ static void print_pkts(int thr, odp_packet_t pkt_tbl[], unsigned len) continue; odp_atomic_inc_u64(counters.ip); - rlen += sprintf(msg, receive Packet proto:IP ); buf = odp_packet_data(pkt); ip = (odph_ipv4hdr_t *)(buf + odp_packet_l3_offset(pkt)); - rlen += sprintf(msg + rlen, id %d , - odp_be_to_cpu_16(ip-id)); offset = odp_packet_l4_offset(pkt); /* udp */ if (ip-proto == ODPH_IPPROTO_UDP) { odp_atomic_inc_u64(counters.udp); + rlen += sprintf(msg, receive Packet proto:IP ); + rlen += sprintf(msg + rlen, id %d , + odp_be_to_cpu_16(ip-id)); udp = (odph_udphdr_t *)(buf + offset); rlen += sprintf(msg + rlen, UDP payload %d , odp_be_to_cpu_16(udp-length) - There's still an unconditional print for every received packet. @@ -589,6 +579,67 @@ static void *gen_recv_thread(void *arg) return arg; } + +/** + * printing verbose statistics + * + */ +static void print_global_stats(int num_workers) +{ + uint64_t start, now, diff; + uint64_t pkts, pkts_prev = 0, pps, maximum_pps = 0; + int verbose_interval = 20, worker_count; + odp_thrmask_t thrd_mask; + + start = odp_time_cycles(); + while (1) { + now = odp_time_cycles(); + diff = odp_time_diff_cycles(start, now); + if (odp_time_cycles_to_ns(diff) +verbose_interval * ODP_TIME_SEC) { + continue; + } There's no exit condition check in this loop above, so you'll wait up to verbose_interval extra seconds unnecessarily, for example when running with -n 100 the workers will finish quickly. + + start = odp_time_cycles(); + + worker_count = odp_thrmask_worker(thrd_mask); There's a potential race here as the worker thread counts get incremented after the thread has started (rather than during the odph_linux_pthread_create() call), and it's pretty likely this code will be reached by the main thread before the workers have started. Should wait until worker count has reached num_workers before entering the loop. + if (worker_count num_workers) + break; + if (args-appl.mode == APPL_MODE_PING) { + if (worker_count == num_workers) + break; + } + + if (args-appl.mode == APPL_MODE_RCV) { + pkts = odp_atomic_load_u64(counters.udp); + if (pkts != 0) This check isn't needed, I want to see how many packets have been received even (especially) if it's 0. + printf( total receive(UDP: % PRIu64 )\n, + pkts); + continue; + } + + if (args-appl.mode == APPL_MODE_PING) { + pkts = odp_atomic_load_u64(counters.icmp); + if (pkts != 0) Same here. + printf( total receive(ICMP: % PRIu64 )\n, + pkts); + } + + pkts = odp_atomic_load_u64(counters.seq); + printf( total sent: % PRIu64 \n, pkts); + + if (args-appl.mode == APPL_MODE_UDP) { Alignment problem here, need another tab. + pps = (pkts - pkts_prev) / verbose_interval; + if (pps maximum_pps) + maximum_pps = pps; + printf( % PRIu64 pps, % PRIu64 max pps\n, + pps, maximum_pps); + } + + pkts_prev = pkts; This should go inside the condition above. + }; Spurious ; +} + /** * ODP packet example main function */ @@ -796,6 +847,8 @@ int main(int argc, char *argv[]) } } + print_global_stats(num_workers); + /* Master thread waits for
Re: [lng-odp] [PATCHv4] example:generator:move verbose from worker to control
Stuart, does this patch look ok ? On 7 August 2015 at 15:37, Maxim Uvarov maxim.uva...@linaro.org wrote: On 08/07/15 15:23, Krishna Garapati wrote: I will look into the API replacement. If I understood your comment correctly, I am already reusing the initial calculated num_workers and trying to compare it to the current workers to see if any thread has been exited. So that we can break out from the loop. And this check happens only after each verbose_timeout, does it still effects the performance?. Ok, I did not release first that you use odp_thrmask_worker() to check how many worker threads run now. I double checked with Bill and Stuart that we support dynamic workers and that function can be used in that way. And about the MAX_WORKERS, I kinda fixed the issue in the other patch [PATCHv4] example:generator:option to supply core mask ( http://patches.opendataplane.org/patch/2561/). Can you look at that and give me comments which might be relevant patch for this comment?. Ok. Maxim. /Krishna Ma to supply core mask On 7 August 2015 at 13:36, Maxim Uvarov maxim.uva...@linaro.org mailto: maxim.uva...@linaro.org wrote: On 08/07/15 12:00, Balakrishna.Garapati wrote: Signed-off-by: Balakrishna.Garapati balakrishna.garap...@linaro.org mailto:balakrishna.garap...@linaro.org --- Made updates to print recv stats based on mode example/generator/odp_generator.c | 81 --- 1 file changed, 67 insertions(+), 14 deletions(-) diff --git a/example/generator/odp_generator.c b/example/generator/odp_generator.c index bdee222..5d1c54a 100644 --- a/example/generator/odp_generator.c +++ b/example/generator/odp_generator.c @@ -101,6 +101,7 @@ static void usage(char *progname); static int scan_ip(char *buf, unsigned int *paddr); static int scan_mac(char *in, odph_ethaddr_t *des); static void tv_sub(struct timeval *recvtime, struct timeval *sendtime); +static void print_global_stats(int num_workers); /** * Sleep for the specified amount of milliseconds @@ -371,7 +372,6 @@ static odp_pktio_t create_pktio(const char *dev, odp_pool_t pool) static void *gen_send_thread(void *arg) { int thr; - uint64_t start, now, diff; odp_pktio_t pktio; thread_args_t *thr_args; odp_queue_t outq_def; @@ -393,7 +393,6 @@ static void *gen_send_thread(void *arg) return NULL; } - start = odp_time_cycles(); printf( [%02i] created mode: SEND\n, thr); for (;;) { int err; @@ -434,15 +433,6 @@ static void *gen_send_thread(void *arg) = (unsigned int)args-appl.number) { break; } - - now = odp_time_cycles(); - diff = odp_time_diff_cycles(start, now); - if (odp_time_cycles_to_ns(diff) 20 * ODP_TIME_SEC) { - start = odp_time_cycles(); - printf( [%02i] total send: %ju\n, - thr, odp_atomic_load_u64(counters.seq)); - fflush(stdout); - } } /* receive number of reply pks until timeout */ @@ -502,16 +492,16 @@ static void print_pkts(int thr, odp_packet_t pkt_tbl[], unsigned len) continue; odp_atomic_inc_u64(counters.ip); - rlen += sprintf(msg, receive Packet proto:IP ); buf = odp_packet_data(pkt); ip = (odph_ipv4hdr_t *)(buf + odp_packet_l3_offset(pkt)); - rlen += sprintf(msg + rlen, id %d , - odp_be_to_cpu_16(ip-id)); offset = odp_packet_l4_offset(pkt); /* udp */ if (ip-proto == ODPH_IPPROTO_UDP) { odp_atomic_inc_u64(counters.udp); + rlen += sprintf(msg, receive Packet proto:IP ); + rlen += sprintf(msg + rlen, id %d , + odp_be_to_cpu_16(ip-id)); udp = (odph_udphdr_t *)(buf + offset); rlen += sprintf(msg + rlen, UDP payload %d , odp_be_to_cpu_16(udp-length) - @@ -589,6 +579,67 @@ static void *gen_recv_thread(void *arg) return arg; } + +/** + * printing verbose statistics
[lng-odp] [PATCHv4] example:generator:move verbose from worker to control
Signed-off-by: Balakrishna.Garapati balakrishna.garap...@linaro.org --- Made updates to print recv stats based on mode example/generator/odp_generator.c | 81 --- 1 file changed, 67 insertions(+), 14 deletions(-) diff --git a/example/generator/odp_generator.c b/example/generator/odp_generator.c index bdee222..5d1c54a 100644 --- a/example/generator/odp_generator.c +++ b/example/generator/odp_generator.c @@ -101,6 +101,7 @@ static void usage(char *progname); static int scan_ip(char *buf, unsigned int *paddr); static int scan_mac(char *in, odph_ethaddr_t *des); static void tv_sub(struct timeval *recvtime, struct timeval *sendtime); +static void print_global_stats(int num_workers); /** * Sleep for the specified amount of milliseconds @@ -371,7 +372,6 @@ static odp_pktio_t create_pktio(const char *dev, odp_pool_t pool) static void *gen_send_thread(void *arg) { int thr; - uint64_t start, now, diff; odp_pktio_t pktio; thread_args_t *thr_args; odp_queue_t outq_def; @@ -393,7 +393,6 @@ static void *gen_send_thread(void *arg) return NULL; } - start = odp_time_cycles(); printf( [%02i] created mode: SEND\n, thr); for (;;) { int err; @@ -434,15 +433,6 @@ static void *gen_send_thread(void *arg) = (unsigned int)args-appl.number) { break; } - - now = odp_time_cycles(); - diff = odp_time_diff_cycles(start, now); - if (odp_time_cycles_to_ns(diff) 20 * ODP_TIME_SEC) { - start = odp_time_cycles(); - printf( [%02i] total send: %ju\n, - thr, odp_atomic_load_u64(counters.seq)); - fflush(stdout); - } } /* receive number of reply pks until timeout */ @@ -502,16 +492,16 @@ static void print_pkts(int thr, odp_packet_t pkt_tbl[], unsigned len) continue; odp_atomic_inc_u64(counters.ip); - rlen += sprintf(msg, receive Packet proto:IP ); buf = odp_packet_data(pkt); ip = (odph_ipv4hdr_t *)(buf + odp_packet_l3_offset(pkt)); - rlen += sprintf(msg + rlen, id %d , - odp_be_to_cpu_16(ip-id)); offset = odp_packet_l4_offset(pkt); /* udp */ if (ip-proto == ODPH_IPPROTO_UDP) { odp_atomic_inc_u64(counters.udp); + rlen += sprintf(msg, receive Packet proto:IP ); + rlen += sprintf(msg + rlen, id %d , + odp_be_to_cpu_16(ip-id)); udp = (odph_udphdr_t *)(buf + offset); rlen += sprintf(msg + rlen, UDP payload %d , odp_be_to_cpu_16(udp-length) - @@ -589,6 +579,67 @@ static void *gen_recv_thread(void *arg) return arg; } + +/** + * printing verbose statistics + * + */ +static void print_global_stats(int num_workers) +{ + uint64_t start, now, diff; + uint64_t pkts, pkts_prev = 0, pps, maximum_pps = 0; + int verbose_interval = 20, worker_count; + odp_thrmask_t thrd_mask; + + start = odp_time_cycles(); + while (1) { + now = odp_time_cycles(); + diff = odp_time_diff_cycles(start, now); + if (odp_time_cycles_to_ns(diff) + verbose_interval * ODP_TIME_SEC) { + continue; + } + + start = odp_time_cycles(); + + worker_count = odp_thrmask_worker(thrd_mask); + if (worker_count num_workers) + break; + if (args-appl.mode == APPL_MODE_PING) { + if (worker_count == num_workers) + break; + } + + if (args-appl.mode == APPL_MODE_RCV) { + pkts = odp_atomic_load_u64(counters.udp); + if (pkts != 0) + printf( total receive(UDP: % PRIu64 )\n, + pkts); + continue; + } + + if (args-appl.mode == APPL_MODE_PING) { + pkts = odp_atomic_load_u64(counters.icmp); + if (pkts != 0) + printf( total receive(ICMP: % PRIu64 )\n, + pkts); + } + + pkts = odp_atomic_load_u64(counters.seq); + printf( total sent: % PRIu64 \n, pkts); + + if (args-appl.mode == APPL_MODE_UDP) { + pps = (pkts - pkts_prev) / verbose_interval; + if (pps maximum_pps) + maximum_pps = pps; +
Re: [lng-odp] [PATCHv4] example:generator:move verbose from worker to control
On 08/07/15 12:00, Balakrishna.Garapati wrote: Signed-off-by: Balakrishna.Garapati balakrishna.garap...@linaro.org --- Made updates to print recv stats based on mode example/generator/odp_generator.c | 81 --- 1 file changed, 67 insertions(+), 14 deletions(-) diff --git a/example/generator/odp_generator.c b/example/generator/odp_generator.c index bdee222..5d1c54a 100644 --- a/example/generator/odp_generator.c +++ b/example/generator/odp_generator.c @@ -101,6 +101,7 @@ static void usage(char *progname); static int scan_ip(char *buf, unsigned int *paddr); static int scan_mac(char *in, odph_ethaddr_t *des); static void tv_sub(struct timeval *recvtime, struct timeval *sendtime); +static void print_global_stats(int num_workers); /** * Sleep for the specified amount of milliseconds @@ -371,7 +372,6 @@ static odp_pktio_t create_pktio(const char *dev, odp_pool_t pool) static void *gen_send_thread(void *arg) { int thr; - uint64_t start, now, diff; odp_pktio_t pktio; thread_args_t *thr_args; odp_queue_t outq_def; @@ -393,7 +393,6 @@ static void *gen_send_thread(void *arg) return NULL; } - start = odp_time_cycles(); printf( [%02i] created mode: SEND\n, thr); for (;;) { int err; @@ -434,15 +433,6 @@ static void *gen_send_thread(void *arg) = (unsigned int)args-appl.number) { break; } - - now = odp_time_cycles(); - diff = odp_time_diff_cycles(start, now); - if (odp_time_cycles_to_ns(diff) 20 * ODP_TIME_SEC) { - start = odp_time_cycles(); - printf( [%02i] total send: %ju\n, - thr, odp_atomic_load_u64(counters.seq)); - fflush(stdout); - } } /* receive number of reply pks until timeout */ @@ -502,16 +492,16 @@ static void print_pkts(int thr, odp_packet_t pkt_tbl[], unsigned len) continue; odp_atomic_inc_u64(counters.ip); - rlen += sprintf(msg, receive Packet proto:IP ); buf = odp_packet_data(pkt); ip = (odph_ipv4hdr_t *)(buf + odp_packet_l3_offset(pkt)); - rlen += sprintf(msg + rlen, id %d , - odp_be_to_cpu_16(ip-id)); offset = odp_packet_l4_offset(pkt); /* udp */ if (ip-proto == ODPH_IPPROTO_UDP) { odp_atomic_inc_u64(counters.udp); + rlen += sprintf(msg, receive Packet proto:IP ); + rlen += sprintf(msg + rlen, id %d , + odp_be_to_cpu_16(ip-id)); udp = (odph_udphdr_t *)(buf + offset); rlen += sprintf(msg + rlen, UDP payload %d , odp_be_to_cpu_16(udp-length) - @@ -589,6 +579,67 @@ static void *gen_recv_thread(void *arg) return arg; } + +/** + * printing verbose statistics + * + */ +static void print_global_stats(int num_workers) +{ + uint64_t start, now, diff; + uint64_t pkts, pkts_prev = 0, pps, maximum_pps = 0; + int verbose_interval = 20, worker_count; + odp_thrmask_t thrd_mask; + + start = odp_time_cycles(); + while (1) { + now = odp_time_cycles(); + diff = odp_time_diff_cycles(start, now); + if (odp_time_cycles_to_ns(diff) + verbose_interval * ODP_TIME_SEC) { + continue; + } + + start = odp_time_cycles(); + + worker_count = odp_thrmask_worker(thrd_mask); try to not call any odp_api in while 1 loops. Api might be itself slow or block something else, depends on implementation. Here you already calculated number of workers and init. Just reuse that number. Also just looked to that calculation and there is bug: /* Default to system CPU count unless user specified */ num_workers = MAX_WORKERS; if (args-appl.cpu_count) num_workers = args-appl.cpu_count; /* ping mode need two worker */ if (args-appl.mode == APPL_MODE_PING) num_workers = 2; /* Get default worker cpumask */ num_workers = odp_cpumask_def_worker(cpumask, num_workers); (void)odp_cpumask_to_str(cpumask, cpumaskstr, sizeof(cpumaskstr)); MAX_WORKERS is not needed. It has to be just 0. In that case odp_cpumask_def_worker will return all available workers. Maxim. + if (worker_count num_workers) + break; + if (args-appl.mode == APPL_MODE_PING) { + if (worker_count == num_workers) + break; + } + + if (args-appl.mode == APPL_MODE_RCV) { + pkts =