On Mon, Oct 01, 2007 at 08:50:50PM -0700, David Miller wrote:
> From: [EMAIL PROTECTED] (Larry McVoy)
> Date: Mon, 1 Oct 2007 19:20:59 -0700
>
> > A short summary is "can someone please post a test program that sources
> > and sinks data at the wire speed?" because apparently I'm too old and
> > clueless to write such a thing.
>
> You're not showing us your test program so there is no way we
> can help you out.

Attached. Drop it into an lmbench tree and build it.

> My initial inclination, even without that critical information,
> is to ask whether you are setting any socket options in any way?

The only one I was playing with was SO_RCVBUF/SO_SNDBUF and I tried
disabling that and I tried playing with the read/write size. Didn't help.

> In particular, SO_RCVLOWAT can have a large effect here, if you're
> setting it to something, that would explain why dd is doing better. A
> lot of people link to "helper libraries" with interfaces to setup
> sockets with all sorts of socket option settings by default, try not
> using such things if possible.

Agreed. That was my first thought as well, I must have been doing
something that messed up the defaults. But you did get the strace output,
there wasn't anything weird there.

> You also shouldn't dork at all with the receive and send buffer sizes.
> They are adjusted dynamically by the kernel as the window grows. But
> if you set them to specific values, this dynamic logic is turned off.

Yeah, dorking with those is left over from the bad old days of '95 when
lmbench was first shipped. But I turned that all off and no difference.

So feel free to show me where I'm an idiot in the code, but if you can't,
then what would rock would be a little send.c / recv.c that demonstrated
filling the pipe.
--
---
Larry McVoy lm at bitmover.com http://www.bitkeeper.com

/* * bytes_tcp.c - simple TCP bandwidth source/sink * * server usage: bytes_tcp -s * client usage: bytes_tcp hostname [msgsize] * * Copyright (c) 1994 Larry McVoy. * Copyright (c) 2002 Carl Staelin. Distributed under the FSF GPL with * additional restriction that results may published only if * (1) the benchmark is unmodified, and * (2) the version in the sccsid below is included in the report. * Support for this development by Sun Microsystems is gratefully acknowledged. */ char *id = "$Id$\n"; #include "bench.h" #define XFER (1024*1024) int server_main(int ac, char **av); int client_main(int ac, char **av); void source(int data); void transfer(int get, int server, char *buf) { int c; while ((get > 0) && (c = read(server, buf, XFER)) > 0) { get -= c; } if (c < 0) { perror("bytes_tcp: transfer: read failed"); exit(4); } } /* ARGSUSED */ int client_main(int ac, char **av) { int server; int get = 256 << 20; char buf[XFER]; char* usage = "usage: %s -remotehost OR %s remotehost [msgsize]\n"; if (ac != 2 && ac != 3) { (void)fprintf(stderr, usage, av[0], av[0]); exit(0); } if (ac == 3) get = bytes(av[2]); server = tcp_connect(av[1], TCP_DATA+1, SOCKOPT_READ|SOCKOPT_REUSE); if (server < 0) { perror("bytes_tcp: could not open socket to server"); exit(2); } transfer(get, server, buf); close(server); exit(0); /*NOTREACHED*/ } void child() { wait(0); signal(SIGCHLD, child); } /* ARGSUSED */ int server_main(int ac, char **av) { int data, newdata; signal(SIGCHLD, child); data = tcp_server(TCP_DATA+1, SOCKOPT_READ|SOCKOPT_WRITE|SOCKOPT_REUSE); for ( ;; ) { newdata = tcp_accept(data, SOCKOPT_WRITE|SOCKOPT_READ); switch (fork()) { case -1: perror("fork"); break; case 0: source(newdata); exit(0); default: close(newdata); break; } } } void source(int data) { char buf[XFER]; while (write(data, buf, sizeof(buf)) > 0); } int main(int ac, char **av) { char* usage = "Usage: %s -s OR %s -serverhost OR %s serverhost [msgsize]\n"; if (ac < 2 || 3 < ac) { fprintf(stderr, usage, av[0], av[0], 
av[0]); exit(1); } if (ac == 2 && !strcmp(av[1], "-s")) { if (fork() == 0) server_main(ac, av); exit(0); } else { client_main(ac, av); } return(0); }
/* * tcp_lib.c - routines for managing TCP connections. * * Positive port/program numbers are RPC ports, negative ones are TCP ports. * * Copyright (c) 1994-1996 Larry McVoy. */ #define _LIB /* bench.h needs this */ #include "bench.h" /* * Get a TCP socket, bind it, figure out the port, * and advertise the port as program "prog". * * XXX - it would be nice if you could advertise ascii strings. */ int tcp_server(int prog, int rdwr) { int sock; struct sockaddr_in s; #ifdef LIBTCP_VERBOSE fprintf(stderr, "tcp_server(%u, %u)\n", prog, rdwr); #endif if ((sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0) { perror("socket"); exit(1); } sock_optimize(sock, rdwr); bzero((void*)&s, sizeof(s)); s.sin_family = AF_INET; if (prog < 0) { s.sin_port = htons(-prog); } if (bind(sock, (struct sockaddr*)&s, sizeof(s)) < 0) { perror("bind"); exit(2); } if (listen(sock, 100) < 0) { perror("listen"); exit(4); } if (prog > 0) { #ifdef LIBTCP_VERBOSE fprintf(stderr, "Server port %d\n", sockport(sock)); #endif (void)pmap_unset((u_long)prog, (u_long)1); if (!pmap_set((u_long)prog, (u_long)1, (u_long)IPPROTO_TCP, (unsigned short)sockport(sock))) { perror("pmap_set"); exit(5); } } return (sock); } /* * Unadvertise the socket */ int tcp_done(int prog) { if (prog > 0) { pmap_unset((u_long)prog, (u_long)1); } return (0); } /* * Accept a connection and return it */ int tcp_accept(int sock, int rdwr) { struct sockaddr_in s; int newsock, namelen; namelen = sizeof(s); bzero((void*)&s, namelen); retry: if ((newsock = accept(sock, (struct sockaddr*)&s, &namelen)) < 0) { if (errno == EINTR) goto retry; perror("accept"); exit(6); } #ifdef LIBTCP_VERBOSE fprintf(stderr, "Server newsock port %d\n", sockport(newsock)); #endif sock_optimize(newsock, rdwr); return (newsock); } /* * Connect to the TCP socket advertised as "prog" on "host" and * return the connected socket. * * Hacked Thu Oct 27 1994 to cache pmap_getport calls. This saves * about 4000 usecs in loopback lat_connect calls. 
I suppose we * should time gethostbyname() & pmap_getprot(), huh? */ int tcp_connect(char *host, int prog, int rdwr) { static struct hostent *h; static struct sockaddr_in s; static u_short save_port; static u_long save_prog; static char *save_host; int sock; static int tries = 0; if ((sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0) { perror("socket"); exit(1); } if (rdwr & SOCKOPT_PID) { static unsigned short port; struct sockaddr_in sin; if (!port) { port = (unsigned short)(getpid() << 4); if (port < 1024) { port += 1024; } } do { port++; bzero((void*)&sin, sizeof(sin)); sin.sin_family = AF_INET; sin.sin_port = htons(port); } while (bind(sock, (struct sockaddr*)&sin, sizeof(sin)) == -1); } #ifdef LIBTCP_VERBOSE else { struct sockaddr_in sin; bzero((void*)&sin, sizeof(sin)); sin.sin_family = AF_INET; if (bind(sock, (struct sockaddr*)&sin, sizeof(sin)) < 0) { perror("bind"); exit(2); } } fprintf(stderr, "Client port %d\n", sockport(sock)); #endif sock_optimize(sock, rdwr); if (!h || host != save_host || prog != save_prog) { save_host = host; /* XXX - counting on them not * changing it - benchmark only. 
*/ save_prog = prog; if (!(h = gethostbyname(host))) { perror(host); exit(2); } bzero((void *) &s, sizeof(s)); s.sin_family = AF_INET; bcopy((void*)h->h_addr, (void *)&s.sin_addr, h->h_length); if (prog > 0) { save_port = pmap_getport(&s, prog, (u_long)1, IPPROTO_TCP); if (!save_port) { perror("lib TCP: No port found"); exit(3); } #ifdef LIBTCP_VERBOSE fprintf(stderr, "Server port %d\n", save_port); #endif s.sin_port = htons(save_port); } else { s.sin_port = htons(-prog); } } if (connect(sock, (struct sockaddr*)&s, sizeof(s)) < 0) { if (errno == ECONNRESET || errno == ECONNREFUSED) { close(sock); if (++tries > 10) return(-1); return (tcp_connect(host, prog, rdwr)); } perror("connect"); exit(4); } tries = 0; return (sock); } #define LIBTCP_VERBOSE void sock_optimize(int sock, int flags) { return; if (flags & SOCKOPT_READ) { int sockbuf = SOCKBUF; while (setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &sockbuf, sizeof(int))) { sockbuf >>= 1; } #ifdef LIBTCP_VERBOSE fprintf(stderr, "sockopt %d: RCV: %dK\n", sock, sockbuf>>10); #endif } if (flags & SOCKOPT_WRITE) { int sockbuf = SOCKBUF; while (setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &sockbuf, sizeof(int))) { sockbuf >>= 1; } #ifdef LIBTCP_VERBOSE fprintf(stderr, "sockopt %d: SND: %dK\n", sock, sockbuf>>10); #endif } if (flags & SOCKOPT_REUSE) { int val = 1; if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)) == -1) { perror("SO_REUSEADDR"); } } } int sockport(int s) { int namelen; struct sockaddr_in sin; namelen = sizeof(sin); if (getsockname(s, (struct sockaddr *)&sin, &namelen) < 0) { perror("getsockname"); return(-1); } return ((int)ntohs(sin.sin_port)); }