Eureka!

Nadav Har'El wrote:

> On Mon, Mar 18, 2002, Malcolm Kavalsky wrote about "Re: pthreads 
> question":
>
>> I asked one of the top Unix hackers that I know, and he said:
>>
>> "I would guess that if you do large af_unix transfers that are page
>> aligned then the system doesn't have to actually copy the data rather it
>> can share the page and do a copy on write. This preserves the socket
>> semantics and can be faster than memcpy. This was done many years ago in
>> Solaris."
>>
>> I wonder if digging deep enough in the kernel sources will reveal
>> this ...
>
>
> You can check whether this is the case by following each send or memcpy
> with a memset() of the buffer. If the memcpy method suddenly becomes
> quicker, this explanation might be true.
> Strange though - how come malloc() returns page-aligned buffers? Does the
> Linux code really check for this rare and rather esoteric case? (If you
> write to the buffer after sending it, and the kernel can't know you're
> writing whole pages, it will have to do a copy-on-write and do the copy
> anyway.)
>
This is exactly what happened! I added a memset() after the memcpy, and also
after sending the buffer. The results are:

Memcpy'ed and memsetted 1000 blocks of size 1048576 in 18 seconds => 55 
Mbytes/second

Started receiving at Mon Mar 18 13:41:13 2002
Received 1048576000 bytes in 17 seconds over unix socket =>   59 
Mbytes/second

Started sending at Mon Mar 18 13:41:13 2002
Sent and memsetted 1000 blocks of size 1048576 in 17 seconds over unix 
socket => 58 Mbytes/second


(You will notice that I also print the exact time at which sending and
receiving started, to confirm that there is no delay between the two.)

I also attach the source file for reference.


Malcolm


#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/wait.h>
#include <unistd.h>

#define BUFSIZE 0x100000  /* 1 Megabyte */
#define NBLOCKS   1000
#define PORT_NAME    "/tmp/foo"

/*
 * Receiving side of the unix-socket benchmark.
 *
 * Binds a SOCK_STREAM unix socket at PORT_NAME, accepts a single
 * connection and reads BUFSIZE-sized chunks until the peer closes it.
 * Afterwards it prints when reception started, how many bytes arrived,
 * the elapsed whole seconds and the resulting Mbytes/second.
 *
 * The clock is started only after the first read() has returned, so the
 * time spent waiting for the client to connect and start sending is not
 * part of the measurement.
 *
 * Takes no arguments and returns nothing.  A failing call is reported on
 * stdout as "Bad <call>"; everything acquired up to that point (buffer,
 * descriptors, socket file) is released before returning.
 */
void server()
{
  struct sockaddr_un sin, from;
  socklen_t len;                /* accept() takes a socklen_t *, not an int * */
  int s = -1, g = -1;
  int bound = 0;                /* set once PORT_NAME exists and must be unlinked */
  ssize_t n;
  char *buf;
  long long nbytes = 0;         /* exact byte count; a float drifts near 1e9 */
  time_t start_time, elapsed_time, rate_secs;

  buf = malloc( BUFSIZE );
  if( buf == NULL ){
    printf( "Bad malloc\n");
    return;
  }
  /* Create an unbound socket */
  if( (s=socket( PF_UNIX, SOCK_STREAM, 0 )) < 0 ){
    printf( "Bad socket\n");
    goto out;
  }
  memset( &sin, 0, sizeof(sin) );
  sin.sun_family = AF_UNIX;
  snprintf( sin.sun_path, sizeof(sin.sun_path), "%s", PORT_NAME );
  /* A socket file left over from a run that never reached its own
     unlink() would make bind() fail, so remove it first. */
  unlink( PORT_NAME );
  if( bind( s, (struct sockaddr *)&sin, sizeof(sin) ) < 0 ){
    printf( "Bad bind\n");
    goto out;
  }
  bound = 1;
  if( listen( s, 5 ) < 0 ){
    printf( "Bad listen\n");
    goto out;
  }
  len = sizeof(from);
  if( (g = accept( s, (struct sockaddr *)&from, &len )) < 0 ){
    printf( "Bad accept\n");
    goto out;
  }

  /* The first read blocks until data arrives; timing starts after it. */
  n = read( g, buf, BUFSIZE );
  start_time = time(0);
  while( n > 0 ) {
    nbytes += n;
    n = read( g, buf, BUFSIZE );
  }
  elapsed_time = time(0) - start_time;
  if( n < 0 ){
    printf( "Bad read\n");
  }

  /* time() only resolves whole seconds: a run shorter than that yields 0,
     which would divide by zero.  Use one second as the smallest interval. */
  rate_secs = elapsed_time > 0 ? elapsed_time : 1;

  printf("\nStarted receiving at %s", ctime( &start_time ));
  printf( "Received %10.0f bytes in %d seconds over unix socket =>",
          (double)nbytes, (int)elapsed_time );
  printf( " %4.0f Mbytes/second \n",
          (double)nbytes / (0x100000 * (double)rate_secs) );

out:
  if( g >= 0 ){
    close(g);
  }
  if( s >= 0 ){
    close(s);
  }
  if( bound ){
    unlink( PORT_NAME );
  }
  free( buf );
}

/*
 * Sending side of the unix-socket benchmark.
 *
 * Connects to the unix socket at PORT_NAME and sends up to NBLOCKS blocks
 * of BUFSIZE bytes, overwriting the buffer with memset() after every block
 * that was sent.  It then prints when sending started, how many blocks
 * were fully sent, the elapsed whole seconds and the resulting
 * Mbytes/second.
 *
 * Takes no arguments and returns nothing.  A failing setup call is
 * reported on stdout as "Bad <call>" and the function returns after
 * releasing what it had acquired.  A failing write() ends the send loop
 * early; the figures printed then cover only the blocks actually sent.
 */
void client()
{
  struct sockaddr_un sin;
  int s;
  char *buf;
  const size_t block = BUFSIZE;
  time_t start_time, elapsed_time, rate_secs;
  int i;

  buf = malloc( BUFSIZE );
  if( buf == NULL ){
    printf( "Bad malloc\n");
    return;
  }
  /* Give the very first block defined contents instead of sending
     whatever malloc() happened to return. */
  memset( buf, 'A', BUFSIZE );

  if( (s=socket( PF_UNIX, SOCK_STREAM, 0 )) < 0 ){
    printf( "Bad socket\n");
    free( buf );
    return;
  }
  memset( &sin, 0, sizeof(sin) );
  sin.sun_family = AF_UNIX;
  snprintf( sin.sun_path, sizeof(sin.sun_path), "%s", PORT_NAME );
  if( connect( s, (struct sockaddr *)&sin, sizeof(sin)) < 0 ){
    printf("Bad connect\n");
    close(s);
    free( buf );
    return;
  }

  start_time = time(0);
  for( i=0; i< NBLOCKS; i++ ) {
    size_t done = 0;

    /* write() may accept fewer bytes than asked for; keep going until
       the whole block is out or the call fails. */
    while( done < block ){
      ssize_t w = write( s, buf + done, block - done );
      if( w <= 0 ){
        break;
      }
      done += (size_t)w;
    }
    if( done < block ){
      break;                    /* i stays at the number of complete blocks */
    }
    memset( buf, 'A', BUFSIZE );
  }
  elapsed_time = time(0) - start_time;
  close(s);

  /* time() only resolves whole seconds: a run shorter than that yields 0,
     which would divide by zero.  Use one second as the smallest interval. */
  rate_secs = elapsed_time > 0 ? elapsed_time : 1;

  printf("\nStarted sending at %s", ctime( &start_time ));
  printf( "Sent and memsetted %d blocks of size %d in %d seconds over unix socket =>",
          i, BUFSIZE, (int)elapsed_time );
  /* Rate is based on the blocks really sent (i), not on NBLOCKS. */
  printf( " %d Mbytes/second \n",
          (int)(((long long)i * BUFSIZE) / (0x100000LL * (long long)rate_secs)) );

  free( buf );
}

/*
 * In-memory baseline for the socket benchmark.
 *
 * Allocates two BUFSIZE buffers and, NBLOCKS times, copies one into the
 * other with memcpy() and then overwrites the destination with memset().
 * Prints the block count, block size, elapsed whole seconds and the
 * resulting Mbytes/second.
 *
 * Takes no arguments and returns nothing.  If either allocation fails it
 * prints "Bad malloc" and returns without running the loop.
 */
void memcpy_benchmark()
{
  char *src, *dst;
  time_t start_time, elapsed_time, rate_secs;
  int i;

  src = malloc ( BUFSIZE );
  dst = malloc ( BUFSIZE );
  if( src == NULL || dst == NULL ){
    printf( "Bad malloc\n");
    free( src );
    free( dst );
    return;
  }
  /* Give the source defined contents before it is copied. */
  memset( src, 'A', BUFSIZE );

  start_time = time(0);
  for( i=0; i< NBLOCKS; i++ ){
    memcpy( dst, src, BUFSIZE );
    memset( dst, 'A', BUFSIZE );
  }
  elapsed_time = time(0) - start_time;

  /* time() only resolves whole seconds: a run shorter than that yields 0,
     and the integer division below would then trap.  Use one second as
     the smallest interval. */
  rate_secs = elapsed_time > 0 ? elapsed_time : 1;

  printf( "Memcpy'ed and memsetted %d blocks of size %d in %d seconds =>",
          NBLOCKS, BUFSIZE, (int)elapsed_time );
  printf( " %d Mbytes/second\n",
          (int)(((long long)NBLOCKS * BUFSIZE) / (0x100000LL * (long long)rate_secs)) );

  free( src );
  free( dst );
}

/*
 * Runs the unix-socket benchmark in two processes.
 *
 * Forks; the child runs server() and then exits, the parent waits one
 * second, runs client() and finally waits for the child to finish.
 *
 * Takes no arguments and returns nothing.  If fork() fails it prints
 * "Bad fork" and returns without running either side.
 */
void socket_benchmark()
{
  pid_t pid;

  /* Anything still sitting in the stdio buffer would be copied into the
     child by fork() and end up printed twice. */
  fflush( stdout );

  pid = fork();
  if ( pid < 0 ) {
    printf( "Bad fork\n");
    return;
  }
  if ( pid == 0 ) {
    server();
    /* The child's work ends here: flush its report and leave, rather than
       returning into the caller's code a second time. */
    fflush( stdout );
    _exit(0);
  }

  sleep(1); /* Dirty, but ensures client runs after server is ready */
  client();
  waitpid( pid, NULL, 0 );
}

/*
 * Program entry point: runs the in-memory memcpy benchmark first and the
 * unix-socket benchmark second, so their reported rates can be compared.
 * Always returns 0.
 */
int main(void)
{
  memcpy_benchmark();
  socket_benchmark();
  return 0;
}

Reply via email to