Just finished a vblade-19 MPIO patch.  So far it seems to work great
for throughput, and it disables an interface on poll() error.  I'm not
sure how the change will be handled on the initiator - seems like
aoe-revalidate works ok.  Hopefully somebody can give me some feedback.


diff -uprN vblade-19.orig/aoe.c vblade-19/aoe.c
--- vblade-19.orig/aoe.c        2008-10-08 21:07:40.000000000 +0000
+++ vblade-19/aoe.c     2009-02-26 04:39:02.000000000 +0000
@@ -9,6 +9,7 @@
  #include <sys/stat.h>
  #include <fcntl.h>
  #include <netinet/in.h>
+#include <poll.h>
  #include "dat.h"
  #include "fns.h"

@@ -22,11 +23,12 @@ int nmasks;
  char config[Nconfig];
  int nconfig = 0;
  int maxscnt = 2;
-char *ifname;
+char **ifname;
+int ifname_count = 0;
  int bufcnt = Bufcount;

  void
-aoead(int fd)  // advertise the virtual blade
+aoead(int fd, uchar *mac_n, char *ifname_n)    // advertise the virtual  
blade
  {
        uchar buf[2000];
        Conf *p;
@@ -35,14 +37,14 @@ aoead(int fd)       // advertise the virtual b
        p = (Conf *)buf;
        memset(p, 0, sizeof *p);
        memset(p->h.dst, 0xff, 6);
-       memmove(p->h.src, mac, 6);
+       memmove(p->h.src, mac_n, 6);
        p->h.type = htons(0x88a2);
        p->h.flags = Resp;
        p->h.maj = htons(shelf);
        p->h.min = slot;
        p->h.cmd = Config;
        p->bufcnt = htons(bufcnt);
-       p->scnt = maxscnt = (getmtu(sfd, ifname) - sizeof (Ata)) / 512;
+       p->scnt = maxscnt = (getmtu(fd, ifname_n) - sizeof (Ata)) / 512;
        p->firmware = htons(FWV);
        p->vercmd = 0x10 | Qread;
        memcpy(p->data, config, nconfig);
@@ -111,7 +113,7 @@ aoeata(Ata *p, int pktlen)  // do ATA req
  // yes, this makes unnecessary copies.

  int
-confcmd(Conf *p, int payload)  // process conf request
+confcmd(Conf *p, int payload, int fd, uchar *mac_n, char  
*ifname_n)      // process conf request
  {
        int len;

@@ -151,14 +153,14 @@ confcmd(Conf *p, int payload)     // process
        memmove(p->data, config, nconfig);
        p->len = htons(nconfig);
        p->bufcnt = htons(bufcnt);
-       p->scnt = maxscnt = (getmtu(sfd, ifname) - sizeof (Ata)) / 512;
+       p->scnt = maxscnt = (getmtu(fd, ifname_n) - sizeof (Ata)) / 512;
        p->firmware = htons(FWV);
        p->vercmd = 0x10 | QCMD(p);     // aoe v.1
        return nconfig + sizeof *p - sizeof p->data;
  }

  void
-doaoe(Aoehdr *p, int n)
+doaoe(Aoehdr *p, int n, int fd, uchar *mac_n, char *ifname_n)
  {
        int len;
        enum {  // config query header size
@@ -174,7 +176,7 @@ doaoe(Aoehdr *p, int n)
        case Config:
                if (n < CHDR_SIZ)
                        return;
-               len = confcmd((Conf *)p, n - CHDR_SIZ);
+               len = confcmd((Conf *)p, n - CHDR_SIZ, fd, mac_n, ifname_n);
                if (len == 0)
                        return;
                break;
@@ -184,11 +186,11 @@ doaoe(Aoehdr *p, int n)
                break;
        }
        memmove(p->dst, p->src, 6);
-       memmove(p->src, mac, 6);
+       memmove(p->src, mac_n, 6);
        p->maj = htons(shelf);
        p->min = slot;
        p->flags |= Resp;
-       if (putpkt(sfd, (uchar *) p, len) == -1) {
+       if (putpkt(fd, (uchar *) p, len) == -1) {
                perror("write to network");
                exit(1);
        }
@@ -199,10 +201,14 @@ aoe(void)
  {
        Aoehdr *p;
        uchar *buf;
-       int n, sh;
+       int i, n, sh;
+       int ifname_c = ifname_count;
+       int ifname_good = ifname_count;
        long pagesz;
        enum { bufsz = 1<<16, };
-
+       struct pollfd sfds[4];
+       
+       memset(&sfds, 0, sizeof(sfds));
        if ((pagesz = sysconf(_SC_PAGESIZE)) < 0) {
                perror("sysconf");
                exit(1);
@@ -215,36 +221,67 @@ aoe(void)
        if (n & (pagesz - 1))
                buf += pagesz - (n & (pagesz - 1));

-       aoead(sfd);
+       for(n = 0; n < ifname_c; n++) {
+               aoead(sfd[n], mac[n], ifname[n]);
+               sfds[n].fd = sfd[n];
+               sfds[n].events = POLLIN;
+       }

        for (;;) {
-               n = getpkt(sfd, buf, bufsz);
-               if (n < 0) {
-                       perror("read network");
-                       exit(1);
+               if(poll(sfds, ifname_c, -1) < 1) {
+                       perror("poll");
+                       return;
+               }
+               for(i = 0; i < ifname_c; i++) {
+                       if(sfds[i].revents & POLLIN) {
+                               n = getpkt(sfds[i].fd, buf, bufsz);
+                               if (n < 0) {
+                                       perror("read network");
+                                       exit(1);
+                               }
+                               if (n < sizeof(Aoehdr))
+                                       continue;
+                               p = (Aoehdr *) buf;
+                               if (ntohs(p->type) != 0x88a2)
+                                       continue;
+                               if (p->flags & Resp)
+                                       continue;
+                               sh = ntohs(p->maj);
+                               if (sh != shelf && sh != (ushort)~0)
+                                       continue;
+                               if (p->min != slot && p->min != (uchar)~0)
+                                       continue;
+                               if (nmasks && !maskok(p->src))
+                                       continue;
+                               doaoe(p, n, sfds[i].fd, mac[i], ifname[i]);
+                       
+                       } else if (sfds[i].revents & POLLRDHUP || 
sfds[i].revents &  
POLLERR || sfds[i].revents & POLLNVAL) {
+                               
+                               if(ifname_good-- < 1) {
+                                       fprintf(stderr, "exiting, no good 
interfaces left.\n");
+                                       fflush(stderr);
+                                       exit(1);
+                               }
+                               fprintf(stderr, "disabling interface %s because 
of poll() error.  
%d good interfaces left.\n", ifname[i], ifname_good);
+                               sfds[i].revents = 0;
+                               sfds[i].events = 0;
+                               close(sfds[i].fd);
+                               sfds[i].fd = -1;
+                               
+                               /* seems like readvertising the blade works 
best */
+                               /*for(n = 0; n < ifname_c; n++) {
+                                       if(i != n)
+                                               aoead(sfd[n], mac[n], 
ifname[n]);
+                               }*/
+                       }
                }
-               if (n < sizeof(Aoehdr))
-                       continue;
-               p = (Aoehdr *) buf;
-               if (ntohs(p->type) != 0x88a2)
-                       continue;
-               if (p->flags & Resp)
-                       continue;
-               sh = ntohs(p->maj);
-               if (sh != shelf && sh != (ushort)~0)
-                       continue;
-               if (p->min != slot && p->min != (uchar)~0)
-                       continue;
-               if (nmasks && !maskok(p->src))
-                       continue;
-               doaoe(p, n);
        }
  }

  void
  usage(void)
  {
-       fprintf(stderr, "usage: %s [-b bufcnt] [-d ] [-s] [-r] [ -m  
mac[,mac...] ] shelf slot netif filename\n",
+       fprintf(stderr, "usage: %s [-b bufcnt] [-d ] [-s] [-r] [ -i iface]  
[ -m mac[,mac...] ] shelf slot netif filename\n",
                progname);
        exit(1);
  }
@@ -305,12 +342,17 @@ int
  main(int argc, char **argv)
  {
        int ch, omode = 0, readonly = 0;
-
+       int i = 0;
+       
        bufcnt = Bufcount;
        setbuf(stdin, NULL);
        atainit();
        progname = *argv;
-       while ((ch = getopt(argc, argv, "b:dsrm:")) != -1) {
+       if((ifname = malloc(sizeof(*ifname)*1)) == NULL) {
+               perror("malloc");
+               exit(1);
+       }
+       while ((ch = getopt(argc, argv, "b:dsrm:i:")) != -1) {
                switch (ch) {
                case 'b':
                        bufcnt = atoi(optarg);
@@ -329,6 +371,14 @@ main(int argc, char **argv)
                case 'm':
                        setmask(optarg);
                        break;
+               case 'i':
+                       ifname_count++;
+                       if((ifname = realloc(ifname, 
sizeof(*ifname)*(ifname_count+1))) ==  
NULL) {
+                               perror("malloc");
+                               exit(1);
+                       }                       
+                       ifname[ifname_count-1] = optarg;
+                       break;
                case '?':
                default:
                        usage();
@@ -348,9 +398,26 @@ main(int argc, char **argv)
        slot = atoi(argv[1]);
        size = getsize(bfd);
        size /= 512;
-       ifname = argv[2];
-       sfd = dial(ifname, bufcnt);
-       getea(sfd, ifname, mac);
+       ifname_count++;
+       ifname[ifname_count-1] = argv[2];
+       if((sfd = malloc(sizeof(int *)*ifname_count)) == NULL) {
+               perror("malloc");
+               exit(1);
+       }
+       if((mac = malloc(sizeof(uchar *)*ifname_count)) == NULL) {
+               perror("malloc");
+               exit(1);
+       }
+
+       for(; i < ifname_count; i++) {
+               if((mac[i] = malloc(sizeof(uchar)*6)) == NULL) {
+                       perror("malloc");
+                       exit(1);
+               }
+
+               sfd[i] = dial(ifname[i], bufcnt);
+               getea(sfd[i], ifname[i], mac[i]);
+       }
        printf("pid %ld: e%d.%d, %lld sectors %s\n",
                (long) getpid(), shelf, slot, size,
                readonly ? "O_RDONLY" : "O_RDWR");
diff -uprN vblade-19.orig/dat.h vblade-19/dat.h
--- vblade-19.orig/dat.h        2008-10-08 21:07:40.000000000 +0000
+++ vblade-19/dat.h     2009-02-26 03:39:11.000000000 +0000
@@ -115,8 +115,8 @@ enum {

  int   shelf, slot;
  ulong aoetag;
-uchar  mac[6];
+uchar  **mac;
  int   bfd;            // block file descriptor
-int    sfd;            // socket file descriptor
+int    *sfd;           // socket file descriptor
  vlong size;           // size of vblade
  char  *progname;
diff -uprN vblade-19.orig/fns.h vblade-19/fns.h
--- vblade-19.orig/fns.h        2008-10-08 21:07:41.000000000 +0000
+++ vblade-19/fns.h     2009-02-26 03:38:32.000000000 +0000
@@ -6,7 +6,7 @@ void    aoe(void);
  void  aoeinit(void);
  void  aoequery(void);
  void  aoeconfig(void);
-void   aoead(int);
+void   aoead(int fd, uchar *, char *);
  void  aoeflush(int, int);
  void  aoetick(void);
  void  aoerequest(int, int, vlong, int, uchar *, int);
diff -uprN vblade-19.orig/vblade.8 vblade-19/vblade.8
--- vblade-19.orig/vblade.8     2008-10-08 21:07:41.000000000 +0000
+++ vblade-19/vblade.8  2009-02-26 04:23:48.000000000 +0000
@@ -55,6 +55,11 @@ The -r flag restricts the export of the
  The -m flag takes an argument, a comma separated list of MAC addresses
  permitted access to the vblade.  A MAC address can be specified in  
upper
  or lower case, with or without colons.
+.TP
+\fB-i\fP
+The -i flag takes an additional ethernet network interface to initialize
+and broadcast on (repeatable), enabling MPIO support and more throughput.
+You must still specify another interface without using the -i flag.
  .SH EXAMPLE
  In this example, the root user on a host named
  .I nai

On Feb 25, 2009, at 12:45 PM, Tracy Reed wrote:

> On Wed, Feb 25, 2009 at 03:36:36PM -0500, Ed Cashin spake thusly:
>> preserved reliably.  Although it's true that there's only one page
>> cache, the buffers of each vblade process are independent.
>
> Ah. Good point. Any way to make the one vblade process listen on
> multiple interfaces? Would that solve this issue? Making AoE work with
> MPIO (which comes with RHEL among others) is a nice way to add
> scalability and redundancy. Previously one would have to use 802.3ad
> (LACP) to accomplish this. Not having to do such tweaking in the
> switch is attractive.
>
> -- 
> Tracy Reed
> http://tracyreed.org


------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
Aoetools-discuss mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/aoetools-discuss

Reply via email to