Just finished a vblade-19 MPIO patch. So far it seems to work great
for throughput, and it disables an interface on a poll() error. I'm not
sure how the change will be handled on the initiator - it seems like
aoe-revalidate works ok. Hopefully somebody can give me some feedback.
diff -uprN vblade-19.orig/aoe.c vblade-19/aoe.c
--- vblade-19.orig/aoe.c 2008-10-08 21:07:40.000000000 +0000
+++ vblade-19/aoe.c 2009-02-26 04:39:02.000000000 +0000
@@ -9,6 +9,7 @@
#include <sys/stat.h>
#include <fcntl.h>
#include <netinet/in.h>
+#include <poll.h>
#include "dat.h"
#include "fns.h"
@@ -22,11 +23,12 @@ int nmasks;
char config[Nconfig];
int nconfig = 0;
int maxscnt = 2;
-char *ifname;
+char **ifname;
+int ifname_count = 0;
int bufcnt = Bufcount;
void
-aoead(int fd) // advertise the virtual blade
+aoead(int fd, uchar *mac_n, char *ifname_n) // advertise the virtual
blade
{
uchar buf[2000];
Conf *p;
@@ -35,14 +37,14 @@ aoead(int fd) // advertise the virtual b
p = (Conf *)buf;
memset(p, 0, sizeof *p);
memset(p->h.dst, 0xff, 6);
- memmove(p->h.src, mac, 6);
+ memmove(p->h.src, mac_n, 6);
p->h.type = htons(0x88a2);
p->h.flags = Resp;
p->h.maj = htons(shelf);
p->h.min = slot;
p->h.cmd = Config;
p->bufcnt = htons(bufcnt);
- p->scnt = maxscnt = (getmtu(sfd, ifname) - sizeof (Ata)) / 512;
+ p->scnt = maxscnt = (getmtu(fd, ifname_n) - sizeof (Ata)) / 512;
p->firmware = htons(FWV);
p->vercmd = 0x10 | Qread;
memcpy(p->data, config, nconfig);
@@ -111,7 +113,7 @@ aoeata(Ata *p, int pktlen) // do ATA req
// yes, this makes unnecessary copies.
int
-confcmd(Conf *p, int payload) // process conf request
+confcmd(Conf *p, int payload, int fd, uchar *mac_n, char
*ifname_n) // process conf request
{
int len;
@@ -151,14 +153,14 @@ confcmd(Conf *p, int payload) // process
memmove(p->data, config, nconfig);
p->len = htons(nconfig);
p->bufcnt = htons(bufcnt);
- p->scnt = maxscnt = (getmtu(sfd, ifname) - sizeof (Ata)) / 512;
+ p->scnt = maxscnt = (getmtu(fd, ifname_n) - sizeof (Ata)) / 512;
p->firmware = htons(FWV);
p->vercmd = 0x10 | QCMD(p); // aoe v.1
return nconfig + sizeof *p - sizeof p->data;
}
void
-doaoe(Aoehdr *p, int n)
+doaoe(Aoehdr *p, int n, int fd, uchar *mac_n, char *ifname_n)
{
int len;
enum { // config query header size
@@ -174,7 +176,7 @@ doaoe(Aoehdr *p, int n)
case Config:
if (n < CHDR_SIZ)
return;
- len = confcmd((Conf *)p, n - CHDR_SIZ);
+ len = confcmd((Conf *)p, n - CHDR_SIZ, fd, mac_n, ifname_n);
if (len == 0)
return;
break;
@@ -184,11 +186,11 @@ doaoe(Aoehdr *p, int n)
break;
}
memmove(p->dst, p->src, 6);
- memmove(p->src, mac, 6);
+ memmove(p->src, mac_n, 6);
p->maj = htons(shelf);
p->min = slot;
p->flags |= Resp;
- if (putpkt(sfd, (uchar *) p, len) == -1) {
+ if (putpkt(fd, (uchar *) p, len) == -1) {
perror("write to network");
exit(1);
}
@@ -199,10 +201,14 @@ aoe(void)
{
Aoehdr *p;
uchar *buf;
- int n, sh;
+ int i, n, sh;
+ int ifname_c = ifname_count;
+ int ifname_good = ifname_count;
long pagesz;
enum { bufsz = 1<<16, };
-
+ struct pollfd sfds[4];
+
+ memset(&sfds, 0, sizeof(sfds));
if ((pagesz = sysconf(_SC_PAGESIZE)) < 0) {
perror("sysconf");
exit(1);
@@ -215,36 +221,67 @@ aoe(void)
if (n & (pagesz - 1))
buf += pagesz - (n & (pagesz - 1));
- aoead(sfd);
+ for(n = 0; n < ifname_c; n++) {
+ aoead(sfd[n], mac[n], ifname[n]);
+ sfds[n].fd = sfd[n];
+ sfds[n].events = POLLIN;
+ }
for (;;) {
- n = getpkt(sfd, buf, bufsz);
- if (n < 0) {
- perror("read network");
- exit(1);
+ if(poll(sfds, ifname_c, -1) < 1) {
+ perror("poll");
+ return;
+ }
+ for(i = 0; i < ifname_c; i++) {
+ if(sfds[i].revents & POLLIN) {
+ n = getpkt(sfds[i].fd, buf, bufsz);
+ if (n < 0) {
+ perror("read network");
+ exit(1);
+ }
+ if (n < sizeof(Aoehdr))
+ continue;
+ p = (Aoehdr *) buf;
+ if (ntohs(p->type) != 0x88a2)
+ continue;
+ if (p->flags & Resp)
+ continue;
+ sh = ntohs(p->maj);
+ if (sh != shelf && sh != (ushort)~0)
+ continue;
+ if (p->min != slot && p->min != (uchar)~0)
+ continue;
+ if (nmasks && !maskok(p->src))
+ continue;
+ doaoe(p, n, sfds[i].fd, mac[i], ifname[i]);
+
+ } else if (sfds[i].revents & POLLRDHUP ||
sfds[i].revents &
POLLERR || sfds[i].revents & POLLNVAL) {
+
+ if(ifname_good-- < 1) {
+ fprintf(stderr, "exiting, no good
interfaces left.\n");
+ fflush(stderr);
+ exit(1);
+ }
+ fprintf(stderr, "disabling interface %s because
of poll() error.
%d good interfaces left.\n", ifname[i], ifname_good);
+ sfds[i].revents = 0;
+ sfds[i].events = 0;
+ close(sfds[i].fd);
+ sfds[i].fd = -1;
+
+ /* seems like readvertising the blade works
best */
+ /*for(n = 0; n < ifname_c; n++) {
+ if(i != n)
+ aoead(sfd[n], mac[n],
ifname[n]);
+ }*/
+ }
}
- if (n < sizeof(Aoehdr))
- continue;
- p = (Aoehdr *) buf;
- if (ntohs(p->type) != 0x88a2)
- continue;
- if (p->flags & Resp)
- continue;
- sh = ntohs(p->maj);
- if (sh != shelf && sh != (ushort)~0)
- continue;
- if (p->min != slot && p->min != (uchar)~0)
- continue;
- if (nmasks && !maskok(p->src))
- continue;
- doaoe(p, n);
}
}
void
usage(void)
{
- fprintf(stderr, "usage: %s [-b bufcnt] [-d ] [-s] [-r] [ -m
mac[,mac...] ] shelf slot netif filename\n",
+ fprintf(stderr, "usage: %s [-b bufcnt] [-d ] [-s] [-r] [ -i iface]
[ -m mac[,mac...] ] shelf slot netif filename\n",
progname);
exit(1);
}
@@ -305,12 +342,17 @@ int
main(int argc, char **argv)
{
int ch, omode = 0, readonly = 0;
-
+ int i = 0;
+
bufcnt = Bufcount;
setbuf(stdin, NULL);
atainit();
progname = *argv;
- while ((ch = getopt(argc, argv, "b:dsrm:")) != -1) {
+ if((ifname = malloc(sizeof(*ifname)*1)) == NULL) {
+ perror("malloc");
+ exit(1);
+ }
+ while ((ch = getopt(argc, argv, "b:dsrm:i:")) != -1) {
switch (ch) {
case 'b':
bufcnt = atoi(optarg);
@@ -329,6 +371,14 @@ main(int argc, char **argv)
case 'm':
setmask(optarg);
break;
+ case 'i':
+ ifname_count++;
+ if((ifname = realloc(ifname,
sizeof(*ifname)*(ifname_count+1))) ==
NULL) {
+ perror("malloc");
+ exit(1);
+ }
+ ifname[ifname_count-1] = optarg;
+ break;
case '?':
default:
usage();
@@ -348,9 +398,26 @@ main(int argc, char **argv)
slot = atoi(argv[1]);
size = getsize(bfd);
size /= 512;
- ifname = argv[2];
- sfd = dial(ifname, bufcnt);
- getea(sfd, ifname, mac);
+ ifname_count++;
+ ifname[ifname_count-1] = argv[2];
+ if((sfd = malloc(sizeof(int *)*ifname_count)) == NULL) {
+ perror("malloc");
+ exit(1);
+ }
+ if((mac = malloc(sizeof(uchar *)*ifname_count)) == NULL) {
+ perror("malloc");
+ exit(1);
+ }
+
+ for(; i < ifname_count; i++) {
+ if((mac[i] = malloc(sizeof(uchar)*6)) == NULL) {
+ perror("malloc");
+ exit(1);
+ }
+
+ sfd[i] = dial(ifname[i], bufcnt);
+ getea(sfd[i], ifname[i], mac[i]);
+ }
printf("pid %ld: e%d.%d, %lld sectors %s\n",
(long) getpid(), shelf, slot, size,
readonly ? "O_RDONLY" : "O_RDWR");
diff -uprN vblade-19.orig/dat.h vblade-19/dat.h
--- vblade-19.orig/dat.h 2008-10-08 21:07:40.000000000 +0000
+++ vblade-19/dat.h 2009-02-26 03:39:11.000000000 +0000
@@ -115,8 +115,8 @@ enum {
int shelf, slot;
ulong aoetag;
-uchar mac[6];
+uchar **mac;
int bfd; // block file descriptor
-int sfd; // socket file descriptor
+int *sfd; // socket file descriptor
vlong size; // size of vblade
char *progname;
diff -uprN vblade-19.orig/fns.h vblade-19/fns.h
--- vblade-19.orig/fns.h 2008-10-08 21:07:41.000000000 +0000
+++ vblade-19/fns.h 2009-02-26 03:38:32.000000000 +0000
@@ -6,7 +6,7 @@ void aoe(void);
void aoeinit(void);
void aoequery(void);
void aoeconfig(void);
-void aoead(int);
+void aoead(int fd, uchar *, char *);
void aoeflush(int, int);
void aoetick(void);
void aoerequest(int, int, vlong, int, uchar *, int);
diff -uprN vblade-19.orig/vblade.8 vblade-19/vblade.8
--- vblade-19.orig/vblade.8 2008-10-08 21:07:41.000000000 +0000
+++ vblade-19/vblade.8 2009-02-26 04:23:48.000000000 +0000
@@ -55,6 +55,11 @@ The -r flag restricts the export of the
The -m flag takes an argument, a comma separated list of MAC addresses
permitted access to the vblade. A MAC address can be specified in
upper
or lower case, with or without colons.
+.TP
+\fB-i\fP
+The -i flag takes the name of an additional ethernet network interface
+on which to serve and advertise the vblade, enabling MPIO support and
+increased throughput. You must still specify netif without the -i flag.
.SH EXAMPLE
In this example, the root user on a host named
.I nai
On Feb 25, 2009, at 12:45 PM, Tracy Reed wrote:
> On Wed, Feb 25, 2009 at 03:36:36PM -0500, Ed Cashin spake thusly:
>> preserved reliably. Although it's true that there's only one page
>> cache, the buffers of each vblade process are independent.
>
> Ah. Good point. Any way to make the one vblade process listen on
> multiple interfaces? Would that solve this issue? Making AoE work with
> MPIO (which comes with RHEL among others) is a nice way to add
> scalability and redundancy. Previously one would have to use 802.3ad
> (LACP) to accomplish this. Not having to do such tweaking in the
> switch is attractive.
>
> --
> Tracy Reed
> http://tracyreed.org
------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
Aoetools-discuss mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/aoetools-discuss