At Thu, 31 Jul 2014 11:58:52 +0800, Liu Yuan wrote: > > On Wed, Jul 30, 2014 at 02:26:20PM +0800, Ruoyu wrote: > > Current epoch_log contains a long nodes array to sync nodes and > > epoch in the cluster. It is simple, but there is a potential > > performance issue because each epoch log occupies nearly 500 > > KBytes. If the cluster members change frequently, epoch is lifted > > frequently, more and more unused data is transfered between > > client and server. If we don't find a way, the performance will > > go from bad to worse. > > > > Although the max node number is 6144, we only use a few of them. > > Therefore, the first solution is using a zero-length array, > > client (dog) and server (sheep) will negotiate an appropriate > > supported node number. This way will spend much less memory and > > will run much faster than before. > > > > v3: > > - epoch_log_read series functions are changed to propagate error > > to upper layer. > > > > v2: > > - internal data structure is changed. > > You need put change log under Signed-off-by line > > > Signed-off-by: Ruoyu <[email protected]> > > --- > <-- Here we put change log for a single patch, otherwise put it in the > introduction patch (git format-patch --cover-letter xxx). > > > > > dog/cluster.c | 40 +++++++++++++++++++++++---------- > > dog/vdi.c | 37 +++++++++++++++++++++++-------- > > include/internal_proto.h | 2 +- > > include/sheepdog_proto.h | 1 + > > sheep/group.c | 57 > > +++++++++++++++++++++++++----------------------- > > sheep/ops.c | 55 > > +++++++++++++++++++++++++++------------------- > > sheep/sheep_priv.h | 10 +++++---- > > sheep/store.c | 25 ++++++++++++--------- > > 8 files changed, 143 insertions(+), 84 deletions(-) > > > > diff --git a/dog/cluster.c b/dog/cluster.c > > index 188d4f4..508b65a 100644 > > --- a/dog/cluster.c > > +++ b/dog/cluster.c > > @@ -141,14 +141,14 @@ static int cluster_format(int argc, char **argv) > > return EXIT_SUCCESS; > > } > > > > -static void print_nodes(const struct epoch_log *logs, int epoch) > > +static void print_nodes(const struct epoch_log *logs, uint16_t flags) > > { > > int i, nr_disk; > > const struct sd_node *entry; > > > > - for (i = 0; i < logs[epoch].nr_nodes; i++) { > > - entry = logs[epoch].nodes + i; > > - if (logs->flags & SD_CLUSTER_FLAG_DISKMODE) { > > + for (i = 0; i < logs->nr_nodes; i++) { > > + entry = logs->nodes + i; > > + if (flags & SD_CLUSTER_FLAG_DISKMODE) { > > for (nr_disk = 0; nr_disk < DISK_MAX; nr_disk++) { > > if (entry->disks[nr_disk].disk_id == 0) > > break; > > @@ -169,21 +169,35 @@ static int cluster_info(int argc, char **argv) > > int i, ret; > > struct sd_req hdr; > > struct sd_rsp *rsp = (struct sd_rsp *)&hdr; > > - struct epoch_log *logs; > > + struct epoch_log *logs, *log; > > + char *next_log; > > int nr_logs, log_length; > > time_t ti, ct; > > struct tm tm; > > char time_str[128]; > > + uint32_t support_nodes; > > > > - log_length = sd_epoch * sizeof(struct epoch_log); > > +#define DEFAULT_SUPPORT_NODES 32 > > It is used by mutiple files, so define it in .h file. By the way, why not use > sd_nodes_nr directly? > > > + support_nodes = DEFAULT_SUPPORT_NODES; > > + log_length = sd_epoch * (sizeof(struct epoch_log) > > + + support_nodes * sizeof(struct sd_node)); > > logs = xmalloc(log_length); > > > > +retry: > > sd_init_req(&hdr, SD_OP_STAT_CLUSTER); > > hdr.data_length = log_length; > > + hdr.cluster.support_nodes = support_nodes; > > > > ret = dog_exec_req(&sd_nid, &hdr, logs); > > if (ret < 0) > > goto error; > > + if (rsp->result == SD_RES_BUFFER_SMALL) { > > + support_nodes *= 2; > > + log_length = sd_epoch * (sizeof(struct epoch_log) > > + + support_nodes * sizeof(struct sd_node)); > > + logs = xrealloc(logs, log_length); > > + goto retry; > > + } > > > > /* show cluster status */ > > if (!raw_output) > > @@ -230,10 +244,12 @@ static int cluster_info(int argc, char **argv) > > printf("Epoch Time Version\n"); > > } > > > > - nr_logs = rsp->data_length / sizeof(struct epoch_log); > > + nr_logs = rsp->data_length / (sizeof(struct epoch_log) > > + + support_nodes * sizeof(struct sd_node)); > > + next_log = (char *)logs; > > for (i = 0; i < nr_logs; i++) { > > - > > - ti = logs[i].time; > > + log = (struct epoch_log *)next_log; > > + ti = log->time; > > if (raw_output) { > > snprintf(time_str, sizeof(time_str), "%" PRIu64, > > (uint64_t) ti); > > } else { > > @@ -241,10 +257,12 @@ static int cluster_info(int argc, char **argv) > > strftime(time_str, sizeof(time_str), "%Y-%m-%d > > %H:%M:%S", &tm); > > } > > > > - printf(raw_output ? "%s %d" : "%s %6d", time_str, > > logs[i].epoch); > > + printf(raw_output ? "%s %d" : "%s %6d", time_str, log->epoch); > > printf(" ["); > > - print_nodes(logs, i); > > + print_nodes(log, logs->flags); > > printf("]\n"); > > + next_log = (char *)log->nodes > > + + support_nodes * sizeof(struct sd_node); > > } > > > > free(logs); > > diff --git a/dog/vdi.c b/dog/vdi.c > > index 2e3f7b3..6f0b748 100644 > > --- a/dog/vdi.c > > +++ b/dog/vdi.c > > @@ -1096,47 +1096,64 @@ static int do_track_object(uint64_t oid, uint8_t > > nr_copies) > > struct sd_req hdr; > > struct sd_rsp *rsp = (struct sd_rsp *)&hdr; > > const struct sd_vnode *vnode_buf[SD_MAX_COPIES]; > > - struct epoch_log *logs; > > + struct epoch_log *logs, *log; > > + char *next_log; > > int nr_logs, log_length; > > + uint32_t support_nodes; > > > > - log_length = sd_epoch * sizeof(struct epoch_log); > > +#define DEFAULT_SUPPORT_NODES 32 > > + support_nodes = DEFAULT_SUPPORT_NODES; > > + log_length = sd_epoch * (sizeof(struct epoch_log) > > + + support_nodes * sizeof(struct sd_node)); > > logs = xmalloc(log_length); > > > > +retry: > > sd_init_req(&hdr, SD_OP_STAT_CLUSTER); > > hdr.data_length = log_length; > > + hdr.cluster.support_nodes = support_nodes; > > > > ret = dog_exec_req(&sd_nid, &hdr, logs); > > if (ret < 0) > > goto error; > > > > + if (rsp->result == SD_RES_BUFFER_SMALL) { > > + support_nodes *= 2; > > + log_length = sd_epoch * (sizeof(struct epoch_log) > > + + support_nodes * sizeof(struct sd_node)); > > + logs = xrealloc(logs, log_length); > > + goto retry; > > + } > > if (rsp->result != SD_RES_SUCCESS) { > > printf("%s\n", sd_strerror(rsp->result)); > > goto error; > > } > > > > - nr_logs = rsp->data_length / sizeof(struct epoch_log); > > + nr_logs = rsp->data_length / (sizeof(struct epoch_log) > > + + support_nodes * sizeof(struct sd_node)); > > + next_log = (char *)logs; > > for (i = nr_logs - 1; i >= 0; i--) { > > struct rb_root vroot = RB_ROOT; > > struct rb_root nroot = RB_ROOT; > > > > + log = (struct epoch_log *)next_log; > > printf("\nobj %"PRIx64" locations at epoch %d, copies = %d\n", > > - oid, logs[i].epoch, nr_copies); > > + oid, log->epoch, nr_copies); > > printf("---------------------------------------------------\n"); > > > > /* > > * When # of nodes is less than nr_copies, we only print > > * remaining nodes that holds all the remaining copies. > > */ > > - if (logs[i].nr_nodes < nr_copies) { > > - for (j = 0; j < logs[i].nr_nodes; j++) { > > - const struct node_id *n = &logs[i].nodes[j].nid; > > + if (log->nr_nodes < nr_copies) { > > + for (j = 0; j < log->nr_nodes; j++) { > > + const struct node_id *n = &log->nodes[j].nid; > > > > printf("%s\n", addr_to_str(n->addr, n->port)); > > } > > continue; > > } > > - for (int k = 0; k < logs[i].nr_nodes; k++) > > - rb_insert(&nroot, &logs[i].nodes[k], rb, node_cmp); > > + for (int k = 0; k < log->nr_nodes; k++) > > + rb_insert(&nroot, &log->nodes[k], rb, node_cmp); > > if (logs->flags & SD_CLUSTER_FLAG_DISKMODE) > > disks_to_vnodes(&nroot, &vroot); > > else > > @@ -1148,6 +1165,8 @@ static int do_track_object(uint64_t oid, uint8_t > > nr_copies) > > printf("%s\n", addr_to_str(n->addr, n->port)); > > } > > rb_destroy(&vroot, struct sd_vnode, rb); > > + next_log = (char *)log->nodes > > + + support_nodes * sizeof(struct sd_node); > > } > > > > free(logs); > > diff --git a/include/internal_proto.h b/include/internal_proto.h > > index 37afb46..d61b5a5 100644 > > --- a/include/internal_proto.h > > +++ b/include/internal_proto.h > > @@ -221,7 +221,7 @@ struct epoch_log { > > uint8_t __pad[3]; > > uint16_t flags; > > char drv_name[STORE_LEN]; > > - struct sd_node nodes[SD_MAX_NODES]; > > + struct sd_node nodes[0]; > > }; > > > > struct vdi_op_message { > > diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h > > index 7cfdccb..8b26a8b 100644 > > --- a/include/sheepdog_proto.h > > +++ b/include/sheepdog_proto.h > > @@ -165,6 +165,7 @@ struct sd_req { > > uint8_t copy_policy; > > uint16_t flags; > > uint32_t tag; > > + uint32_t support_nodes; > > support_nodes is not good. nodes_nr is simpler.
These sort of style problems can be fixed later easily. I prefer apply first and fix later than increasing turn around of review. Frequent commits can reduce possibility of rebasing and make development speedy. Thanks, Hitoshi -- sheepdog mailing list [email protected] http://lists.wpkg.org/mailman/listinfo/sheepdog
