At Thu, 31 Jul 2014 11:58:52 +0800,
Liu Yuan wrote:
> 
> On Wed, Jul 30, 2014 at 02:26:20PM +0800, Ruoyu wrote:
> > Current epoch_log contains a long nodes array to sync nodes and
> > epoch in the cluster. It is simple, but there is a potential
> > performance issue because each epoch log occupies nearly 500
> > KBytes. If the cluster members change frequently, epoch is lifted
> > frequently, more and more unused data is transfered between
> > client and server. If we don't find a way, the performance will
> > go from bad to worse.
> > 
> > Although the max node number is 6144, we only use a few of them.
> > Therefore, the first solution is using a zero-length array,
> > client (dog) and server (sheep) will negotiate an appropriate
> > supported node number. This way will spend much less memory and
> > will run much faster than before.
> > 
> > v3:
> >  - epoch_log_read series functions are changed to propagate error
> >    to upper layer.
> > 
> > v2:
> >  - internal data structure is changed.
> 
> You need put change log under Signed-off-by line
> 
> > Signed-off-by: Ruoyu <[email protected]>
> > --- 
> <-- Here we put change log for a single patch, otherwise put it in the
> introduction patch (git format-patch --cover-letter xxx).
> 
> >
> >  dog/cluster.c            | 40 +++++++++++++++++++++++----------
> >  dog/vdi.c                | 37 +++++++++++++++++++++++--------
> >  include/internal_proto.h |  2 +-
> >  include/sheepdog_proto.h |  1 +
> >  sheep/group.c            | 57 
> > +++++++++++++++++++++++++-----------------------
> >  sheep/ops.c              | 55 
> > +++++++++++++++++++++++++++-------------------
> >  sheep/sheep_priv.h       | 10 +++++----
> >  sheep/store.c            | 25 ++++++++++++---------
> >  8 files changed, 143 insertions(+), 84 deletions(-)
> > 
> > diff --git a/dog/cluster.c b/dog/cluster.c
> > index 188d4f4..508b65a 100644
> > --- a/dog/cluster.c
> > +++ b/dog/cluster.c
> > @@ -141,14 +141,14 @@ static int cluster_format(int argc, char **argv)
> >     return EXIT_SUCCESS;
> >  }
> >  
> > -static void print_nodes(const struct epoch_log *logs, int epoch)
> > +static void print_nodes(const struct epoch_log *logs, uint16_t flags)
> >  {
> >     int i, nr_disk;
> >     const struct sd_node *entry;
> >  
> > -   for (i = 0; i < logs[epoch].nr_nodes; i++) {
> > -           entry = logs[epoch].nodes + i;
> > -           if (logs->flags & SD_CLUSTER_FLAG_DISKMODE) {
> > +   for (i = 0; i < logs->nr_nodes; i++) {
> > +           entry = logs->nodes + i;
> > +           if (flags & SD_CLUSTER_FLAG_DISKMODE) {
> >                     for (nr_disk = 0; nr_disk < DISK_MAX; nr_disk++) {
> >                             if (entry->disks[nr_disk].disk_id == 0)
> >                                     break;
> > @@ -169,21 +169,35 @@ static int cluster_info(int argc, char **argv)
> >     int i, ret;
> >     struct sd_req hdr;
> >     struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
> > -   struct epoch_log *logs;
> > +   struct epoch_log *logs, *log;
> > +   char *next_log;
> >     int nr_logs, log_length;
> >     time_t ti, ct;
> >     struct tm tm;
> >     char time_str[128];
> > +   uint32_t support_nodes;
> >  
> > -   log_length = sd_epoch * sizeof(struct epoch_log);
> > +#define DEFAULT_SUPPORT_NODES 32
> 
> It is used by mutiple files, so define it in .h file. By the way, why not use
> sd_nodes_nr directly?
> 
> > +   support_nodes = DEFAULT_SUPPORT_NODES;
> > +   log_length = sd_epoch * (sizeof(struct epoch_log)
> > +                   + support_nodes * sizeof(struct sd_node));
> >     logs = xmalloc(log_length);
> >  
> > +retry:
> >     sd_init_req(&hdr, SD_OP_STAT_CLUSTER);
> >     hdr.data_length = log_length;
> > +   hdr.cluster.support_nodes = support_nodes;
> >  
> >     ret = dog_exec_req(&sd_nid, &hdr, logs);
> >     if (ret < 0)
> >             goto error;
> > +   if (rsp->result == SD_RES_BUFFER_SMALL) {
> > +           support_nodes *= 2;
> > +           log_length = sd_epoch * (sizeof(struct epoch_log)
> > +                           + support_nodes * sizeof(struct sd_node));
> > +           logs = xrealloc(logs, log_length);
> > +           goto retry;
> > +   }
> >  
> >     /* show cluster status */
> >     if (!raw_output)
> > @@ -230,10 +244,12 @@ static int cluster_info(int argc, char **argv)
> >             printf("Epoch Time           Version\n");
> >     }
> >  
> > -   nr_logs = rsp->data_length / sizeof(struct epoch_log);
> > +   nr_logs = rsp->data_length / (sizeof(struct epoch_log)
> > +                   + support_nodes * sizeof(struct sd_node));
> > +   next_log = (char *)logs;
> >     for (i = 0; i < nr_logs; i++) {
> > -
> > -           ti = logs[i].time;
> > +           log = (struct epoch_log *)next_log;
> > +           ti = log->time;
> >             if (raw_output) {
> >                     snprintf(time_str, sizeof(time_str), "%" PRIu64, 
> > (uint64_t) ti);
> >             } else {
> > @@ -241,10 +257,12 @@ static int cluster_info(int argc, char **argv)
> >                     strftime(time_str, sizeof(time_str), "%Y-%m-%d 
> > %H:%M:%S", &tm);
> >             }
> >  
> > -           printf(raw_output ? "%s %d" : "%s %6d", time_str, 
> > logs[i].epoch);
> > +           printf(raw_output ? "%s %d" : "%s %6d", time_str, log->epoch);
> >             printf(" [");
> > -           print_nodes(logs, i);
> > +           print_nodes(log, logs->flags);
> >             printf("]\n");
> > +           next_log = (char *)log->nodes
> > +                           + support_nodes * sizeof(struct sd_node);
> >     }
> >  
> >     free(logs);
> > diff --git a/dog/vdi.c b/dog/vdi.c
> > index 2e3f7b3..6f0b748 100644
> > --- a/dog/vdi.c
> > +++ b/dog/vdi.c
> > @@ -1096,47 +1096,64 @@ static int do_track_object(uint64_t oid, uint8_t 
> > nr_copies)
> >     struct sd_req hdr;
> >     struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
> >     const struct sd_vnode *vnode_buf[SD_MAX_COPIES];
> > -   struct epoch_log *logs;
> > +   struct epoch_log *logs, *log;
> > +   char *next_log;
> >     int nr_logs, log_length;
> > +   uint32_t support_nodes;
> >  
> > -   log_length = sd_epoch * sizeof(struct epoch_log);
> > +#define DEFAULT_SUPPORT_NODES 32
> > +   support_nodes = DEFAULT_SUPPORT_NODES;
> > +   log_length = sd_epoch * (sizeof(struct epoch_log)
> > +                   + support_nodes * sizeof(struct sd_node));
> >     logs = xmalloc(log_length);
> >  
> > +retry:
> >     sd_init_req(&hdr, SD_OP_STAT_CLUSTER);
> >     hdr.data_length = log_length;
> > +   hdr.cluster.support_nodes = support_nodes;
> >  
> >     ret = dog_exec_req(&sd_nid, &hdr, logs);
> >     if (ret < 0)
> >             goto error;
> >  
> > +   if (rsp->result == SD_RES_BUFFER_SMALL) {
> > +           support_nodes *= 2;
> > +           log_length = sd_epoch * (sizeof(struct epoch_log)
> > +                           + support_nodes * sizeof(struct sd_node));
> > +           logs = xrealloc(logs, log_length);
> > +           goto retry;
> > +   }
> >     if (rsp->result != SD_RES_SUCCESS) {
> >             printf("%s\n", sd_strerror(rsp->result));
> >             goto error;
> >     }
> >  
> > -   nr_logs = rsp->data_length / sizeof(struct epoch_log);
> > +   nr_logs = rsp->data_length / (sizeof(struct epoch_log)
> > +                   + support_nodes * sizeof(struct sd_node));
> > +   next_log = (char *)logs;
> >     for (i = nr_logs - 1; i >= 0; i--) {
> >             struct rb_root vroot = RB_ROOT;
> >             struct rb_root nroot = RB_ROOT;
> >  
> > +           log = (struct epoch_log *)next_log;
> >             printf("\nobj %"PRIx64" locations at epoch %d, copies = %d\n",
> > -                  oid, logs[i].epoch, nr_copies);
> > +                  oid, log->epoch, nr_copies);
> >             printf("---------------------------------------------------\n");
> >  
> >             /*
> >              * When # of nodes is less than nr_copies, we only print
> >              * remaining nodes that holds all the remaining copies.
> >              */
> > -           if (logs[i].nr_nodes < nr_copies) {
> > -                   for (j = 0; j < logs[i].nr_nodes; j++) {
> > -                           const struct node_id *n = &logs[i].nodes[j].nid;
> > +           if (log->nr_nodes < nr_copies) {
> > +                   for (j = 0; j < log->nr_nodes; j++) {
> > +                           const struct node_id *n = &log->nodes[j].nid;
> >  
> >                             printf("%s\n", addr_to_str(n->addr, n->port));
> >                     }
> >                     continue;
> >             }
> > -           for (int k = 0; k < logs[i].nr_nodes; k++)
> > -                   rb_insert(&nroot, &logs[i].nodes[k], rb, node_cmp);
> > +           for (int k = 0; k < log->nr_nodes; k++)
> > +                   rb_insert(&nroot, &log->nodes[k], rb, node_cmp);
> >             if (logs->flags & SD_CLUSTER_FLAG_DISKMODE)
> >                     disks_to_vnodes(&nroot, &vroot);
> >             else
> > @@ -1148,6 +1165,8 @@ static int do_track_object(uint64_t oid, uint8_t 
> > nr_copies)
> >                     printf("%s\n", addr_to_str(n->addr, n->port));
> >             }
> >             rb_destroy(&vroot, struct sd_vnode, rb);
> > +           next_log = (char *)log->nodes
> > +                           + support_nodes * sizeof(struct sd_node);
> >     }
> >  
> >     free(logs);
> > diff --git a/include/internal_proto.h b/include/internal_proto.h
> > index 37afb46..d61b5a5 100644
> > --- a/include/internal_proto.h
> > +++ b/include/internal_proto.h
> > @@ -221,7 +221,7 @@ struct epoch_log {
> >     uint8_t  __pad[3];
> >     uint16_t flags;
> >     char drv_name[STORE_LEN];
> > -   struct sd_node nodes[SD_MAX_NODES];
> > +   struct sd_node nodes[0];
> >  };
> >  
> >  struct vdi_op_message {
> > diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
> > index 7cfdccb..8b26a8b 100644
> > --- a/include/sheepdog_proto.h
> > +++ b/include/sheepdog_proto.h
> > @@ -165,6 +165,7 @@ struct sd_req {
> >                     uint8_t         copy_policy;
> >                     uint16_t        flags;
> >                     uint32_t        tag;
> > +                   uint32_t        support_nodes;
> 
> support_nodes is not good. nodes_nr is simpler.

These sort of style problems can be fixed later easily. I prefer apply
first and fix later than increasing turn around of review. Frequent
commits can reduce possibility of rebasing and make development
speedy.

Thanks,
Hitoshi

-- 
sheepdog mailing list
[email protected]
http://lists.wpkg.org/mailman/listinfo/sheepdog

Reply via email to