From: Robin Dong <san...@taobao.com>

Use hyper volumes (size up to 16PB) to store a large number of accounts
and containers.

Signed-off-by: Robin Dong <san...@taobao.com>
---
 sheep/http/http.c  |   5 +
 sheep/http/http.h  |   1 +
 sheep/http/kv.c    | 627 ++++++++++++++++++++++++++++++++++++++++++++++-------
 sheep/http/kv.h    |  22 +-
 sheep/http/s3.c    |   6 +-
 sheep/http/swift.c | 107 ++++++---
 6 files changed, 656 insertions(+), 112 deletions(-)

diff --git a/sheep/http/http.c b/sheep/http/http.c
index b3bbb79..577b163 100644
--- a/sheep/http/http.c
+++ b/sheep/http/http.c
@@ -52,6 +52,7 @@ static inline const char *strstatus(enum http_status status)
                [NO_CONTENT] = "204 No Content",
                [PARTIAL_CONTENT] = "206 Partial Content",
                [BAD_REQUEST] = "400 Bad Request",
+               [UNAUTHORIZED] = "401 Unauthorized",
                [NOT_FOUND] = "404 Not Found",
                [METHOD_NOT_ALLOWED] = "405 Method Not Allowed",
                [CONFLICT] = "409 Conflict",
@@ -192,6 +193,9 @@ void http_response_header(struct http_request *req, enum http_status status)
 
        req->status = status;
        http_request_writef(req, "Status: %s\r\n", strstatus(status));
+       if (req->opcode == HTTP_GET && req->data_length > 0)
+               http_request_writef(req, "Content-Length: %lu\r\n",
+                                   req->data_length);
        http_request_writes(req, "Content-type: text/plain;\r\n\r\n");
 }
 
@@ -233,6 +237,7 @@ static void http_run_request(struct work *work)
 
                if (method != NULL) {
                        method(req);
+                       sd_debug("req->status %d", req->status);
                        if (req->status != UNKNOWN)
                                goto out;
                }
diff --git a/sheep/http/http.h b/sheep/http/http.h
index 046d412..a8527d1 100644
--- a/sheep/http/http.h
+++ b/sheep/http/http.h
@@ -32,6 +32,7 @@ enum http_status {
        NO_CONTENT,                     /* 204 */
        PARTIAL_CONTENT,                /* 206 */
        BAD_REQUEST,                    /* 400 */
+       UNAUTHORIZED,                   /* 401 */
        NOT_FOUND,                      /* 404 */
        METHOD_NOT_ALLOWED,             /* 405 */
        CONFLICT,                       /* 409 */
diff --git a/sheep/http/kv.c b/sheep/http/kv.c
index 8113389..7d002b0 100644
--- a/sheep/http/kv.c
+++ b/sheep/http/kv.c
@@ -16,14 +16,30 @@
 
 #define FOR_EACH_VDI(nr, vdis) FOR_EACH_BIT(nr, vdis, SD_NR_VDIS)
 
-static int lookup_bucket(struct http_request *req, const char *bucket,
-                        uint32_t *vid)
+struct bucket_inode_hdr {
+       char bucket_name[SD_MAX_BUCKET_NAME];
+       uint64_t obj_count;
+       uint64_t bytes_used;
+       uint32_t onode_vid;
+};
+
+struct bucket_inode {
+       union {
+               struct bucket_inode_hdr hdr;
+               uint8_t data[SD_MAX_BUCKET_NAME << 1];
+       };
+};
+
+#define MAX_BUCKETS (SD_MAX_VDI_SIZE / sizeof(struct bucket_inode))
+#define BUCKETS_PER_SD_OBJ (SD_DATA_OBJ_SIZE / sizeof(struct bucket_inode))
+
+static int lookup_vdi(const char *name, uint32_t *vid)
 {
        int ret;
        struct vdi_info info = {};
        struct vdi_iocb iocb = {
-               .name = bucket,
-               .data_len = strlen(bucket),
+               .name = name,
+               .data_len = strlen(name),
        };
 
        ret = vdi_lookup(&iocb, &info);
@@ -32,27 +48,23 @@ static int lookup_bucket(struct http_request *req, const char *bucket,
                *vid = info.vid;
                break;
        case SD_RES_NO_VDI:
-               sd_info("no such bucket %s", bucket);
-               http_response_header(req, NOT_FOUND);
-               return -1;
+               sd_info("no such vdi %s", name);
+               break;
        default:
-               sd_err("%s: bucket %s", sd_strerror(ret), bucket);
-               http_response_header(req, INTERNAL_SERVER_ERROR);
-               return -1;
+               sd_err("Failed to find vdi %s %s", name, sd_strerror(ret));
        }
 
-       return 0;
+       return ret;
 }
 
-/* Bucket operations */
-
-int kv_create_bucket(struct http_request *req, const char *bucket)
+static int kv_create_hyper_volume(const char *name, uint32_t *vdi_id)
 {
        struct sd_req hdr;
+       struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
        int ret;
        char buf[SD_MAX_VDI_LEN] = {0};
 
-       pstrcpy(buf, SD_MAX_VDI_LEN, bucket);
+       pstrcpy(buf, SD_MAX_VDI_LEN, name);
 
        sd_init_req(&hdr, SD_OP_NEW_VDI);
        hdr.flags = SD_FLAG_CMD_WRITE;
@@ -64,104 +76,573 @@ int kv_create_bucket(struct http_request *req, const char *bucket)
        hdr.vdi.store_policy = 1;
 
        ret = exec_local_req(&hdr, buf);
+       if (rsp->result != SD_RES_SUCCESS)
+               sd_err("Failed to create VDI %s: %s", name,
+                      sd_strerror(rsp->result));
+
+       if (vdi_id)
+               *vdi_id = rsp->vdi.vdi_id;
+
+       return ret;
+}
+
+static int discard_data_obj(uint64_t oid)
+{
+       int ret;
+       struct sd_req hdr;
+
+       sd_init_req(&hdr, SD_OP_DISCARD_OBJ);
+       hdr.obj.oid = oid;
+
+       ret = exec_local_req(&hdr, NULL);
+       if (ret != SD_RES_SUCCESS)
+               sd_err("Failed to discard data obj %lu %s", oid,
+                      sd_strerror(ret));
+
+       return ret;
+}
+
+static int kv_delete_vdi(const char *name)
+{
+       int ret;
+       struct sd_req hdr;
+       char data[SD_MAX_VDI_LEN] = {0};
+       uint32_t vid;
+
+       ret = lookup_vdi(name, &vid);
+       if (ret != SD_RES_SUCCESS)
+               return ret;
+
+       sd_init_req(&hdr, SD_OP_DEL_VDI);
+       hdr.flags = SD_FLAG_CMD_WRITE;
+       hdr.data_length = sizeof(data);
+       pstrcpy(data, SD_MAX_VDI_LEN, name);
+
+       ret = exec_local_req(&hdr, data);
+       if (ret != SD_RES_SUCCESS)
+               sd_err("Failed to delete vdi %s %s", name, sd_strerror(ret));
+
+       return ret;
+}
+
+/*
+ * An account is actually a hyper volume vdi (up to 16PB);
+ * all the buckets (or containers, identified by 'struct bucket_inode') are
+ * stored in this hyper vdi using a hashing algorithm.
+ * Each bucket also has a hyper vdi named "account/bucket" which stores
+ * 'struct kv_onodes'.
+ *
+ * For example: account "coly" has two buckets "jetta" and "volvo"
+ *
+ *
+ * account vdi
+ * +-----------+---+--------------------------+---+--------------------------+--
+ * |name: coly |...|bucket_inode (name: jetta)|...|bucket_inode (name: volvo)|..
+ * +-----------+---+--------------------------+---+--------------------------+--
+ *                                  |                             |
+ *                                 /                              |
+ * bucket vdi                     /                               |
+ * +-----------------+-------+ <--                                |
+ * |name: coly/jetta |.......|                                    |
+ * +-----------------+-------+                                   /
+ *                              bucket vdi                      /
+ *                              +-----------------+------+ <----
+ *                              | name: coly/volvo|......|
+ *                              +-----------------+------+
+ */
+
+/* Account operations */
+
+int kv_create_account(const char *account)
+{
+       uint32_t vdi_id;
+       return kv_create_hyper_volume(account, &vdi_id);
+}
+
+typedef void (*list_cb)(struct http_request *req, const char *bucket,
+                       void *opaque);
+
+struct list_buckets_arg {
+       struct http_request *req;
+       void *opaque;
+       list_cb cb;
+       uint32_t bucket_counter;
+};
+
+static void list_buckets_cb(void *data, enum btree_node_type type, void *arg)
+{
+       struct sd_extent *ext;
+       struct list_buckets_arg *lbarg = arg;
+       struct bucket_inode *bnode;
+       uint64_t oid;
+       char *buf = NULL;
+       int ret;
+
+       if (type == BTREE_EXT) {
+               ext = (struct sd_extent *)data;
+               if (!ext->vdi_id)
+                       return;
+
+               buf = xzalloc(SD_DATA_OBJ_SIZE);
+
+               oid = vid_to_data_oid(ext->vdi_id, ext->idx);
+               ret = sd_read_object(oid, buf, SD_DATA_OBJ_SIZE, 0);
+               if (ret != SD_RES_SUCCESS) {
+                       sd_err("Failed to read data object %lx", oid);
+                       goto out;
+               }
+               /* loop all bucket_inodes in this data-object */
+               for (int i = 0; i < BUCKETS_PER_SD_OBJ; i++) {
+                       bnode = (struct bucket_inode *)
+                               (buf + i * sizeof(struct bucket_inode));
+                       if (bnode->hdr.onode_vid == 0)
+                               continue;
+                       if (lbarg->cb)
+                               lbarg->cb(lbarg->req, bnode->hdr.bucket_name,
+                                         (void *)lbarg->opaque);
+                       lbarg->bucket_counter++;
+               }
+       }
+out:
+       free(buf);
+}
+
+/* get number of buckets in this account */
+static int kv_get_account(const char *account, uint32_t *nr_buckets)
+{
+       struct sd_inode inode;
+       uint64_t oid;
+       uint32_t account_vid;
+       int ret;
+
+       ret = lookup_vdi(account, &account_vid);
+       if (ret != SD_RES_SUCCESS)
+               return ret;
+
+       /* read account vdi out */
+       oid = vid_to_vdi_oid(account_vid);
+       ret = sd_read_object(oid, (char *)&inode, sizeof(struct sd_inode), 0);
+       if (ret != SD_RES_SUCCESS) {
+               sd_err("Failed to read inode header %lx", oid);
+               return ret;
+       }
+
+       struct list_buckets_arg arg = {NULL, NULL, NULL, 0};
+       traverse_btree(sheep_bnode_reader, &inode, list_buckets_cb, &arg);
+       if (nr_buckets)
+               *nr_buckets = arg.bucket_counter;
+
+       return SD_RES_SUCCESS;
+}
+
+int kv_read_account(const char *account, uint32_t *nr_buckets)
+{
+       int ret;
+
+       ret = kv_get_account(account, nr_buckets);
+       if (ret != SD_RES_SUCCESS)
+               sd_err("Failed to get number of buckets in %s", account);
+       return ret;
+}
+
+int kv_update_account(const char *account)
+{
+       /* TODO: update metadata of the account */
+       return -1;
+}
+
+int kv_delete_account(const char *account)
+{
+       int ret;
+
+       ret = kv_delete_vdi(account);
+       if (ret != SD_RES_SUCCESS)
+               sd_err("Failed to delete vdi %s", account);
+
+       return ret;
+}
+
+/* Bucket operations */
+
+static int lookup_bucket(struct http_request *req, const char *bucket,
+                        uint32_t *vid)
+{
+       int ret;
+       struct vdi_info info = {};
+       struct vdi_iocb iocb = {
+               .name = bucket,
+               .data_len = strlen(bucket),
+       };
+
+       ret = vdi_lookup(&iocb, &info);
        switch (ret) {
        case SD_RES_SUCCESS:
-               http_response_header(req, CREATED);
+               *vid = info.vid;
                break;
-       case SD_RES_VDI_EXIST:
-               http_response_header(req, ACCEPTED);
+       case SD_RES_NO_VDI:
+               sd_info("no such bucket %s", bucket);
+               http_response_header(req, NOT_FOUND);
                break;
        default:
-               sd_err("%s: bucket %s", sd_strerror(ret), bucket);
+               sd_err("Failed to find bucket %s %s", bucket, sd_strerror(ret));
                http_response_header(req, INTERNAL_SERVER_ERROR);
-               return -1;
        }
 
-       return 0;
+       return ret;
 }
 
-int kv_read_bucket(struct http_request *req, const char *bucket)
+/*
+ * Delete a bucket (container) inode from the account vdi.
+ * idx: the target hash position of the bucket
+ * Return the position of the bucket_inode in the sd-data-object on success
+ * Return BUCKETS_PER_SD_OBJ if the bucket_inode is not found
+ * Return -1 if an error happened
+ */
+static int delete_bucket(struct sd_inode *account_inode, uint64_t idx,
+                        const char *bucket)
 {
-       /* TODO: read metadata of the bucket */
-       return -1;
+       struct bucket_inode *bnode;
+       char *buf;
+       uint32_t vdi_id;
+       uint64_t oid;
+       uint64_t data_index = idx / BUCKETS_PER_SD_OBJ;
+       int offset = idx % BUCKETS_PER_SD_OBJ;
+       int ret, i, empty_buckets = 0, found = 0;
+
+       vdi_id = INODE_GET_VID(account_inode, data_index);
+       if (!vdi_id) {
+               sd_err("the %lu in vdi %s is not exists", data_index,
+                      account_inode->name);
+               ret = -1;
+               goto out;
+       }
+
+       oid = vid_to_data_oid(account_inode->vdi_id, data_index);
+       buf = xzalloc(SD_DATA_OBJ_SIZE);
+       ret = sd_read_object(oid, buf, SD_DATA_OBJ_SIZE, 0);
+       if (ret != SD_RES_SUCCESS) {
+               sd_err("Failed to read inode header %lx", oid);
+               ret = -1;
+               goto out;
+       }
+
+       for (i = 0; i < BUCKETS_PER_SD_OBJ; i++) {
+               char vdi_name[SD_MAX_VDI_LEN];
+               bnode = (struct bucket_inode *)
+                       (buf + i * sizeof(struct bucket_inode));
+               /* count all empty buckets in this sd-data-obj */
+               if (bnode->hdr.onode_vid == 0) {
+                       empty_buckets++;
+                       continue;
+               }
+               if (strncmp(bnode->hdr.bucket_name, bucket, SD_MAX_BUCKET_NAME))
+                       continue;
+
+               if (i < offset)
+                       panic("postion of bucket inode %d is smaller than %d",
+                             i, offset);
+
+               found = i;
+               /* find the bnode */
+               bnode->hdr.onode_vid = 0;
+               snprintf(vdi_name, SD_MAX_VDI_LEN, "%s/%s",
+                        account_inode->name, bucket);
+
+               ret = kv_delete_vdi(vdi_name);
+               if (ret != SD_RES_SUCCESS) {
+                       sd_err("Failed to delete vdi %s", vdi_name);
+                       ret = -1;
+                       goto out;
+               }
+               sd_debug("delete vdi %s success", vdi_name);
+       }
+
+       if (!found) {
+               ret = BUCKETS_PER_SD_OBJ;
+               goto out;
+       }
+
+       /*
+        * if only this bucket_inode is in the sd-data-obj,
+        * then delete this sd-data-obj
+        */
+       if (empty_buckets == BUCKETS_PER_SD_OBJ - 1) {
+               ret = discard_data_obj(oid);
+               if (ret != SD_RES_SUCCESS) {
+                       ret = -1;
+                       goto out;
+               }
+               INODE_SET_VID(account_inode, data_index, 0);
+               ret = sd_inode_write_vid(sheep_bnode_writer, account_inode,
+                                        data_index, vdi_id, vdi_id, 0, false,
+                                        false);
+               if (ret != SD_RES_SUCCESS) {
+                       sd_err("Failed to write inode %x", vdi_id);
+                       ret = -1;
+                       goto out;
+               }
+               sd_debug("discard obj %lx and update vdi %x success",
+                        oid, vdi_id);
+       } else {
+               ret = sd_write_object(oid, buf, sizeof(struct bucket_inode),
+                                  i * sizeof(struct bucket_inode), false);
+               if (ret != SD_RES_SUCCESS) {
+                       sd_err("Failed to write object %lx", oid);
+                       ret = -1;
+                       goto out;
+               }
+       }
+
+       sd_debug("write object oid %lx success", oid);
+       ret = found;
+out:
+       free(buf);
+       return ret;
 }
 
-int kv_update_bucket(struct http_request *req, const char *bucket)
+/*
+ * Add a bucket (container) inode into the account vdi.
+ * idx: the target hash position of the bucket
+ * Return the position of the bucket_inode in the sd-data-object on success
+ * Return BUCKETS_PER_SD_OBJ if the data-object is full of bucket_inodes
+ * Return -1 if an error happened
+ */
+static int add_bucket(struct sd_inode *account_inode, uint64_t idx,
+                     const char *bucket)
 {
-       /* TODO: update metadata of the bucket */
-       return -1;
+       struct bucket_inode *bnode;
+       char *buf;
+       uint32_t vdi_id;
+       uint64_t oid;
+       uint64_t data_index = idx / BUCKETS_PER_SD_OBJ;
+       int offset = idx % BUCKETS_PER_SD_OBJ;
+       int ret, i;
+       bool create = false;
+
+       buf = xzalloc(SD_DATA_OBJ_SIZE);
+
+       vdi_id = INODE_GET_VID(account_inode, data_index);
+       oid = vid_to_data_oid(account_inode->vdi_id, data_index);
+       sd_debug("oid %x %lx %lx", account_inode->vdi_id, data_index, oid);
+       /* the data object already exists */
+       if (vdi_id) {
+               ret = sd_read_object(oid, buf, SD_DATA_OBJ_SIZE, 0);
+               if (ret != SD_RES_SUCCESS) {
+                       sd_err("Failed to read inode header %lx", oid);
+                       ret = -1;
+                       goto out;
+               }
+       } else
+               create = true;
+
+       sd_debug("bucket_inode offset %d %lu", offset, BUCKETS_PER_SD_OBJ);
+       for (i = offset; i < BUCKETS_PER_SD_OBJ; i++) {
+               char vdi_name[SD_MAX_VDI_LEN];
+               bnode = (struct bucket_inode *)
+                       (buf + i * sizeof(struct bucket_inode));
+               if (bnode->hdr.onode_vid != 0)
+                       continue;
+
+               /* this bnode is unused */
+               strncpy(bnode->hdr.bucket_name, bucket, SD_MAX_BUCKET_NAME);
+               bnode->hdr.obj_count = 0;
+               bnode->hdr.bytes_used = 0;
+               snprintf(vdi_name, SD_MAX_VDI_LEN, "%s/%s",
+                        account_inode->name, bucket);
+               ret = kv_create_hyper_volume(vdi_name, &(bnode->hdr.onode_vid));
+               if (ret != SD_RES_SUCCESS) {
+                       sd_err("Failed to create hyper volume %d", ret);
+                       ret = -1;
+                       goto out;
+               }
+               sd_debug("create hyper volume %s success", vdi_name);
+               break;
+       }
+
+       if (i >= BUCKETS_PER_SD_OBJ) {
+               ret = BUCKETS_PER_SD_OBJ;
+               goto out;
+       }
+
+       /* write bnode back to account-vdi */
+       if (create)
+               ret = sd_write_object(oid, buf, SD_DATA_OBJ_SIZE, 0, create);
+       else
+               ret = sd_write_object(oid, buf, sizeof(struct bucket_inode),
+                                  i * sizeof(struct bucket_inode), create);
+
+       if (ret != SD_RES_SUCCESS) {
+               sd_err("Failed to write object %lx", oid);
+               ret = -1;
+               goto out;
+       }
+
+       sd_debug("write object oid %lx success", oid);
+
+       /* update index of vdi */
+       if (create) {
+               vdi_id = account_inode->vdi_id;
+               INODE_SET_VID(account_inode, data_index, vdi_id);
+               ret = sd_inode_write_vid(sheep_bnode_writer, account_inode,
+                                        data_index, vdi_id, vdi_id, 0, false,
+                                        false);
+               if (ret != SD_RES_SUCCESS) {
+                       sd_err("Failed to write inode %x", vdi_id);
+                       ret = -1;
+                       goto out;
+               }
+               sd_debug("write account inode success");
+       }
+
+       ret = i;
+out:
+       free(buf);
+       return ret;
 }
 
-/* TODO: return HTTP_CONFLICT when the bucket is not empty */
-int kv_delete_bucket(struct http_request *req, const char *bucket)
+static int kv_get_bucket(struct sd_inode *account_inode, const char *account,
+                        const char *bucket)
 {
+       char vdi_name[SD_MAX_VDI_LEN];
+       uint64_t oid;
+       uint32_t account_vid, bucket_vid;
        int ret;
-       struct sd_req hdr;
-       char data[SD_MAX_VDI_LEN] = {0};
-       uint32_t vid;
 
-       ret = lookup_bucket(req, bucket, &vid);
-       if (ret < 0)
+       ret = lookup_vdi(account, &account_vid);
+       if (ret != SD_RES_SUCCESS) {
+               sd_err("Failed to find account %s", account);
+               return -1;
+       }
+
+       /* read account vdi out */
+       oid = vid_to_vdi_oid(account_vid);
+       ret = sd_read_object(oid, (char *)account_inode,
+                         sizeof(struct sd_inode), 0);
+       if (ret != SD_RES_SUCCESS)
                return ret;
 
-       sd_init_req(&hdr, SD_OP_DELETE_CACHE);
-       hdr.obj.oid = vid_to_vdi_oid(vid);
+       /* find bucket vdi */
+       snprintf(vdi_name, SD_MAX_VDI_LEN, "%s/%s",
+                account_inode->name, bucket);
 
-       ret = exec_local_req(&hdr, NULL);
-       if (ret != SD_RES_SUCCESS) {
-               sd_err("failed to execute request");
-               http_response_header(req, INTERNAL_SERVER_ERROR);
+       return lookup_vdi(vdi_name, &bucket_vid);
+}
+
+int kv_create_bucket(const char *account, const char *bucket)
+{
+       struct sd_inode inode;
+       uint64_t hval, i;
+       int ret;
+
+       ret = kv_get_bucket(&inode, account, bucket);
+       /* if the bucket already exists, return SD_RES_VDI_EXIST */
+       if (!ret) {
+               sd_err("bucket %s is exists.", bucket);
+               return SD_RES_VDI_EXIST;
+       } else if (ret < 0)
                return -1;
-       }
 
-       sd_init_req(&hdr, SD_OP_DEL_VDI);
-       hdr.flags = SD_FLAG_CMD_WRITE;
-       hdr.data_length = sizeof(data);
-       pstrcpy(data, SD_MAX_VDI_LEN, bucket);
+       sd_debug("read account inode success");
 
-       ret = exec_local_req(&hdr, data);
-       if (ret == SD_RES_SUCCESS) {
-               http_response_header(req, NO_CONTENT);
-               return 0;
-       } else {
-               sd_err("%s: bucket %s", sd_strerror(ret), bucket);
-               http_response_header(req, INTERNAL_SERVER_ERROR);
+       hval = sd_hash(bucket, strlen(bucket));
+       for (i = 0; i < MAX_BUCKETS; i++) {
+               uint64_t idx = (hval + i) % MAX_BUCKETS;
+               ret = add_bucket(&inode, idx, bucket);
+               /* data-object is full */
+               if (ret == BUCKETS_PER_SD_OBJ) {
+                       i += BUCKETS_PER_SD_OBJ;
+                       continue;
+               } else if (ret < 0) {
+                       sd_err("Failed to add bucket");
+                       return ret;
+               }
+               /* add bucket success */
+               sd_debug("add bucket success");
+               break;
+       }
+
+       if (i >= MAX_BUCKETS) {
+               sd_err("Containers in vdi %s is full!", account);
                return -1;
        }
+       return 0;
 }
 
-int kv_list_buckets(struct http_request *req,
-                   void (*cb)(struct http_request *req, const char *bucket,
-                              void *opaque),
-                   void *opaque)
+int kv_read_bucket(const char *account, const char *bucket)
 {
-       char buf[SD_INODE_HEADER_SIZE];
-       struct sd_inode *inode = (struct sd_inode *)buf;
-       unsigned long nr;
+       /* TODO: read metadata of the bucket */
+       return -1;
+}
 
-       http_response_header(req, OK);
+int kv_update_bucket(const char *account, const char *bucket)
+{
+       /* TODO: update metadata of the bucket */
+       return -1;
+}
 
-       FOR_EACH_VDI(nr, sys->vdi_inuse) {
-               uint64_t oid;
-               int ret;
+/* return SD_RES_NO_VDI if the bucket does not exist */
+int kv_delete_bucket(const char *account, const char *bucket)
+{
+       struct sd_inode inode;
+       uint64_t hval, i;
+       int ret;
 
-               oid = vid_to_vdi_oid(nr);
+       ret = kv_get_bucket(&inode, account, bucket);
+       if (ret) {
+               sd_err("Failed to get bucket");
+               return ret;
+       }
 
-               ret = sd_read_object(oid, (char *)inode, SD_INODE_HEADER_SIZE,
-                                    0);
-               if (ret != SD_RES_SUCCESS) {
-                       sd_err("Failed to read inode header");
+       hval = sd_hash(bucket, strlen(bucket));
+       for (i = 0; i < MAX_BUCKETS; i++) {
+               uint64_t idx = (hval + i) % MAX_BUCKETS;
+               ret = delete_bucket(&inode, idx, bucket);
+               if (ret == BUCKETS_PER_SD_OBJ) {
+                       i += BUCKETS_PER_SD_OBJ;
                        continue;
+               } else if (ret < 0) {
+                       sd_err("Failed to delete bucket %d", ret);
+                       return ret;
                }
+               /* delete bucket success */
+               sd_debug("delete bucket success");
+               break;
+       }
 
-               if (inode->name[0] == '\0') /* this VDI has been deleted */
-                       continue;
+       if (i >= MAX_BUCKETS) {
+               sd_err("Can't find bucket %s", bucket);
+               return SD_RES_NO_VDI;
+       }
+       return SD_RES_SUCCESS;
+}
+
+int kv_list_buckets(struct http_request *req, const char *account, list_cb cb,
+                   void *opaque)
+{
+       struct sd_inode account_inode;
+       uint32_t account_vid;
+       uint64_t oid;
+       int ret;
 
-               if (!vdi_is_snapshot(inode))
-                       cb(req, inode->name, opaque);
+       ret = lookup_vdi(account, &account_vid);
+       if (ret != SD_RES_SUCCESS) {
+               sd_err("Failed to find account %s", account);
+               return ret;
        }
 
-       return 0;
+       /* read account vdi out */
+       oid = vid_to_vdi_oid(account_vid);
+       ret = sd_read_object(oid, (char *)&account_inode,
+                         sizeof(struct sd_inode), 0);
+       if (ret != SD_RES_SUCCESS) {
+               sd_err("Failed to read account inode header %lx", oid);
+               return ret;
+       }
+
+       struct list_buckets_arg arg = {req, opaque, cb, 0};
+       traverse_btree(sheep_bnode_reader, &account_inode,
+                      list_buckets_cb, &arg);
+       return SD_RES_SUCCESS;
 }
 
 /* Object operations */
diff --git a/sheep/http/kv.h b/sheep/http/kv.h
index f0b09fe..1774a36 100644
--- a/sheep/http/kv.h
+++ b/sheep/http/kv.h
@@ -14,15 +14,25 @@
 
 #include "http.h"
 
-#define SD_MAX_BUCKET_NAME 1024
+#define SD_MAX_BUCKET_NAME 64
 #define SD_MAX_OBJECT_NAME 1024
 
+/* Account operations */
+int kv_create_account(const char *account);
+int kv_read_account(const char *account, uint32_t *nr_buckets);
+int kv_update_account(const char *account);
+int kv_delete_account(const char *account);
+int kv_list_accounts(struct http_request *req,
+                   void (*cb)(struct http_request *req, const char *account,
+                              void *opaque),
+                   void *opaque);
+
 /* Bucket operations */
-int kv_create_bucket(struct http_request *req, const char *bucket);
-int kv_read_bucket(struct http_request *req, const char *bucket);
-int kv_update_bucket(struct http_request *req, const char *bucket);
-int kv_delete_bucket(struct http_request *req, const char *bucket);
-int kv_list_buckets(struct http_request *req,
+int kv_create_bucket(const char *account, const char *bucket);
+int kv_read_bucket(const char *account, const char *bucket);
+int kv_update_bucket(const char *account, const char *bucket);
+int kv_delete_bucket(const char *account, const char *bucket);
+int kv_list_buckets(struct http_request *req, const char *account,
                    void (*cb)(struct http_request *req, const char *bucket,
                               void *opaque),
                    void *opaque);
diff --git a/sheep/http/s3.c b/sheep/http/s3.c
index ca2efe3..8142bb5 100644
--- a/sheep/http/s3.c
+++ b/sheep/http/s3.c
@@ -58,7 +58,7 @@ static void s3_get_service(struct http_request *req)
 {
        bool print_header = true;
 
-       kv_list_buckets(req, s3_get_service_cb, &print_header);
+       kv_list_buckets(req, "s3", s3_get_service_cb, &print_header);
 
        http_request_writes(req, "</Buckets></ListAllMyBucketsResult>\r\n");
 }
@@ -125,7 +125,7 @@ static void s3_get_bucket(struct http_request *req, const char *bucket)
 
 static void s3_put_bucket(struct http_request *req, const char *bucket)
 {
-       kv_create_bucket(req, bucket);
+       kv_create_bucket("s3", bucket);
 
        if (req->status == ACCEPTED)
                s3_write_err_response(req, "BucketAlreadyExists",
@@ -139,7 +139,7 @@ static void s3_post_bucket(struct http_request *req, const char *bucket)
 
 static void s3_delete_bucket(struct http_request *req, const char *bucket)
 {
-       kv_delete_bucket(req, bucket);
+       kv_delete_bucket("s3", bucket);
 
        switch (req->status) {
        case NOT_FOUND:
diff --git a/sheep/http/swift.c b/sheep/http/swift.c
index 14f5ae5..8b5024d 100644
--- a/sheep/http/swift.c
+++ b/sheep/http/swift.c
@@ -21,6 +21,10 @@ static int dfd;
 static char content[4096];
 static char tmp[4096];
 
+#define HTTP_REMOVE_ACCOUNT "HTTP_X_REMOVE_ACCOUNT_META_BOOK"
+
+static void swift_delete_account(struct http_request *req, const char *account);
+
 static void make_bucket_path(char *bucket, size_t size, const char *account,
                             const char *container)
 {
@@ -33,26 +37,37 @@ static void make_bucket_path(char *bucket, size_t size, const char *account,
 
 static void swift_head_account(struct http_request *req, const char *account)
 {
-       http_response_header(req, NOT_IMPLEMENTED);
+       uint32_t nr_buckets;
+       int ret;
+
+       ret = kv_read_account(account, &nr_buckets);
+       if (ret)
+               http_response_header(req, UNAUTHORIZED);
+       else {
+               http_request_writef(req, "X-Account-Container-Count: %u\n",
+                                   nr_buckets);
+               http_response_header(req, NO_CONTENT);
+       }
 }
 
 static void swift_get_account_cb(struct http_request *req, const char *bucket,
                                 void *opaque)
 {
-       const char *account = opaque;
-       char *args[2] = {};
+       struct strbuf *buf = (struct strbuf *)opaque;
 
-       split_path(bucket, ARRAY_SIZE(args), args);
-
-       if (args[1] != NULL && strcmp(args[0], account) == 0) {
-               http_request_writes(req, args[1]);
-               http_request_writes(req, "\n");
-       }
+       if (bucket)
+               strbuf_addf(buf, "%s\n", bucket);
 }
 
 static void swift_get_account(struct http_request *req, const char *account)
 {
-       kv_list_buckets(req, swift_get_account_cb, (void *)account);
+       struct strbuf buf = STRBUF_INIT;
+
+       kv_list_buckets(req, account, swift_get_account_cb, (void *)&buf);
+       req->data_length = buf.len;
+       http_response_header(req, OK);
+       http_request_write(req, buf.buf, buf.len);
+       strbuf_release(&buf);
 }
 
 static void swift_put_account(struct http_request *req, const char *account)
@@ -62,24 +77,50 @@ static void swift_put_account(struct http_request *req, const char *account)
 
 static void swift_post_account(struct http_request *req, const char *account)
 {
-       http_response_header(req, NOT_IMPLEMENTED);
+       char *p;
+       int ret;
+
+       for (int i = 0; (p = req->fcgx.envp[i]); ++i) {
+               /* delete account */
+               if (!strncmp(p, HTTP_REMOVE_ACCOUNT,
+                            strlen(HTTP_REMOVE_ACCOUNT))) {
+                       swift_delete_account(req, account);
+                       return;
+               }
+       }
+       /* create account */
+       ret = kv_create_account(account);
+       if (ret == SD_RES_SUCCESS)
+               http_response_header(req, CREATED);
+       else if (ret == SD_RES_VDI_EXIST)
+               http_response_header(req, ACCEPTED);
+       else
+               http_response_header(req, INTERNAL_SERVER_ERROR);
 }
 
-static void swift_delete_account_cb(struct http_request *req,
-                                   const char *bucket, void *opaque)
+static void swift_delete_account(struct http_request *req, const char *account)
 {
-       const char *account = opaque;
-       char *args[2] = {};
+       uint32_t nr_buckets;
+       int ret;
 
-       split_path(bucket, ARRAY_SIZE(args), args);
+       ret = kv_read_account(account, &nr_buckets);
+       if (ret) {
+               http_response_header(req, INTERNAL_SERVER_ERROR);
+               return;
+       }
 
-       if (args[1] != NULL && strcmp(args[0], account) == 0)
-               kv_delete_bucket(req, bucket);
-}
+       if (nr_buckets) {
+               /* return HTTP_CONFLICT when the account is not empty */
+               http_response_header(req, CONFLICT);
+               return;
+       }
 
-static void swift_delete_account(struct http_request *req, const char *account)
-{
-       kv_list_buckets(req, swift_delete_account_cb, (void *)account);
+       ret = kv_delete_account(account);
+       if (ret) {
+               http_response_header(req, INTERNAL_SERVER_ERROR);
+               return;
+       }
+       http_response_header(req, OK);
 }
 
 /* Operations on Containers */
@@ -109,10 +150,14 @@ static void swift_get_container(struct http_request *req, const char *account,
 static void swift_put_container(struct http_request *req, const char *account,
                                const char *container)
 {
-       char bucket[SD_MAX_BUCKET_NAME];
-
-       make_bucket_path(bucket, sizeof(bucket), account, container);
-       kv_create_bucket(req, bucket);
+       int ret;
+       ret = kv_create_bucket(account, container);
+       if (ret == SD_RES_SUCCESS)
+               http_response_header(req, CREATED);
+       else if (ret == SD_RES_VDI_EXIST)
+               http_response_header(req, ACCEPTED);
+       else
+               http_response_header(req, INTERNAL_SERVER_ERROR);
 }
 
 static void swift_post_container(struct http_request *req, const char *account,
@@ -124,10 +169,12 @@ static void swift_post_container(struct http_request *req, const char *account,
 static void swift_delete_container(struct http_request *req,
                                   const char *account, const char *container)
 {
-       char bucket[SD_MAX_BUCKET_NAME];
-
-       make_bucket_path(bucket, sizeof(bucket), account, container);
-       kv_delete_bucket(req, bucket);
+       int ret;
+       ret = kv_delete_bucket(account, container);
+       if (ret == SD_RES_NO_VDI)
+               http_response_header(req, NOT_FOUND);
+       else
+               http_response_header(req, NO_CONTENT);
 }
 
 /* Operations on Objects */
-- 
1.7.12.4

-- 
sheepdog mailing list
sheepdog@lists.wpkg.org
http://lists.wpkg.org/mailman/listinfo/sheepdog

Reply via email to