[Qemu-devel] [PATCH RFC v2 09/22] block/pcache: separation AIOCB on requests

2016-08-29 Thread Pavel Butsykin
For the case when the cache partially covers a request, part of the request
is filled from the cache and the other part is read from disk. Also add
reference counting for nodes, as a way to support multithreading.

There is still no full synchronization in multithreaded mode.
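
A rough sketch of the intended read path (pcache_node_find(), acb->qiov,
miss_qiov and pcache_aio_cb below are illustrative names, not functions or
fields introduced by this patch):

/* Sketch: serving a partial cache hit. The part of the guest request
 * covered by the cached node is copied out of node->data, the uncovered
 * tail is read from the underlying image. */
PCNode *node = pcache_node_find(s, acb->sector_num);
uint64_t cached_end = node->cm.sector_num + node->cm.nb_sectors;
uint64_t skip = (acb->sector_num - node->cm.sector_num) << BDRV_SECTOR_BITS;
uint64_t bytes = (cached_end - acb->sector_num) << BDRV_SECTOR_BITS;

/* the covered part is filled from the cache ... */
qemu_iovec_from_buf(acb->qiov, 0, node->data + skip, bytes);

/* ... and the rest of the request is read from disk */
bdrv_aio_readv(bs->file, cached_end, &miss_qiov,
               acb->sector_num + acb->nb_sectors - cached_end,
               pcache_aio_cb, acb);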

Signed-off-by: Pavel Butsykin 
---
 block/pcache.c | 169 -
 1 file changed, 155 insertions(+), 14 deletions(-)

diff --git a/block/pcache.c b/block/pcache.c
index 28bd056..6114289 100644
--- a/block/pcache.c
+++ b/block/pcache.c
@@ -58,7 +58,10 @@ typedef struct BlockNode {
 typedef struct PCNode {
 BlockNode cm;
 
+uint32_t status;
+uint32_t ref;
 uint8_t  *data;
+CoMutex  lock;
 } PCNode;
 
 typedef struct ReqStor {
@@ -95,9 +98,23 @@ typedef struct PrefCacheAIOCB {
 uint64_t sector_num;
 uint32_t nb_sectors;
 int  aio_type;
+struct {
+QTAILQ_HEAD(req_head, PrefCachePartReq) list;
+CoMutex lock;
+} requests;
 int  ret;
 } PrefCacheAIOCB;
 
+typedef struct PrefCachePartReq {
+uint64_t sector_num;
+uint32_t nb_sectors;
+
+QEMUIOVector qiov;
+PCNode *node;
+PrefCacheAIOCB *acb;
+QTAILQ_ENTRY(PrefCachePartReq) entry;
+} PrefCachePartReq;
+
 static const AIOCBInfo pcache_aiocb_info = {
 .aiocb_size = sizeof(PrefCacheAIOCB),
 };
@@ -126,8 +143,39 @@ static QemuOptsList runtime_opts = {
 #define MB_BITS 20
 #define PCACHE_DEFAULT_CACHE_SIZE (4 << MB_BITS)
 
+enum {
+NODE_SUCCESS_STATUS = 0,
+NODE_WAIT_STATUS= 1,
+NODE_REMOVE_STATUS  = 2,
+NODE_GHOST_STATUS   = 3 /* only for debugging */
+};
+
 #define PCNODE(_n) ((PCNode *)(_n))
 
+static inline void pcache_node_unref(PCNode *node)
+{
+assert(node->status == NODE_SUCCESS_STATUS ||
+   node->status == NODE_REMOVE_STATUS);
+
+if (atomic_fetch_dec(&node->ref) == 0) {
+assert(node->status == NODE_REMOVE_STATUS);
+
+node->status = NODE_GHOST_STATUS;
+g_free(node->data);
+g_slice_free1(sizeof(*node), node);
+}
+}
+
+static inline PCNode *pcache_node_ref(PCNode *node)
+{
+assert(node->status == NODE_SUCCESS_STATUS ||
+   node->status == NODE_WAIT_STATUS);
+assert(atomic_read(&node->ref) == 0);/* XXX: only for sequential requests */
+atomic_inc(&node->ref);
+
+return node;
+}
+
 static int pcache_key_cmp(const RbNodeKey *key1, const RbNodeKey *key2)
 {
 assert(key1 != NULL);
@@ -184,13 +232,7 @@ static void *node_insert(struct RbRoot *root, BlockNode *node)
 
 static inline PCNode *pcache_node_insert(struct RbRoot *root, PCNode *node)
 {
-return node_insert(root, &node->cm);
-}
-
-static inline void pcache_node_free(PCNode *node)
-{
-g_free(node->data);
-g_slice_free1(sizeof(*node), node);
+return pcache_node_ref(node_insert(root, &node->cm));
 }
 
 static inline void *pcache_node_alloc(RbNodeKey* key)
@@ -199,6 +241,9 @@ static inline void *pcache_node_alloc(RbNodeKey* key)
 
 node->cm.sector_num = key->num;
 node->cm.nb_sectors = key->size;
+node->ref = 0;
+node->status = NODE_WAIT_STATUS;
+qemu_co_mutex_init(&node->lock);
 node->data = g_malloc(node->cm.nb_sectors << BDRV_SECTOR_BITS);
 
 return node;
@@ -206,6 +251,12 @@ static inline void *pcache_node_alloc(RbNodeKey* key)
 
 static void pcache_node_drop(BDRVPCacheState *s, PCNode *node)
 {
+uint32_t prev_status = atomic_xchg(&node->status, NODE_REMOVE_STATUS);
+if (prev_status == NODE_REMOVE_STATUS) {
+return;
+}
+assert(prev_status != NODE_GHOST_STATUS);
+
 atomic_sub(&s->pcache.curr_size, node->cm.nb_sectors);
 
 qemu_co_mutex_lock(&s->pcache.lru.lock);
@@ -216,7 +267,7 @@ static void pcache_node_drop(BDRVPCacheState *s, PCNode *node)
 rb_erase(&node->cm.rb_node, &s->pcache.tree.root);
 qemu_co_mutex_unlock(&s->pcache.tree.lock);
 
-pcache_node_free(node);
+pcache_node_unref(node);
 }
 
 static void pcache_try_shrink(BDRVPCacheState *s)
@@ -234,6 +285,30 @@ static void pcache_try_shrink(BDRVPCacheState *s)
 }
 }
 
+static PrefCachePartReq *pcache_req_get(PrefCacheAIOCB *acb, PCNode *node)
+{
+PrefCachePartReq *req = g_slice_alloc(sizeof(*req));
+
+req->nb_sectors = node->cm.nb_sectors;
+req->sector_num = node->cm.sector_num;
+req->node = node;
+req->acb = acb;
+
+assert(acb->sector_num <= node->cm.sector_num + node->cm.nb_sectors);
+
+qemu_iovec_init(&req->qiov, 1);
+qemu_iovec_add(&req->qiov, node->data,
+   node->cm.nb_sectors << BDRV_SECTOR_BITS);
+return req;
+}
+
+static inline void push_node_request(PrefCacheAIOCB *acb, PCNode *node)
+{
+PrefCachePartReq *req = pcache_req_get(acb, node);
+
+QTAILQ_INSERT_HEAD(&acb->requests.list, req, entry);
+}
+
 static inline void pcache_lru_node_up(BDRVPCacheState *s, PCNode *node)
 {
 qemu_co_mutex_lock(&s->pcache.lru.lock);
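
(For context, a sketch of how these part-requests are meant to be built for
one AIOCB; pcache_node_find_and_create() here stands in for the node
lookup/insert path, which is not part of this excerpt:)

static void pcache_build_part_requests(BDRVPCacheState *s,
                                       PrefCacheAIOCB *acb)
{
    uint64_t num = acb->sector_num;
    uint64_t end = acb->sector_num + acb->nb_sectors;

    while (num < end) {
        /* take a referenced node that overlaps [num, end) */
        PCNode *node = pcache_node_find_and_create(s, num, end - num);

        /* queue a partial request whose qiov points at node->data */
        push_node_request(acb, node);

        num = node->cm.sector_num + node->cm.nb_sectors;
    }
}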

Re: [Qemu-devel] [PATCH RFC v2 09/22] block/pcache: separation AIOCB on requests

2016-09-02 Thread Kevin Wolf
On 29.08.2016 at 19:10, Pavel Butsykin wrote:
> For the case when the cache partially covers a request, part of the request
> is filled from the cache and the other part is read from disk. Also add
> reference counting for nodes, as a way to support multithreading.
> 
> There is still no full synchronization in multithreaded mode.
> 
> Signed-off-by: Pavel Butsykin 
> ---
>  block/pcache.c | 169 -
>  1 file changed, 155 insertions(+), 14 deletions(-)
> 
> diff --git a/block/pcache.c b/block/pcache.c
> index 28bd056..6114289 100644
> --- a/block/pcache.c
> +++ b/block/pcache.c
> @@ -58,7 +58,10 @@ typedef struct BlockNode {
>  typedef struct PCNode {
>  BlockNode cm;
>  
> +uint32_t status;

I guess this is NODE_*_STATUS. Make it a named enum then instead of
uint32_t so that it's obvious what this field means.
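
For illustration, the named-enum version could look like this (NodeStatus is
just a suggested name; the values are the ones introduced later in this
patch):

typedef enum NodeStatus {
    NODE_SUCCESS_STATUS = 0,
    NODE_WAIT_STATUS    = 1,
    NODE_REMOVE_STATUS  = 2,
    NODE_GHOST_STATUS   = 3  /* only for debugging */
} NodeStatus;

typedef struct PCNode {
    BlockNode cm;

    NodeStatus status;
    uint32_t   ref;
    uint8_t   *data;
    CoMutex    lock;
} PCNode;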

> +uint32_t ref;
>  uint8_t  *data;
> +CoMutex  lock;
>  } PCNode;
>  
>  typedef struct ReqStor {
> @@ -95,9 +98,23 @@ typedef struct PrefCacheAIOCB {
>  uint64_t sector_num;
>  uint32_t nb_sectors;
>  int  aio_type;
> +struct {
> +QTAILQ_HEAD(req_head, PrefCachePartReq) list;
> +CoMutex lock;
> +} requests;
>  int  ret;
>  } PrefCacheAIOCB;
>  
> +typedef struct PrefCachePartReq {
> +uint64_t sector_num;
> +uint32_t nb_sectors;

Should be byte-based, like everything.
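
Something like this, converting from the sector-based node once at creation
time (the field names are only a suggestion):

typedef struct PrefCachePartReq {
    uint64_t offset;   /* bytes */
    uint64_t bytes;    /* bytes */

    QEMUIOVector qiov;
    PCNode *node;
    PrefCacheAIOCB *acb;
    QTAILQ_ENTRY(PrefCachePartReq) entry;
} PrefCachePartReq;

req->offset = (uint64_t)node->cm.sector_num << BDRV_SECTOR_BITS;
req->bytes  = (uint64_t)node->cm.nb_sectors << BDRV_SECTOR_BITS;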

> +QEMUIOVector qiov;
> +PCNode *node;
> +PrefCacheAIOCB *acb;
> +QTAILQ_ENTRY(PrefCachePartReq) entry;
> +} PrefCachePartReq;
> +
>  static const AIOCBInfo pcache_aiocb_info = {
>  .aiocb_size = sizeof(PrefCacheAIOCB),
>  };
> @@ -126,8 +143,39 @@ static QemuOptsList runtime_opts = {
>  #define MB_BITS 20
>  #define PCACHE_DEFAULT_CACHE_SIZE (4 << MB_BITS)
>  
> +enum {
> +NODE_SUCCESS_STATUS = 0,
> +NODE_WAIT_STATUS= 1,
> +NODE_REMOVE_STATUS  = 2,
> +NODE_GHOST_STATUS   = 3 /* only for debugging */

NODE_DELETED_STATUS?

> +};
> +
>  #define PCNODE(_n) ((PCNode *)(_n))
>  
> +static inline void pcache_node_unref(PCNode *node)
> +{
> +assert(node->status == NODE_SUCCESS_STATUS ||
> +   node->status == NODE_REMOVE_STATUS);
> +
> +if (atomic_fetch_dec(&node->ref) == 0) {

Atomics imply concurrency, which we don't have.
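
If everything stays in one AioContext, a plain decrement with the same
fetch-then-decrement semantics would make that explicit (sketch):

if (node->ref-- == 0) {
    assert(node->status == NODE_REMOVE_STATUS);

    node->status = NODE_GHOST_STATUS;
    g_free(node->data);
    g_slice_free1(sizeof(*node), node);
}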

> +assert(node->status == NODE_REMOVE_STATUS);
> +
> +node->status = NODE_GHOST_STATUS;
> +g_free(node->data);
> +g_slice_free1(sizeof(*node), node);

When you switch to plain g_malloc(), this needs to be updated.
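
That is, the pair would then become (sketch):

g_free(node->data);
g_free(node);   /* pairs with plain g_malloc() */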

> +}
> +}
> +
> +static inline PCNode *pcache_node_ref(PCNode *node)
> +{
> +assert(node->status == NODE_SUCCESS_STATUS ||
> +   node->status == NODE_WAIT_STATUS);
> +assert(atomic_read(&node->ref) == 0);/* XXX: only for sequential requests */
> +atomic_inc(&node->ref);

Do you expect concurrent accesses or not? Because if you don't, there is
no need for atomics, but if you do, this is buggy because each of the
lines is atomic for itself, but the assertion isn't atomic with the
refcount increment.

A ref() function that can take only a single reference feels odd anyway
and this restriction seems to be lifted later. Why have it here?

> +
> +return node;
> +}

Kevin



Re: [Qemu-devel] [PATCH RFC v2 09/22] block/pcache: separation AIOCB on requests

2016-09-08 Thread Pavel Butsykin

On 02.09.2016 12:10, Kevin Wolf wrote:

On 29.08.2016 at 19:10, Pavel Butsykin wrote:

For the case when the cache partially covers a request, part of the request
is filled from the cache and the other part is read from disk. Also add
reference counting for nodes, as a way to support multithreading.

There is still no full synchronization in multithreaded mode.

Signed-off-by: Pavel Butsykin 
---
  block/pcache.c | 169 -
  1 file changed, 155 insertions(+), 14 deletions(-)

diff --git a/block/pcache.c b/block/pcache.c
index 28bd056..6114289 100644
--- a/block/pcache.c
+++ b/block/pcache.c
@@ -58,7 +58,10 @@ typedef struct BlockNode {
  typedef struct PCNode {
  BlockNode cm;

+uint32_t status;


I guess this is NODE_*_STATUS. Make it a named enum then instead of
uint32_t so that it's obvious what this field means.


OK


+uint32_t ref;
  uint8_t  *data;
+CoMutex  lock;
  } PCNode;

  typedef struct ReqStor {
@@ -95,9 +98,23 @@ typedef struct PrefCacheAIOCB {
  uint64_t sector_num;
  uint32_t nb_sectors;
  int  aio_type;
+struct {
+QTAILQ_HEAD(req_head, PrefCachePartReq) list;
+CoMutex lock;
+} requests;
  int  ret;
  } PrefCacheAIOCB;

+typedef struct PrefCachePartReq {
+uint64_t sector_num;
+uint32_t nb_sectors;


Should be byte-based, like everything.


+QEMUIOVector qiov;
+PCNode *node;
+PrefCacheAIOCB *acb;
+QTAILQ_ENTRY(PrefCachePartReq) entry;
+} PrefCachePartReq;
+
  static const AIOCBInfo pcache_aiocb_info = {
  .aiocb_size = sizeof(PrefCacheAIOCB),
  };
@@ -126,8 +143,39 @@ static QemuOptsList runtime_opts = {
  #define MB_BITS 20
  #define PCACHE_DEFAULT_CACHE_SIZE (4 << MB_BITS)

+enum {
+NODE_SUCCESS_STATUS = 0,
+NODE_WAIT_STATUS= 1,
+NODE_REMOVE_STATUS  = 2,
+NODE_GHOST_STATUS   = 3 /* only for debugging */


NODE_DELETED_STATUS?


Yes :)


+};
+
  #define PCNODE(_n) ((PCNode *)(_n))

+static inline void pcache_node_unref(PCNode *node)
+{
+assert(node->status == NODE_SUCCESS_STATUS ||
+   node->status == NODE_REMOVE_STATUS);
+
+if (atomic_fetch_dec(&node->ref) == 0) {


Atomics imply concurrency, which we don't have.


+assert(node->status == NODE_REMOVE_STATUS);
+
+node->status = NODE_GHOST_STATUS;
+g_free(node->data);
+g_slice_free1(sizeof(*node), node);


When you switch to plain g_malloc(), this needs to be updated.


+}
+}
+
+static inline PCNode *pcache_node_ref(PCNode *node)
+{
+assert(node->status == NODE_SUCCESS_STATUS ||
+   node->status == NODE_WAIT_STATUS);
+assert(atomic_read(&node->ref) == 0);/* XXX: only for sequential requests */
+atomic_inc(&node->ref);


Do you expect concurrent accesses or not? Because if you don't, there is
no need for atomics, but if you do, this is buggy because each of the
lines is atomic for itself, but the assertion isn't atomic with the
refcount increment.


Well, we've already sorted out the question of concurrent accesses.


A ref() function that can take only a single reference feels odd anyway
and this restriction seems to be lifted later. Why have it here?


No, this is a temporary assert(). In fact, it is not necessary, but it helps
to check that the code behaves correctly at the point of the current patch,
because reading of nodes and rescheduling of requests are not implemented yet.


+
+return node;
+}


Kevin