Since writes are buffered in memory, incoming reads must be served from
the buffered pages instead of submitting the I/O to the media.

This patch implements that logic. When a read bio arrives at rrpc, valid
pages held in the write buffers of in-memory flash blocks are copied
directly into the bio. If the bio contains any "holes" (pages not present
in a write buffer), an intermediate bio is submitted to the media to
retrieve the missing pages, and the original bio is completed accordingly.
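
The snippet below is an illustrative userspace model of this read path (a
sketch only; the array names and the fake "media" are assumptions for the
example, not rrpc symbols). Pages found in the write buffer are copied
immediately and marked in a bitmap, and the remaining holes are fetched in
a second request and merged into the original read, mirroring what
rrpc_read_from_w_buf() and rrpc_fill_partial_read_bio() do below.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SZ		8	/* toy page size */
#define NR_PAGES	4	/* pages in the incoming "bio" */

/* pages 1 and 3 are still sitting in a write buffer */
static const char *w_buf[NR_PAGES] = { NULL, "buf-pg1", NULL, "buf-pg3" };
static const char *media[NR_PAGES] = { "med-pg0", "med-pg1", "med-pg2",
				       "med-pg3" };

int main(void)
{
	char out[NR_PAGES][PAGE_SZ];
	uint64_t read_bitmap = 0;	/* max 64 pages per request */
	int i, left = NR_PAGES;

	/* pass 1: serve whatever the write buffer already holds */
	for (i = 0; i < NR_PAGES; i++) {
		if (!w_buf[i])
			continue;
		strncpy(out[i], w_buf[i], PAGE_SZ);
		read_bitmap |= 1ULL << i;	/* mark page as read */
		left--;
	}

	/* pass 2: fill the remaining holes from the media in one request */
	if (left)
		for (i = 0; i < NR_PAGES; i++)
			if (!(read_bitmap & (1ULL << i)))
				strncpy(out[i], media[i], PAGE_SZ);

	for (i = 0; i < NR_PAGES; i++)
		printf("page %d: %s\n", i, out[i]);

	return 0;
}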

Signed-off-by: Javier González <jav...@cnexlabs.com>
---
 drivers/lightnvm/rrpc.c  | 451 ++++++++++++++++++++++++++++++++++++-----------
 include/linux/lightnvm.h |   1 +
 2 files changed, 346 insertions(+), 106 deletions(-)

diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index e9fb19d..6348d52 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -827,10 +827,13 @@ static void rrpc_end_io(struct nvm_rq *rqd)
        struct rrpc *rrpc = container_of(rqd->ins, struct rrpc, instance);
        uint8_t nr_pages = rqd->nr_pages;
 
-       if (bio_data_dir(rqd->bio) == WRITE)
+       if (bio_data_dir(rqd->bio) == WRITE) {
                rrpc_end_io_write(rrpc, rqd, nr_pages);
-       else
+       } else {
+               if (rqd->flags & NVM_IOTYPE_SYNC)
+                       return;
                rrpc_end_io_read(rrpc, rqd, nr_pages);
+       }
 
        bio_put(rqd->bio);
 
@@ -842,83 +845,6 @@ static void rrpc_end_io(struct nvm_rq *rqd)
        mempool_free(rqd, rrpc->rq_pool);
 }
 
-static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
-                       struct nvm_rq *rqd, struct rrpc_buf_rq *brrqd,
-                       unsigned long flags, int nr_pages)
-{
-       struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
-       struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rrqd);
-       struct rrpc_addr *gp;
-       sector_t laddr = rrpc_get_laddr(bio);
-       int is_gc = flags & NVM_IOTYPE_GC;
-       int i;
-
-       if (!is_gc && rrpc_lock_rq(rrpc, bio, rrqd)) {
-               nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list);
-               mempool_free(rrqd, rrpc->rrq_pool);
-               mempool_free(rqd, rrpc->rq_pool);
-               return NVM_IO_REQUEUE;
-       }
-
-       for (i = 0; i < nr_pages; i++) {
-               /* We assume that mapping occurs at 4KB granularity */
-               BUG_ON(!(laddr + i >= 0 && laddr + i < rrpc->nr_sects));
-               gp = &rrpc->trans_map[laddr + i];
-
-               if (gp->rblk) {
-                       rqd->ppa_list[i] = rrpc_ppa_to_gaddr(rrpc->dev,
-                                                               gp->addr);
-               } else {
-                       BUG_ON(is_gc);
-                       rrpc_unlock_laddr(rrpc, r);
-                       nvm_dev_dma_free(rrpc->dev, rqd->ppa_list,
-                                                       rqd->dma_ppa_list);
-                       mempool_free(rrqd, rrpc->rrq_pool);
-                       mempool_free(rqd, rrpc->rq_pool);
-                       return NVM_IO_DONE;
-               }
-
-               brrqd[i].addr = gp;
-       }
-
-       rqd->opcode = NVM_OP_HBREAD;
-
-       return NVM_IO_OK;
-}
-
-static int rrpc_read_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd,
-                                                       unsigned long flags)
-{
-       struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
-       int is_gc = flags & NVM_IOTYPE_GC;
-       sector_t laddr = rrpc_get_laddr(bio);
-       struct rrpc_addr *gp;
-
-       if (!is_gc && rrpc_lock_rq(rrpc, bio, rrqd)) {
-               mempool_free(rrqd, rrpc->rrq_pool);
-               mempool_free(rqd, rrpc->rq_pool);
-               return NVM_IO_REQUEUE;
-       }
-
-       BUG_ON(!(laddr >= 0 && laddr < rrpc->nr_sects));
-       gp = &rrpc->trans_map[laddr];
-
-       if (gp->rblk) {
-               rqd->ppa_addr = rrpc_ppa_to_gaddr(rrpc->dev, gp->addr);
-       } else {
-               BUG_ON(is_gc);
-               rrpc_unlock_rq(rrpc, rrqd);
-               mempool_free(rrqd, rrpc->rrq_pool);
-               mempool_free(rqd, rrpc->rq_pool);
-               return NVM_IO_DONE;
-       }
-
-       rqd->opcode = NVM_OP_HBREAD;
-       rrqd->addr = gp;
-
-       return NVM_IO_OK;
-}
-
 /*
  * Copy data from current bio to block write buffer. This is necessary
  * to guarantee durability if a flash block becomes bad before all pages
@@ -1051,14 +977,335 @@ static int rrpc_write_rq(struct rrpc *rrpc, struct bio *bio,
        return NVM_IO_DONE;
 }
 
+static int rrpc_buffer_write(struct rrpc *rrpc, struct bio *bio,
+                               struct rrpc_rq *rrqd, unsigned long flags)
+{
+       uint8_t nr_pages = rrpc_get_pages(bio);
+
+       rrqd->nr_pages = nr_pages;
+
+       if (nr_pages > 1)
+               return rrpc_write_ppalist_rq(rrpc, bio, rrqd, flags, nr_pages);
+       else
+               return rrpc_write_rq(rrpc, bio, rrqd, flags);
+}
+
+static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
+                       struct nvm_rq *rqd, struct rrpc_buf_rq *brrqd,
+                       unsigned long flags, int nr_pages)
+{
+       struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
+       struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rrqd);
+       struct rrpc_addr *gp;
+       sector_t laddr = rrpc_get_laddr(bio);
+       int is_gc = flags & NVM_IOTYPE_GC;
+       int i;
+
+       if (!is_gc && rrpc_lock_rq(rrpc, bio, rrqd)) {
+               nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list);
+               return NVM_IO_REQUEUE;
+       }
+
+       for (i = 0; i < nr_pages; i++) {
+               /* We assume that mapping occurs at 4KB granularity */
+               BUG_ON(!(laddr + i >= 0 && laddr + i < rrpc->nr_sects));
+               gp = &rrpc->trans_map[laddr + i];
+
+               if (gp->rblk) {
+                       rqd->ppa_list[i] = rrpc_ppa_to_gaddr(rrpc->dev,
+                                                               gp->addr);
+               } else {
+                       BUG_ON(is_gc);
+                       rrpc_unlock_laddr(rrpc, r);
+                       nvm_dev_dma_free(rrpc->dev, rqd->ppa_list,
+                                                       rqd->dma_ppa_list);
+                       return NVM_IO_DONE;
+               }
+
+               brrqd[i].addr = gp;
+       }
+
+       rqd->opcode = NVM_OP_HBREAD;
+
+       return NVM_IO_OK;
+}
+
+static int rrpc_read_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd,
+                                                       unsigned long flags)
+{
+       struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
+       int is_gc = flags & NVM_IOTYPE_GC;
+       sector_t laddr = rrpc_get_laddr(bio);
+       struct rrpc_addr *gp;
+
+       if (!is_gc && rrpc_lock_rq(rrpc, bio, rrqd))
+               return NVM_IO_REQUEUE;
+
+       BUG_ON(!(laddr >= 0 && laddr < rrpc->nr_sects));
+       gp = &rrpc->trans_map[laddr];
+
+       if (gp->rblk) {
+               rqd->ppa_addr = rrpc_ppa_to_gaddr(rrpc->dev, gp->addr);
+       } else {
+               BUG_ON(is_gc);
+               rrpc_unlock_rq(rrpc, rrqd);
+               return NVM_IO_DONE;
+       }
+
+       rqd->opcode = NVM_OP_HBREAD;
+       rrqd->addr = gp;
+
+       return NVM_IO_OK;
+}
+
+static int rrpc_read_w_buf_entry(struct bio *bio, struct rrpc_block *rblk,
+                                       struct bvec_iter iter, int entry)
+{
+       struct buf_entry *read_entry;
+       struct bio_vec bv;
+       struct page *page;
+       void *kaddr;
+       void *data;
+       int read = 0;
+
+       lockdep_assert_held(&rblk->w_buf.s_lock);
+
+       spin_lock(&rblk->w_buf.w_lock);
+       if (entry >= rblk->w_buf.cur_mem) {
+               spin_unlock(&rblk->w_buf.w_lock);
+               goto out;
+       }
+       spin_unlock(&rblk->w_buf.w_lock);
+
+       read_entry = &rblk->w_buf.entries[entry];
+       data = read_entry->data;
+
+       bv = bio_iter_iovec(bio, iter);
+       page = bv.bv_page;
+       kaddr = kmap_atomic(page);
+       memcpy(kaddr + bv.bv_offset, data, RRPC_EXPOSED_PAGE_SIZE);
+       kunmap_atomic(kaddr);
+       read++;
+
+out:
+       return read;
+}
+
+static int rrpc_read_from_w_buf(struct rrpc *rrpc, struct nvm_rq *rqd,
+                       struct rrpc_buf_rq *brrqd, unsigned long *read_bitmap)
+{
+       struct nvm_dev *dev = rrpc->dev;
+       struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
+       struct rrpc_addr *addr;
+       struct bio *bio = rqd->bio;
+       struct bvec_iter iter = bio->bi_iter;
+       struct rrpc_block *rblk;
+       unsigned long blk_id;
+       int nr_pages = rqd->nr_pages;
+       int left = nr_pages;
+       int read = 0;
+       int entry;
+       int i;
+
+       if (nr_pages != bio->bi_vcnt)
+               goto out;
+
+       if (nr_pages == 1) {
+               rblk = rrqd->addr->rblk;
+
+               /* If the write buffer exists, the block is open in memory */
+               spin_lock(&rblk->w_buf.s_lock);
+               atomic_inc(&rblk->w_buf.refs);
+               if (rblk->w_buf.entries) {
+                       blk_id = rblk->parent->id;
+                       entry = rrqd->addr->addr -
+                               (blk_id * dev->sec_per_pg * dev->pgs_per_blk);
+
+                       read = rrpc_read_w_buf_entry(bio, rblk, iter, entry);
+
+                       left -= read;
+                       WARN_ON(test_and_set_bit(0, read_bitmap));
+               }
+               bio_advance_iter(bio, &iter, RRPC_EXPOSED_PAGE_SIZE);
+
+               atomic_dec(&rblk->w_buf.refs);
+               spin_unlock(&rblk->w_buf.s_lock);
+
+               goto out;
+       }
+
+       /* Iterate through all pages and copy those that are found in the write
+        * buffer. We will complete the holes (if any) with an intermediate bio
+        * later on
+        */
+       for (i = 0; i < nr_pages; i++) {
+               addr = brrqd[i].addr;
+               rblk = addr->rblk;
+
+               /* If the write buffer exists, the block is open in memory */
+               spin_lock(&rblk->w_buf.s_lock);
+               atomic_inc(&rblk->w_buf.refs);
+               if (rblk->w_buf.entries) {
+                       blk_id = rblk->parent->id;
+                       entry = addr->addr - (blk_id * dev->sec_per_pg *
+                                                       dev->pgs_per_blk);
+
+                       read = rrpc_read_w_buf_entry(bio, rblk, iter, entry);
+
+                       left -= read;
+                       WARN_ON(test_and_set_bit(i, read_bitmap));
+               }
+               bio_advance_iter(bio, &iter, RRPC_EXPOSED_PAGE_SIZE);
+
+               atomic_dec(&rblk->w_buf.refs);
+               spin_unlock(&rblk->w_buf.s_lock);
+       }
+
+out:
+       return left;
+}
+
+static int rrpc_submit_read_io(struct rrpc *rrpc, struct bio *bio,
+                               struct nvm_rq *rqd, unsigned long flags)
+{
+       struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
+       int err;
+
+       err = nvm_submit_io(rrpc->dev, rqd);
+       if (err) {
+               pr_err("rrpc: I/O submission failed: %d\n", err);
+               bio_put(bio);
+               if (!(flags & NVM_IOTYPE_GC)) {
+                       rrpc_unlock_rq(rrpc, rrqd);
+                       if (rqd->nr_pages > 1)
+                               nvm_dev_dma_free(rrpc->dev,
+                       rqd->ppa_list, rqd->dma_ppa_list);
+               }
+               return NVM_IO_ERR;
+       }
+
+       return NVM_IO_OK;
+}
+
+static int rrpc_fill_partial_read_bio(struct rrpc *rrpc, struct bio *bio,
+                               unsigned long *read_bitmap, struct nvm_rq *rqd,
+                               struct rrpc_buf_rq *brrqd, uint8_t nr_pages)
+{
+       struct bio *new_bio;
+       struct page *page;
+       struct bio_vec src_bv, dst_bv;
+       void *src_p, *dst_p;
+       int nr_holes = nr_pages - bitmap_weight(read_bitmap, nr_pages);
+       int hole;
+       int i = 0;
+       int ret;
+       DECLARE_COMPLETION_ONSTACK(wait);
+
+       new_bio = bio_alloc(GFP_KERNEL, nr_holes);
+       if (!new_bio) {
+               pr_err("nvm: rrpc: could not alloc read bio\n");
+               return NVM_IO_ERR;
+       }
+
+       hole = find_first_zero_bit(read_bitmap, nr_pages);
+       do {
+               page = mempool_alloc(rrpc->page_pool, GFP_KERNEL);
+               if (!page) {
+                       bio_put(new_bio);
+                       pr_err("nvm: rrpc: could not alloc read page\n");
+                       goto err;
+               }
+
+               ret = bio_add_page(new_bio, page, RRPC_EXPOSED_PAGE_SIZE, 0);
+               if (ret != RRPC_EXPOSED_PAGE_SIZE) {
+                       pr_err("nvm: rrpc: could not add page to bio\n");
+                       mempool_free(page, rrpc->page_pool);
+                       goto err;
+               }
+
+               rqd->ppa_list[i] = rrpc_ppa_to_gaddr(rrpc->dev,
+                                                       brrqd[hole].addr->addr);
+
+               i++;
+               hole = find_next_zero_bit(read_bitmap, nr_pages, hole + 1);
+       } while (hole != nr_pages);
+
+       if (nr_holes != new_bio->bi_vcnt) {
+               pr_err("rrpc: malformed bio\n");
+               goto err;
+       }
+
+       new_bio->bi_iter.bi_sector = bio->bi_iter.bi_sector;
+       new_bio->bi_rw = READ;
+       new_bio->bi_private = &wait;
+       new_bio->bi_end_io = rrpc_end_sync_bio;
+
+       rqd->flags |= NVM_IOTYPE_SYNC;
+       rqd->bio = new_bio;
+       rqd->nr_pages = nr_holes;
+
+       rrpc_submit_read_io(rrpc, new_bio, rqd, rqd->flags);
+       wait_for_completion_io(&wait);
+
+       if (new_bio->bi_error)
+               goto err;
+
+       /* Fill the holes in the original bio */
+       i = 0;
+       hole = find_first_zero_bit(read_bitmap, nr_pages);
+       do {
+               src_bv = new_bio->bi_io_vec[i];
+               dst_bv = bio->bi_io_vec[hole];
+
+               src_p = kmap_atomic(src_bv.bv_page);
+               dst_p = kmap_atomic(dst_bv.bv_page);
+
+               memcpy(dst_p + dst_bv.bv_offset,
+                       src_p + src_bv.bv_offset,
+                       RRPC_EXPOSED_PAGE_SIZE);
+
+               kunmap_atomic(src_p);
+               kunmap_atomic(dst_p);
+
+               mempool_free(src_bv.bv_page, rrpc->page_pool);
+
+               i++;
+               hole = find_next_zero_bit(read_bitmap, nr_pages, hole + 1);
+       } while (hole != nr_pages);
+
+       bio_put(new_bio);
+
+       /* Complete the original bio and associated request */
+       rqd->flags &= ~NVM_IOTYPE_SYNC;
+       rqd->bio = bio;
+       rqd->nr_pages = nr_pages;
+
+       bio_endio(bio);
+       rrpc_end_io(rqd);
+       return NVM_IO_OK;
+
+err:
+       /* Free allocated pages in new bio */
+       for (i = 0; i < new_bio->bi_vcnt; i++) {
+               src_bv = new_bio->bi_io_vec[i];
+               mempool_free(src_bv.bv_page, rrpc->page_pool);
+       }
+       bio_endio(new_bio);
+       return NVM_IO_ERR;
+}
+
 static int rrpc_submit_read(struct rrpc *rrpc, struct bio *bio,
                                struct rrpc_rq *rrqd, unsigned long flags)
 {
        struct nvm_rq *rqd;
        struct rrpc_buf_rq brrqd[rrpc->max_write_pgs];
+       unsigned long read_bitmap; /* Max 64 ppas per request */
+       int left;
        uint8_t nr_pages = rrpc_get_pages(bio);
        int err;
 
+       bitmap_zero(&read_bitmap, nr_pages);
+
        rqd = mempool_alloc(rrpc->rq_pool, GFP_KERNEL);
        if (!rqd) {
                pr_err_ratelimited("rrpc: not able to queue bio.");
@@ -1073,22 +1320,25 @@ static int rrpc_submit_read(struct rrpc *rrpc, struct bio *bio,
                                                &rqd->dma_ppa_list);
                if (!rqd->ppa_list) {
                        pr_err("rrpc: not able to allocate ppa list\n");
-                       mempool_free(rrqd, rrpc->rrq_pool);
                        mempool_free(rqd, rrpc->rq_pool);
+                       mempool_free(rrqd, rrpc->rrq_pool);
                        return NVM_IO_ERR;
                }
 
                err = rrpc_read_ppalist_rq(rrpc, bio, rqd, brrqd, flags,
                                                                nr_pages);
                if (err) {
-                       mempool_free(rrqd, rrpc->rrq_pool);
                        mempool_free(rqd, rrpc->rq_pool);
+                       mempool_free(rrqd, rrpc->rrq_pool);
                        return err;
                }
        } else {
                err = rrpc_read_rq(rrpc, bio, rqd, flags);
-               if (err)
+               if (err) {
+                       mempool_free(rrqd, rrpc->rrq_pool);
+                       mempool_free(rqd, rrpc->rq_pool);
                        return err;
+               }
        }
 
        bio_get(bio);
@@ -1097,33 +1347,22 @@ static int rrpc_submit_read(struct rrpc *rrpc, struct bio *bio,
        rqd->nr_pages = rrqd->nr_pages = nr_pages;
        rqd->flags = flags;
 
-       err = nvm_submit_io(rrpc->dev, rqd);
-       if (err) {
-               pr_err("rrpc: I/O submission failed: %d\n", err);
-               bio_put(bio);
-               if (!(flags & NVM_IOTYPE_GC)) {
-                       rrpc_unlock_rq(rrpc, rrqd);
-                       if (rqd->nr_pages > 1)
-                               nvm_dev_dma_free(rrpc->dev,
-                       rqd->ppa_list, rqd->dma_ppa_list);
-               }
+       left = rrpc_read_from_w_buf(rrpc, rqd, brrqd, &read_bitmap);
+       if (left == 0) {
+               bio_endio(bio);
+               rrpc_end_io(rqd);
+               return NVM_IO_OK;
+       } else if (left < 0)
                return NVM_IO_ERR;
-       }
 
-       return NVM_IO_OK;
-}
+       if (bitmap_empty(&read_bitmap, nr_pages))
+               return rrpc_submit_read_io(rrpc, bio, rqd, flags);
 
-static int rrpc_buffer_write(struct rrpc *rrpc, struct bio *bio,
-                               struct rrpc_rq *rrqd, unsigned long flags)
-{
-       uint8_t nr_pages = rrpc_get_pages(bio);
-
-       rrqd->nr_pages = nr_pages;
-
-       if (nr_pages > 1)
-               return rrpc_write_ppalist_rq(rrpc, bio, rrqd, flags, nr_pages);
-       else
-               return rrpc_write_rq(rrpc, bio, rrqd, flags);
+       /* The read bio could not be completely read from the write buffer. This
+        * case only occurs when several pages are sent in a single bio
+        */
+       return rrpc_fill_partial_read_bio(rrpc, bio, &read_bitmap, rqd, brrqd,
+                                                               nr_pages);
 }
 
 static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio,
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index eda9743..ae26ced 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -11,6 +11,7 @@ enum {
 
        NVM_IOTYPE_NONE = 0,
        NVM_IOTYPE_GC = 1,
+       NVM_IOTYPE_SYNC = 2,
 };
 
 #define NVM_BLK_BITS (16)
-- 
2.1.4
