[PATCH v4 15/20] block/xen-blkfront: Make it running on 64KB page granularity

2015-09-07 Thread Julien Grall
The PV block protocol uses 4KB page granularity. The goal of this
patch is to allow a Linux kernel using 64KB page granularity to use a
block device on an unmodified Xen.

The block API uses segments, which should be at least the size of a
Linux page. Therefore, the driver has to break the page into 4KB
chunks before giving it to the backend.

When breaking a 64KB segment into 4KB chunks, it is possible that some
chunks are empty. As the PV protocol always requires data in each
chunk, we have to count the number of Xen pages that will be in use and
avoid sending empty chunks.

Note that a pre-defined number of grants is reserved before preparing
the request. This pre-defined number is based on the number and the
maximum size of the segments. If each segment contains a very small
amount of data, the driver may reserve too many grants (16 grants are
reserved per segment with 64KB page granularity).

Furthermore, in the case of persistent grants we allocate one Linux page
per grant although only the first 4KB of the page will effectively be
in use. This could be improved by sharing the page between multiple grants.

Signed-off-by: Julien Grall 
Acked-by: Roger Pau Monné 

---
Cc: Konrad Rzeszutek Wilk 
Cc: Boris Ostrovsky 
Cc: David Vrabel 

Improvements such as support for 64KB grants are not taken into
consideration in this patch because we have the requirement to run a
Linux kernel using 64KB page granularity on an unmodified Xen.

Changes in v4:
- Rebase after d50babbe300eedf33ea5b00a12c5df3a05bd96c7
("xen-blkfront: introduce blkfront_gather_backend_features()")
- Fix typos
- Add Roger's acked-by

Changes in v3:
- Use DIV_ROUND_UP in INDIRECT_GREFS
- Split lines over 80 characters whenever it's possible
- s/mfn/gfn/ based on the new naming
- The grant callback no longer allows changing the len
(it wasn't used here).
- gnttab_foreach_grant has been renamed to gnttab_foreach_grant_in_range
- Use gnttab_count_grant to get the number of grants in a sg
- Do some renaming to use the correct variable every time

Changes in v2:
- Use gnttab_foreach_grant to split a Linux page into grant
---
 drivers/block/xen-blkfront.c | 324 ---
 1 file changed, 213 insertions(+), 111 deletions(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 4232cbd..f2cdc73 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -78,6 +78,7 @@ struct blk_shadow {
struct grant **grants_used;
struct grant **indirect_grants;
struct scatterlist *sg;
+   unsigned int num_sg;
 };
 
 struct split_bio {
@@ -107,8 +108,12 @@ static unsigned int xen_blkif_max_ring_order;
 module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 
S_IRUGO);
 MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for 
the shared ring");
 
-#define BLK_RING_SIZE(info) __CONST_RING_SIZE(blkif, PAGE_SIZE * 
(info)->nr_ring_pages)
-#define BLK_MAX_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE * 
XENBUS_MAX_RING_PAGES)
+#define BLK_RING_SIZE(info)\
+   __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * (info)->nr_ring_pages)
+
+#define BLK_MAX_RING_SIZE  \
+   __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * XENBUS_MAX_RING_PAGES)
+
 /*
  * ring-ref%i i=(-1UL) would take 11 characters + 'ring-ref' is 8, so 19
  * characters are enough. Define to 20 to keep consist with backend.
@@ -147,6 +152,7 @@ struct blkfront_info
unsigned int discard_granularity;
unsigned int discard_alignment;
unsigned int feature_persistent:1;
+   /* Number of 4KB segments handled */
unsigned int max_indirect_segments;
int is_ready;
struct blk_mq_tag_set tag_set;
@@ -175,10 +181,23 @@ static DEFINE_SPINLOCK(minor_lock);
 
 #define DEV_NAME   "xvd"   /* name in /dev */
 
-#define SEGS_PER_INDIRECT_FRAME \
-   (PAGE_SIZE/sizeof(struct blkif_request_segment))
-#define INDIRECT_GREFS(_segs) \
-   ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
+/*
+ * Grants are always the same size as a Xen page (i.e 4KB).
+ * A physical segment is always the same size as a Linux page.
+ * Number of grants per physical segment
+ */
+#define GRANTS_PER_PSEG (PAGE_SIZE / XEN_PAGE_SIZE)
+
+#define GRANTS_PER_INDIRECT_FRAME \
+   (XEN_PAGE_SIZE / sizeof(struct blkif_request_segment))
+
+#define PSEGS_PER_INDIRECT_FRAME   \
+   (GRANTS_INDIRECT_FRAME / GRANTS_PSEGS)
+
+#define INDIRECT_GREFS(_grants)\
+   DIV_ROUND_UP(_grants, GRANTS_PER_INDIRECT_FRAME)
+
+#define GREFS(_psegs)  ((_psegs) * GRANTS_PER_PSEG)
 
 static int blkfront_setup_indirect(struct blkfront_info *info);
 static int blkfront_gather_backend_features(struct blkfront_info *info);
@@ -466,14 +485,100 @@ static int blkif_queue_discard_req(struct request *req)
return 0;
 }
 
+struct 

[PATCH v4 15/20] block/xen-blkfront: Make it running on 64KB page granularity

2015-09-07 Thread Julien Grall
The PV block protocol uses 4KB page granularity. The goal of this
patch is to allow a Linux kernel using 64KB page granularity to use a
block device on an unmodified Xen.

The block API uses segments, which should be at least the size of a
Linux page. Therefore, the driver has to break the page into 4KB
chunks before giving it to the backend.

When breaking a 64KB segment into 4KB chunks, it is possible that some
chunks are empty. As the PV protocol always requires data in each
chunk, we have to count the number of Xen pages that will be in use and
avoid sending empty chunks.

Note that a pre-defined number of grants is reserved before preparing
the request. This pre-defined number is based on the number and the
maximum size of the segments. If each segment contains a very small
amount of data, the driver may reserve too many grants (16 grants are
reserved per segment with 64KB page granularity).

Furthermore, in the case of persistent grants we allocate one Linux page
per grant although only the first 4KB of the page will effectively be
in use. This could be improved by sharing the page between multiple grants.

Signed-off-by: Julien Grall 
Acked-by: Roger Pau Monné 

---
Cc: Konrad Rzeszutek Wilk 
Cc: Boris Ostrovsky 
Cc: David Vrabel 

Improvements such as support for 64KB grants are not taken into
consideration in this patch because we have the requirement to run a
Linux kernel using 64KB page granularity on an unmodified Xen.

Changes in v4:
- Rebase after d50babbe300eedf33ea5b00a12c5df3a05bd96c7
("xen-blkfront: introduce blkfront_gather_backend_features()")
- Fix typos
- Add Roger's acked-by

Changes in v3:
- Use DIV_ROUND_UP in INDIRECT_GREFS
- Split lines over 80 characters whenever it's possible
- s/mfn/gfn/ based on the new naming
- The grant callback no longer allows changing the len
(it wasn't used here).
- gnttab_foreach_grant has been renamed to gnttab_foreach_grant_in_range
- Use gnttab_count_grant to get the number of grants in a sg
- Do some renaming to use the correct variable every time

Changes in v2:
- Use gnttab_foreach_grant to split a Linux page into grant
---
 drivers/block/xen-blkfront.c | 324 ---
 1 file changed, 213 insertions(+), 111 deletions(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 4232cbd..f2cdc73 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -78,6 +78,7 @@ struct blk_shadow {
struct grant **grants_used;
struct grant **indirect_grants;
struct scatterlist *sg;
+   unsigned int num_sg;
 };
 
 struct split_bio {
@@ -107,8 +108,12 @@ static unsigned int xen_blkif_max_ring_order;
 module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 
S_IRUGO);
 MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for 
the shared ring");
 
-#define BLK_RING_SIZE(info) __CONST_RING_SIZE(blkif, PAGE_SIZE * 
(info)->nr_ring_pages)
-#define BLK_MAX_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE * 
XENBUS_MAX_RING_PAGES)
+#define BLK_RING_SIZE(info)\
+   __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * (info)->nr_ring_pages)
+
+#define BLK_MAX_RING_SIZE  \
+   __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * XENBUS_MAX_RING_PAGES)
+
 /*
  * ring-ref%i i=(-1UL) would take 11 characters + 'ring-ref' is 8, so 19
  * characters are enough. Define to 20 to keep consist with backend.
@@ -147,6 +152,7 @@ struct blkfront_info
unsigned int discard_granularity;
unsigned int discard_alignment;
unsigned int feature_persistent:1;
+   /* Number of 4KB segments handled */
unsigned int max_indirect_segments;
int is_ready;
struct blk_mq_tag_set tag_set;
@@ -175,10 +181,23 @@ static DEFINE_SPINLOCK(minor_lock);
 
 #define DEV_NAME   "xvd"   /* name in /dev */
 
-#define SEGS_PER_INDIRECT_FRAME \
-   (PAGE_SIZE/sizeof(struct blkif_request_segment))
-#define INDIRECT_GREFS(_segs) \
-   ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
+/*
+ * Grants are always the same size as a Xen page (i.e 4KB).
+ * A physical segment is always the same size as a Linux page.
+ * Number of grants per physical segment
+ */
+#define GRANTS_PER_PSEG (PAGE_SIZE / XEN_PAGE_SIZE)
+
+#define GRANTS_PER_INDIRECT_FRAME \
+   (XEN_PAGE_SIZE / sizeof(struct blkif_request_segment))
+
+#define PSEGS_PER_INDIRECT_FRAME   \
+   (GRANTS_INDIRECT_FRAME / GRANTS_PSEGS)
+
+#define INDIRECT_GREFS(_grants)\
+   DIV_ROUND_UP(_grants, GRANTS_PER_INDIRECT_FRAME)
+
+#define GREFS(_psegs)  ((_psegs) * GRANTS_PER_PSEG)
 
 static int blkfront_setup_indirect(struct blkfront_info *info);
 static int blkfront_gather_backend_features(struct