[PATCH 04/30] drbd: Implement handling of thinly provisioned storage on resync target nodes

2016-06-13 Thread Philipp Reisner
If during resync we read only zeroes for a range of sectors assume
that these sectors can be discarded on the sync target node.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_int.h  |  5 +++
 drivers/block/drbd/drbd_main.c | 18 
 drivers/block/drbd/drbd_protocol.h |  4 ++
 drivers/block/drbd/drbd_receiver.c | 88 --
 drivers/block/drbd/drbd_worker.c   | 29 -
 5 files changed, 140 insertions(+), 4 deletions(-)

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 33f0b82..9e338ec 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -471,6 +471,9 @@ enum {
/* this originates from application on peer
 * (not some resync or verify or other DRBD internal request) */
__EE_APPLICATION,
+
+   /* If it contains only 0 bytes, send back P_RS_DEALLOCATED */
+   __EE_RS_THIN_REQ,
 };
 #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
 #define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC)
@@ -485,6 +488,7 @@ enum {
 #define EE_SUBMITTED   (1<<__EE_SUBMITTED)
 #define EE_WRITE   (1<<__EE_WRITE)
 #define EE_APPLICATION (1<<__EE_APPLICATION)
+#define EE_RS_THIN_REQ (1<<__EE_RS_THIN_REQ)
 
 /* flag bits per device */
 enum {
@@ -1123,6 +1127,7 @@ extern int drbd_send_ov_request(struct drbd_peer_device 
*, sector_t sector, int
 extern int drbd_send_bitmap(struct drbd_device *device);
 extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv 
retcode);
 extern void conn_send_sr_reply(struct drbd_connection *connection, enum 
drbd_state_rv retcode);
+extern int drbd_send_rs_deallocated(struct drbd_peer_device *, struct 
drbd_peer_request *);
 extern void drbd_backing_dev_free(struct drbd_device *device, struct 
drbd_backing_dev *ldev);
 extern void drbd_device_cleanup(struct drbd_device *device);
 void drbd_print_uuids(struct drbd_device *device, const char *text);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 2891631..b0891c3 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1377,6 +1377,22 @@ int drbd_send_ack_ex(struct drbd_peer_device 
*peer_device, enum drbd_packet cmd,
  cpu_to_be64(block_id));
 }
 
+int drbd_send_rs_deallocated(struct drbd_peer_device *peer_device,
+struct drbd_peer_request *peer_req)
+{
+   struct drbd_socket *sock;
+   struct p_block_desc *p;
+
+   sock = &peer_device->connection->data;
+   p = drbd_prepare_command(peer_device, sock);
+   if (!p)
+   return -EIO;
+   p->sector = cpu_to_be64(peer_req->i.sector);
+   p->blksize = cpu_to_be32(peer_req->i.size);
+   p->pad = 0;
+   return drbd_send_command(peer_device, sock, P_RS_DEALLOCATED, 
sizeof(*p), NULL, 0);
+}
+
 int drbd_send_drequest(struct drbd_peer_device *peer_device, int cmd,
   sector_t sector, int size, u64 block_id)
 {
@@ -3683,6 +3699,8 @@ const char *cmdname(enum drbd_packet cmd)
[P_CONN_ST_CHG_REPLY]   = "conn_st_chg_reply",
[P_RETRY_WRITE] = "retry_write",
[P_PROTOCOL_UPDATE] = "protocol_update",
+   [P_RS_THIN_REQ] = "rs_thin_req",
+   [P_RS_DEALLOCATED]  = "rs_deallocated",
 
/* enum drbd_packet, but not commands - obsoleted flags:
 *  P_MAY_IGNORE
diff --git a/drivers/block/drbd/drbd_protocol.h 
b/drivers/block/drbd/drbd_protocol.h
index 129f8c7..ce0e72c 100644
--- a/drivers/block/drbd/drbd_protocol.h
+++ b/drivers/block/drbd/drbd_protocol.h
@@ -60,6 +60,10 @@ enum drbd_packet {
 * which is why I chose TRIM here, to disambiguate. */
P_TRIM= 0x31,
 
+   /* Only use these two if both support FF_THIN_RESYNC */
+   P_RS_THIN_REQ = 0x32, /* Request a block for resync or reply 
P_RS_DEALLOCATED */
+   P_RS_DEALLOCATED  = 0x33, /* Contains only zeros on sync source 
node */
+
P_MAY_IGNORE  = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) 
... */
P_MAX_OPT_CMD = 0x101,
 
diff --git a/drivers/block/drbd/drbd_receiver.c 
b/drivers/block/drbd/drbd_receiver.c
index dcadea2..f5eef97 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1418,9 +1418,15 @@ int drbd_submit_peer_request(struct drbd_device *device,
 * so we can find it to present it in debugfs */
peer_req->submit_jif = jiffies;
peer_req->flags |= EE_SUBMITTED;
-   spin_lock_irq(&device->resource->req_lock);
-   list_add_tail(&peer_req->w.list, &device->active_ee);
-   spin_unlock_irq(&device->resource->req_lock);
+
+   /* If this was a resync request from receive_rs_deallocated(

[PATCH 04/30] drbd: Implement handling of thinly provisioned storage on resync target nodes

2016-06-13 Thread Philipp Reisner
If during resync we read only zeroes for a range of sectors assume
that these sectors can be discarded on the sync target node.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_int.h  |  5 +++
 drivers/block/drbd/drbd_main.c | 18 
 drivers/block/drbd/drbd_protocol.h |  4 ++
 drivers/block/drbd/drbd_receiver.c | 88 --
 drivers/block/drbd/drbd_worker.c   | 29 -
 5 files changed, 140 insertions(+), 4 deletions(-)

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 7a1cf7e..1a93f4f 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -471,6 +471,9 @@ enum {
/* this originates from application on peer
 * (not some resync or verify or other DRBD internal request) */
__EE_APPLICATION,
+
+   /* If it contains only 0 bytes, send back P_RS_DEALLOCATED */
+   __EE_RS_THIN_REQ,
 };
 #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
 #define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC)
@@ -485,6 +488,7 @@ enum {
 #define EE_SUBMITTED   (1<<__EE_SUBMITTED)
 #define EE_WRITE   (1<<__EE_WRITE)
 #define EE_APPLICATION (1<<__EE_APPLICATION)
+#define EE_RS_THIN_REQ (1<<__EE_RS_THIN_REQ)
 
 /* flag bits per device */
 enum {
@@ -1123,6 +1127,7 @@ extern int drbd_send_ov_request(struct drbd_peer_device 
*, sector_t sector, int
 extern int drbd_send_bitmap(struct drbd_device *device);
 extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv 
retcode);
 extern void conn_send_sr_reply(struct drbd_connection *connection, enum 
drbd_state_rv retcode);
+extern int drbd_send_rs_deallocated(struct drbd_peer_device *, struct 
drbd_peer_request *);
 extern void drbd_backing_dev_free(struct drbd_device *device, struct 
drbd_backing_dev *ldev);
 extern void drbd_device_cleanup(struct drbd_device *device);
 void drbd_print_uuids(struct drbd_device *device, const char *text);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 4c64cb9..dd2432e 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1377,6 +1377,22 @@ int drbd_send_ack_ex(struct drbd_peer_device 
*peer_device, enum drbd_packet cmd,
  cpu_to_be64(block_id));
 }
 
+int drbd_send_rs_deallocated(struct drbd_peer_device *peer_device,
+struct drbd_peer_request *peer_req)
+{
+   struct drbd_socket *sock;
+   struct p_block_desc *p;
+
+   sock = &peer_device->connection->data;
+   p = drbd_prepare_command(peer_device, sock);
+   if (!p)
+   return -EIO;
+   p->sector = cpu_to_be64(peer_req->i.sector);
+   p->blksize = cpu_to_be32(peer_req->i.size);
+   p->pad = 0;
+   return drbd_send_command(peer_device, sock, P_RS_DEALLOCATED, 
sizeof(*p), NULL, 0);
+}
+
 int drbd_send_drequest(struct drbd_peer_device *peer_device, int cmd,
   sector_t sector, int size, u64 block_id)
 {
@@ -3681,6 +3697,8 @@ const char *cmdname(enum drbd_packet cmd)
[P_CONN_ST_CHG_REPLY]   = "conn_st_chg_reply",
[P_RETRY_WRITE] = "retry_write",
[P_PROTOCOL_UPDATE] = "protocol_update",
+   [P_RS_THIN_REQ] = "rs_thin_req",
+   [P_RS_DEALLOCATED]  = "rs_deallocated",
 
/* enum drbd_packet, but not commands - obsoleted flags:
 *  P_MAY_IGNORE
diff --git a/drivers/block/drbd/drbd_protocol.h 
b/drivers/block/drbd/drbd_protocol.h
index ef92453..e5e74e3 100644
--- a/drivers/block/drbd/drbd_protocol.h
+++ b/drivers/block/drbd/drbd_protocol.h
@@ -60,6 +60,10 @@ enum drbd_packet {
 * which is why I chose TRIM here, to disambiguate. */
P_TRIM= 0x31,
 
+   /* Only use these two if both support FF_THIN_RESYNC */
+   P_RS_THIN_REQ = 0x32, /* Request a block for resync or reply 
P_RS_DEALLOCATED */
+   P_RS_DEALLOCATED  = 0x33, /* Contains only zeros on sync source 
node */
+
P_MAY_IGNORE  = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) 
... */
P_MAX_OPT_CMD = 0x101,
 
diff --git a/drivers/block/drbd/drbd_receiver.c 
b/drivers/block/drbd/drbd_receiver.c
index 8b30ab5..3a6c2ec 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1417,9 +1417,15 @@ int drbd_submit_peer_request(struct drbd_device *device,
 * so we can find it to present it in debugfs */
peer_req->submit_jif = jiffies;
peer_req->flags |= EE_SUBMITTED;
-   spin_lock_irq(&device->resource->req_lock);
-   list_add_tail(&peer_req->w.list, &device->active_ee);
-   spin_unlock_irq(&device->resource->req_lock);
+
+   /* If this was a resync request from receive_rs_deallocated(

Re: [Drbd-dev] [PATCH 04/30] drbd: Implement handling of thinly provisioned storage on resync target nodes

2016-04-25 Thread Philipp Reisner
Am Montag, 25. April 2016, 08:28:45 schrieb Bart Van Assche:
> On 04/25/2016 05:10 AM, Philipp Reisner wrote:
> > If during resync we read only zeroes for a range of sectors assume
> > that these sectors can be discarded on the sync target node.
> 
> Hello Phil,
> 
> With which interconnect(s) has this patch been tested? I'm afraid that
> for high-speed interconnects this patch will slow down I/O instead of
> making it faster because all_zero() examines all data before it is sent.
> 

Hi Bart,

that it might make things slower is true for sure. The benefit it
provides is to de-allocate blocks on the secondary obviously.
The whole feature is optional and it is off by default.

Obviously we want to have a generic interface like SEEK_HOLE/
SEEK_DATA for block devices, but that does not exist as of today.

best regards,
 Phil


Re: [Drbd-dev] [PATCH 04/30] drbd: Implement handling of thinly provisioned storage on resync target nodes

2016-04-25 Thread Bart Van Assche

On 04/25/2016 05:10 AM, Philipp Reisner wrote:

If during resync we read only zeroes for a range of sectors assume
that these sectors can be discarded on the sync target node.


Hello Phil,

With which interconnect(s) has this patch been tested? I'm afraid that 
for high-speed interconnects this patch will slow down I/O instead of 
making it faster because all_zero() examines all data before it is sent.


Bart.



[PATCH 04/30] drbd: Implement handling of thinly provisioned storage on resync target nodes

2016-04-25 Thread Philipp Reisner
If during resync we read only zeroes for a range of sectors assume
that these sectors can be discarded on the sync target node.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_int.h  |  5 +++
 drivers/block/drbd/drbd_main.c | 18 
 drivers/block/drbd/drbd_protocol.h |  4 ++
 drivers/block/drbd/drbd_receiver.c | 88 --
 drivers/block/drbd/drbd_worker.c   | 29 -
 5 files changed, 140 insertions(+), 4 deletions(-)

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 7a1cf7e..1a93f4f 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -471,6 +471,9 @@ enum {
/* this originates from application on peer
 * (not some resync or verify or other DRBD internal request) */
__EE_APPLICATION,
+
+   /* If it contains only 0 bytes, send back P_RS_DEALLOCATED */
+   __EE_RS_THIN_REQ,
 };
 #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
 #define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC)
@@ -485,6 +488,7 @@ enum {
 #define EE_SUBMITTED   (1<<__EE_SUBMITTED)
 #define EE_WRITE   (1<<__EE_WRITE)
 #define EE_APPLICATION (1<<__EE_APPLICATION)
+#define EE_RS_THIN_REQ (1<<__EE_RS_THIN_REQ)
 
 /* flag bits per device */
 enum {
@@ -1123,6 +1127,7 @@ extern int drbd_send_ov_request(struct drbd_peer_device 
*, sector_t sector, int
 extern int drbd_send_bitmap(struct drbd_device *device);
 extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv 
retcode);
 extern void conn_send_sr_reply(struct drbd_connection *connection, enum 
drbd_state_rv retcode);
+extern int drbd_send_rs_deallocated(struct drbd_peer_device *, struct 
drbd_peer_request *);
 extern void drbd_backing_dev_free(struct drbd_device *device, struct 
drbd_backing_dev *ldev);
 extern void drbd_device_cleanup(struct drbd_device *device);
 void drbd_print_uuids(struct drbd_device *device, const char *text);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 802d729..3cecc4f 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1377,6 +1377,22 @@ int drbd_send_ack_ex(struct drbd_peer_device 
*peer_device, enum drbd_packet cmd,
  cpu_to_be64(block_id));
 }
 
+int drbd_send_rs_deallocated(struct drbd_peer_device *peer_device,
+struct drbd_peer_request *peer_req)
+{
+   struct drbd_socket *sock;
+   struct p_block_desc *p;
+
+   sock = &peer_device->connection->data;
+   p = drbd_prepare_command(peer_device, sock);
+   if (!p)
+   return -EIO;
+   p->sector = cpu_to_be64(peer_req->i.sector);
+   p->blksize = cpu_to_be32(peer_req->i.size);
+   p->pad = 0;
+   return drbd_send_command(peer_device, sock, P_RS_DEALLOCATED, 
sizeof(*p), NULL, 0);
+}
+
 int drbd_send_drequest(struct drbd_peer_device *peer_device, int cmd,
   sector_t sector, int size, u64 block_id)
 {
@@ -3681,6 +3697,8 @@ const char *cmdname(enum drbd_packet cmd)
[P_CONN_ST_CHG_REPLY]   = "conn_st_chg_reply",
[P_RETRY_WRITE] = "retry_write",
[P_PROTOCOL_UPDATE] = "protocol_update",
+   [P_RS_THIN_REQ] = "rs_thin_req",
+   [P_RS_DEALLOCATED]  = "rs_deallocated",
 
/* enum drbd_packet, but not commands - obsoleted flags:
 *  P_MAY_IGNORE
diff --git a/drivers/block/drbd/drbd_protocol.h 
b/drivers/block/drbd/drbd_protocol.h
index ef92453..e5e74e3 100644
--- a/drivers/block/drbd/drbd_protocol.h
+++ b/drivers/block/drbd/drbd_protocol.h
@@ -60,6 +60,10 @@ enum drbd_packet {
 * which is why I chose TRIM here, to disambiguate. */
P_TRIM= 0x31,
 
+   /* Only use these two if both support FF_THIN_RESYNC */
+   P_RS_THIN_REQ = 0x32, /* Request a block for resync or reply 
P_RS_DEALLOCATED */
+   P_RS_DEALLOCATED  = 0x33, /* Contains only zeros on sync source 
node */
+
P_MAY_IGNORE  = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) 
... */
P_MAX_OPT_CMD = 0x101,
 
diff --git a/drivers/block/drbd/drbd_receiver.c 
b/drivers/block/drbd/drbd_receiver.c
index 8b30ab5..3a6c2ec 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1417,9 +1417,15 @@ int drbd_submit_peer_request(struct drbd_device *device,
 * so we can find it to present it in debugfs */
peer_req->submit_jif = jiffies;
peer_req->flags |= EE_SUBMITTED;
-   spin_lock_irq(&device->resource->req_lock);
-   list_add_tail(&peer_req->w.list, &device->active_ee);
-   spin_unlock_irq(&device->resource->req_lock);
+
+   /* If this was a resync request from receive_rs_deallocated(