Re: [PATCH 01/28] ibtrs: add header shared between ibtrs_client and ibtrs_server

2017-03-24 Thread Jinpu Wang
On Fri, Mar 24, 2017 at 3:31 PM, Johannes Thumshirn  wrote:
> On Fri, Mar 24, 2017 at 01:54:04PM +0100, Jinpu Wang wrote:
>> >> +
>> >> +#define XX(a) case (a): return #a
>> >
>> > please no macros with retun in them and XX isn't quite too descriptive as
>> > well.
>> >
>> > [...]
>> >
>> >> +static inline const char *ib_wc_opcode_str(enum ib_wc_opcode opcode)
>> >> +{
>> >> + switch (opcode) {
>> >> + XX(IB_WC_SEND);
>> >> + XX(IB_WC_RDMA_WRITE);
>> >> + XX(IB_WC_RDMA_READ);
>> >> + XX(IB_WC_COMP_SWAP);
>> >> + XX(IB_WC_FETCH_ADD);
>> >> + /* recv-side); inbound completion */
>> >> + XX(IB_WC_RECV);
>> >> + XX(IB_WC_RECV_RDMA_WITH_IMM);
>> >> + default: return "IB_WC_OPCODE_UNKNOWN";
>> >> + }
>> >> +}
>> >
>> > How about:
>> >
>> > struct {
>> > char *name;
>> > enum ib_wc_opcode opcode;
>> > } ib_wc_opcode_table[] = {
>> > { stringyfy(IB_WC_SEND), IB_WC_SEND },
>> > { stringyfy(IB_WC_RDMA_WRITE), IB_WC_RDMA_WRITE },
>> > { stringyfy(IB_WC_RDMA_READ ), IB_WC_RDMA_READ }
>> > { stringyfy(IB_WC_COMP_SWAP), IB_WC_COMP_SWAP },
>> > { stringyfy(IB_WC_FETCH_ADD), IB_WC_FETCH_ADD },
>> > { stringyfy(IB_WC_RECV), IB_WC_RECV },
>> > { stringyfy(IB_WC_RECV_RDMA_WITH_IMM), IB_WC_RECV_RDMA_WITH_IMM },
>> > { NULL, 0 },
>> > };
>> >
>> > static inline const char *ib_wc_opcode_str(enum ib_wc_opcode opcode)
>> > {
>> > int i;
>> >
>> > for (i = 0; i < ARRAY_SIZE(ib_wc_opcode_table); i++)
>> > if (ib_wc_opcode_table[i].opcode == opcode)
>> > return ib_wc_opcode_table[i].name;
>> >
>> > return "IB_WC_OPCODE_UNKNOWN";
>> > }
>> >
>> Looks nice, might be better to put it into ib_verbs.h?
>
> Probably yes, as are your kvec functions for lib/iov_iter.c
Thanks, will do in next round!

>
> [...]
>
>> > What about resolving the kernel bug instead of making workarounds?
>> I tried to send a patch upsteam, but was rejected by Sean.
>> http://www.spinics.net/lists/linux-rdma/msg22381.html
>>
>
> I don't see a NACK in this thread.
>
> From http://www.spinics.net/lists/linux-rdma/msg22410.html:
> "The port space (which maps to the service ID) needs to be included as part of
> the check that determines the format of the private data, and not simply the
> address family."
>
> After such a state I would have expected to see a v2 of the patch with above
> comment addressed.
I might busy with other staff at that time, I will check again and
revisit the bug.

>
> Byte,
> Johannes
> --
> Johannes Thumshirn  Storage
> jthumsh...@suse.de+49 911 74053 689
> SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
> GF: Felix Imendörffer, Jane Smithard, Graham Norton
> HRB 21284 (AG Nürnberg)
> Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850

Regards,
-- 
Jack Wang
Linux Kernel Developer

ProfitBricks GmbH
Greifswalder Str. 207
D - 10405 Berlin

Tel:   +49 30 577 008  042
Fax:  +49 30 577 008 299
Email:jinpu.w...@profitbricks.com
URL:  https://www.profitbricks.de

Sitz der Gesellschaft: Berlin
Registergericht: Amtsgericht Charlottenburg, HRB 125506 B
Geschäftsführer: Achim Weiss


Re: [PATCH 01/28] ibtrs: add header shared between ibtrs_client and ibtrs_server

2017-03-24 Thread Johannes Thumshirn
On Fri, Mar 24, 2017 at 01:54:04PM +0100, Jinpu Wang wrote:
> >> +
> >> +#define XX(a) case (a): return #a
> >
> > please no macros with retun in them and XX isn't quite too descriptive as
> > well.
> >
> > [...]
> >
> >> +static inline const char *ib_wc_opcode_str(enum ib_wc_opcode opcode)
> >> +{
> >> + switch (opcode) {
> >> + XX(IB_WC_SEND);
> >> + XX(IB_WC_RDMA_WRITE);
> >> + XX(IB_WC_RDMA_READ);
> >> + XX(IB_WC_COMP_SWAP);
> >> + XX(IB_WC_FETCH_ADD);
> >> + /* recv-side); inbound completion */
> >> + XX(IB_WC_RECV);
> >> + XX(IB_WC_RECV_RDMA_WITH_IMM);
> >> + default: return "IB_WC_OPCODE_UNKNOWN";
> >> + }
> >> +}
> >
> > How about:
> >
> > struct {
> > char *name;
> > enum ib_wc_opcode opcode;
> > } ib_wc_opcode_table[] = {
> > { stringyfy(IB_WC_SEND), IB_WC_SEND },
> > { stringyfy(IB_WC_RDMA_WRITE), IB_WC_RDMA_WRITE },
> > { stringyfy(IB_WC_RDMA_READ ), IB_WC_RDMA_READ }
> > { stringyfy(IB_WC_COMP_SWAP), IB_WC_COMP_SWAP },
> > { stringyfy(IB_WC_FETCH_ADD), IB_WC_FETCH_ADD },
> > { stringyfy(IB_WC_RECV), IB_WC_RECV },
> > { stringyfy(IB_WC_RECV_RDMA_WITH_IMM), IB_WC_RECV_RDMA_WITH_IMM },
> > { NULL, 0 },
> > };
> >
> > static inline const char *ib_wc_opcode_str(enum ib_wc_opcode opcode)
> > {
> > int i;
> >
> > for (i = 0; i < ARRAY_SIZE(ib_wc_opcode_table); i++)
> > if (ib_wc_opcode_table[i].opcode == opcode)
> > return ib_wc_opcode_table[i].name;
> >
> > return "IB_WC_OPCODE_UNKNOWN";
> > }
> >
> Looks nice, might be better to put it into ib_verbs.h?

Probably yes, as are your kvec functions for lib/iov_iter.c

[...]

> > What about resolving the kernel bug instead of making workarounds?
> I tried to send a patch upsteam, but was rejected by Sean.
> http://www.spinics.net/lists/linux-rdma/msg22381.html
> 

I don't see a NACK in this thread.

>From http://www.spinics.net/lists/linux-rdma/msg22410.html:
"The port space (which maps to the service ID) needs to be included as part of
the check that determines the format of the private data, and not simply the
address family." 

After such a state I would have expected to see a v2 of the patch with above
comment addressed.

Byte,
Johannes
-- 
Johannes Thumshirn  Storage
jthumsh...@suse.de+49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850


Re: [PATCH 01/28] ibtrs: add header shared between ibtrs_client and ibtrs_server

2017-03-24 Thread Jinpu Wang
>> +
>> +#define XX(a) case (a): return #a
>
> please no macros with retun in them and XX isn't quite too descriptive as
> well.
>
> [...]
>
>> +static inline const char *ib_wc_opcode_str(enum ib_wc_opcode opcode)
>> +{
>> + switch (opcode) {
>> + XX(IB_WC_SEND);
>> + XX(IB_WC_RDMA_WRITE);
>> + XX(IB_WC_RDMA_READ);
>> + XX(IB_WC_COMP_SWAP);
>> + XX(IB_WC_FETCH_ADD);
>> + /* recv-side); inbound completion */
>> + XX(IB_WC_RECV);
>> + XX(IB_WC_RECV_RDMA_WITH_IMM);
>> + default: return "IB_WC_OPCODE_UNKNOWN";
>> + }
>> +}
>
> How about:
>
> struct {
> char *name;
> enum ib_wc_opcode opcode;
> } ib_wc_opcode_table[] = {
> { stringyfy(IB_WC_SEND), IB_WC_SEND },
> { stringyfy(IB_WC_RDMA_WRITE), IB_WC_RDMA_WRITE },
> { stringyfy(IB_WC_RDMA_READ ), IB_WC_RDMA_READ }
> { stringyfy(IB_WC_COMP_SWAP), IB_WC_COMP_SWAP },
> { stringyfy(IB_WC_FETCH_ADD), IB_WC_FETCH_ADD },
> { stringyfy(IB_WC_RECV), IB_WC_RECV },
> { stringyfy(IB_WC_RECV_RDMA_WITH_IMM), IB_WC_RECV_RDMA_WITH_IMM },
> { NULL, 0 },
> };
>
> static inline const char *ib_wc_opcode_str(enum ib_wc_opcode opcode)
> {
> int i;
>
> for (i = 0; i < ARRAY_SIZE(ib_wc_opcode_table); i++)
> if (ib_wc_opcode_table[i].opcode == opcode)
> return ib_wc_opcode_table[i].name;
>
> return "IB_WC_OPCODE_UNKNOWN";
> }
>
Looks nice, might be better to put it into ib_verbs.h?

>
> [...]
>
>> +/**
>> + * struct ibtrs_msg_hdr - Common header of all IBTRS messages
>> + * @type:Message type, valid values see: enum ibtrs_msg_types
>> + * @tsize:   Total size of transferred data
>> + *
>> + * Don't move the first 8 padding bytes! It's a workaround for a kernel bug.
>> + * See IBNBD-610 for details
>
> What about resolving the kernel bug instead of making workarounds?
I tried to send a patch upsteam, but was rejected by Sean.
http://www.spinics.net/lists/linux-rdma/msg22381.html

>
>> + *
>> + * DO NOT CHANGE!
>> + */
>> +struct ibtrs_msg_hdr {
>> + u8  __padding1;
>> + u8  type;
>> + u16 __padding2;
>> + u32 tsize;
>> +};
>
> [...]
>
> --
> Johannes Thumshirn  Storage
> jthumsh...@suse.de+49 911 74053 689
> SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
> GF: Felix Imendörffer, Jane Smithard, Graham Norton
> HRB 21284 (AG Nürnberg)
> Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850

Thanks Johannes for review.


-- 
Jack Wang
Linux Kernel Developer

ProfitBricks GmbH
Greifswalder Str. 207
D - 10405 Berlin

Tel:   +49 30 577 008  042
Fax:  +49 30 577 008 299
Email:jinpu.w...@profitbricks.com
URL:  https://www.profitbricks.de

Sitz der Gesellschaft: Berlin
Registergericht: Amtsgericht Charlottenburg, HRB 125506 B
Geschäftsführer: Achim Weiss


Re: [PATCH 01/28] ibtrs: add header shared between ibtrs_client and ibtrs_server

2017-03-24 Thread Johannes Thumshirn
On Fri, Mar 24, 2017 at 11:45:16AM +0100, Jack Wang wrote:
> From: Jack Wang 
> 
> Signed-off-by: Jack Wang 
> Signed-off-by: Kleber Souza 
> Signed-off-by: Danil Kipnis 
> Signed-off-by: Roman Pen 
> ---

[...]

> +
> +#define XX(a) case (a): return #a

please no macros with retun in them and XX isn't quite too descriptive as
well.

[...]

> +static inline const char *ib_wc_opcode_str(enum ib_wc_opcode opcode)
> +{
> + switch (opcode) {
> + XX(IB_WC_SEND);
> + XX(IB_WC_RDMA_WRITE);
> + XX(IB_WC_RDMA_READ);
> + XX(IB_WC_COMP_SWAP);
> + XX(IB_WC_FETCH_ADD);
> + /* recv-side); inbound completion */
> + XX(IB_WC_RECV);
> + XX(IB_WC_RECV_RDMA_WITH_IMM);
> + default: return "IB_WC_OPCODE_UNKNOWN";
> + }
> +}

How about:

struct {
char *name;
enum ib_wc_opcode opcode;
} ib_wc_opcode_table[] = {
{ stringyfy(IB_WC_SEND), IB_WC_SEND },
{ stringyfy(IB_WC_RDMA_WRITE), IB_WC_RDMA_WRITE },
{ stringyfy(IB_WC_RDMA_READ ), IB_WC_RDMA_READ }
{ stringyfy(IB_WC_COMP_SWAP), IB_WC_COMP_SWAP },
{ stringyfy(IB_WC_FETCH_ADD), IB_WC_FETCH_ADD },
{ stringyfy(IB_WC_RECV), IB_WC_RECV },
{ stringyfy(IB_WC_RECV_RDMA_WITH_IMM), IB_WC_RECV_RDMA_WITH_IMM },
{ NULL, 0 },
};

static inline const char *ib_wc_opcode_str(enum ib_wc_opcode opcode)
{
int i;

for (i = 0; i < ARRAY_SIZE(ib_wc_opcode_table); i++)
if (ib_wc_opcode_table[i].opcode == opcode)
return ib_wc_opcode_table[i].name;

return "IB_WC_OPCODE_UNKNOWN";
}


[...]

> +/**
> + * struct ibtrs_msg_hdr - Common header of all IBTRS messages
> + * @type:Message type, valid values see: enum ibtrs_msg_types
> + * @tsize:   Total size of transferred data
> + *
> + * Don't move the first 8 padding bytes! It's a workaround for a kernel bug.
> + * See IBNBD-610 for details

What about resolving the kernel bug instead of making workarounds?

> + *
> + * DO NOT CHANGE!
> + */
> +struct ibtrs_msg_hdr {
> + u8  __padding1;
> + u8  type;
> + u16 __padding2;
> + u32 tsize;
> +};

[...]

-- 
Johannes Thumshirn  Storage
jthumsh...@suse.de+49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850


[PATCH 01/28] ibtrs: add header shared between ibtrs_client and ibtrs_server

2017-03-24 Thread Jack Wang
From: Jack Wang 

Signed-off-by: Jack Wang 
Signed-off-by: Kleber Souza 
Signed-off-by: Danil Kipnis 
Signed-off-by: Roman Pen 
---
 include/rdma/ibtrs.h | 514 +++
 1 file changed, 514 insertions(+)
 create mode 100644 include/rdma/ibtrs.h

diff --git a/include/rdma/ibtrs.h b/include/rdma/ibtrs.h
new file mode 100644
index 000..4fc572b
--- /dev/null
+++ b/include/rdma/ibtrs.h
@@ -0,0 +1,514 @@
+/*
+ * InfiniBand Transport Layer
+ *
+ * Copyright (c) 2014 - 2017 ProfitBricks GmbH. All rights reserved.
+ * Authors: Fabian Holler < m...@fholler.de>
+ *  Jack Wang 
+ * Kleber Souza 
+ * Danil Kipnis 
+ * Roman Pen 
+ *  Milind Dumbare 
+ *
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions, and the following disclaimer,
+ *without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *substantially similar to the "NO WARRANTY" disclaimer below
+ *("Disclaimer") and any redistribution must be conditioned upon
+ *including a substantially similar Disclaimer requirement for further
+ *binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ *of any contributors may be used to endorse or promote products derived
+ *from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ */
+
+#ifndef __IBTRS_H
+#define __IBTRS_H
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define IBTRS_SERVER_PORT 1234
+#define WC_ARRAY_SIZE 16
+#define IB_APM_TIMEOUT 16 /* 4.096 * 2 ^ 16 = 260 msec */
+
+#define USR_MSG_CNT 64
+#define USR_CON_BUF_SIZE (USR_MSG_CNT * 2) /* double bufs for ACK's */
+
+#define DEFAULT_HEARTBEAT_TIMEOUT_MS 2
+#define MIN_HEARTBEAT_TIMEOUT_MS 5000
+#define HEARTBEAT_INTV_MS 500
+#define HEARTBEAT_INTV_JIFFIES msecs_to_jiffies(HEARTBEAT_INTV_MS)
+
+#define MIN_RTR_CNT 1
+#define MAX_RTR_CNT 7
+
+/*
+ * With the current size of the tag allocated on the client, 4K is the maximum
+ * number of tags we can allocate. (see IBNBD-2321)
+ * This number is also used on the client to allocate the IU for the user
+ * connection to receive the RDMA addresses from the server.
+ */
+#define MAX_SESS_QUEUE_DEPTH 4096
+
+#define XX(a) case (a): return #a
+
+#define IBTRS_ADDRLEN sizeof("ipv6:[:::::::]")
+
+static inline const char *ib_wc_opcode_str(enum ib_wc_opcode opcode)
+{
+   switch (opcode) {
+   XX(IB_WC_SEND);
+   XX(IB_WC_RDMA_WRITE);
+   XX(IB_WC_RDMA_READ);
+   XX(IB_WC_COMP_SWAP);
+   XX(IB_WC_FETCH_ADD);
+   /* recv-side); inbound completion */
+   XX(IB_WC_RECV);
+   XX(IB_WC_RECV_RDMA_WITH_IMM);
+   default: return "IB_WC_OPCODE_UNKNOWN";
+   }
+}
+
+
+struct ib_session {
+   struct ib_pd*pd;
+   struct ib_mr*mr;
+   struct ib_event_handler event_handler;
+};
+
+struct ibtrs_ib_path {
+   union ib_gidp_sgid;
+   union ib_gidp_dgid;
+};
+
+struct ib_con {
+   struct ib_qp*qp cacheline_aligned;
+   struct ib_cq*cq cacheline_aligned;
+   struct ib_send_wr   beacon;
+   struct rdma_cm_id   *cm_id;
+   struct ibtrs_ib_pathpri_path;
+   struct ibtrs_ib_path   cur_path;
+   char*addr;
+   char*hostname;
+};
+
+struct ibtrs_iu {
+   struct list_headlist;
+   dma_addr_t  dma_addr;
+   void*buf;
+   size_t  size;
+   enum dma_data_direction direction;
+   boolis_msg;
+   u32 tag;
+};
+
+struct ibtrs_heartbeat {