date:20060117

[openib-general] Problems with dmcli on 64b hosts

2006-01-17 Thread Yael Shenhav







Hi Roland,

I am using the dmcli Python utility for SRP purposes. With x86_64, FedoraCore4, I get an error:

Traceback (most recent call last):
  File "/usr/local/ibg2/bin/dmcli", line 185, in ?
    main()
  File "/usr/local/ibg2/bin/dmcli", line 149, in main
    agt = f.reg_agent(1)
  File "/usr/local/ibg2/lib64/python/umad.py", line 121, in reg_agent
    if fcntl.ioctl(self._fd, REGISTER_AGENT, buf, 1):
TypeError: ioctl requires a file or file descriptor, an integer and optionally a integer or buffer argument

I saw you identified this issue as being a Python bug and proposed a workaround inspired by
https://sourceforge.net/tracker/?func=detail&atid=105470&aid=1112949&group_id=5470

Are you planning to get this workaround into dmcli?

Thanks.

Regards,
Yael Shenhav








___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] [PATCH] enable the fmr pool user to set the page size

2006-01-17 Thread Or Gerlitz

Roland,

This patch allows the consumer to set the page size of pages mapped 
by the pool fmrs which is a feature already existing in the ib_verbs api. 

On the cosmetic side, it renames the ib_fmr_attr.page_size field to
page_shift. Note that I did not go as far as renaming mpt_entry->page_size,
so it's up to you whether to keep the page_size convention there.

A patch to convert the fmr consumers to the new API is below. If this
API change is accepted, we will enhance the iser code, e.g. to use FMRs
at 4K page resolution.

Or.


Signed-off-by: Or Gerlitz [EMAIL PROTECTED]



Index: include/rdma/ib_verbs.h
===
--- include/rdma/ib_verbs.h (revision 4911)
+++ include/rdma/ib_verbs.h (working copy)
@@ -650,7 +650,7 @@ struct ib_mw_bind {
 struct ib_fmr_attr {
int max_pages;
int max_maps;
-   u8  page_size;
+   u8  page_shift;
 };
 
 struct ib_ucontext {
Index: include/rdma/ib_fmr_pool.h
===
--- include/rdma/ib_fmr_pool.h  (revision 4911)
+++ include/rdma/ib_fmr_pool.h  (working copy)
@@ -43,6 +43,7 @@ struct ib_fmr_pool;
 /**
  * struct ib_fmr_pool_param - Parameters for creating FMR pool
  * @max_pages_per_fmr:Maximum number of pages per map request.
+ * @page_shift: Log2 of sizeof pages mapped by this fmr
  * @access:Access flags for FMRs in pool.
  * @pool_size:Number of FMRs to allocate for pool.
  * @dirty_watermark:Flush is triggered when @dirty_watermark dirty
@@ -55,6 +56,7 @@ struct ib_fmr_pool;
  */
 struct ib_fmr_pool_param {
int max_pages_per_fmr;
+   int page_shift;
enum ib_access_flagsaccess;
int pool_size;
int dirty_watermark;
Index: core/fmr_pool.c
===
--- core/fmr_pool.c (revision 4911)
+++ core/fmr_pool.c (working copy)
@@ -280,7 +280,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(s
struct ib_fmr_attr attr = {
.max_pages = params-max_pages_per_fmr,
.max_maps  = IB_FMR_MAX_REMAPS,
-   .page_size = PAGE_SHIFT
+   .page_shift = params-page_shift;
};
 
for (i = 0; i  params-pool_size; ++i) {
Index: hw/mthca/mthca_mr.c
===
--- hw/mthca/mthca_mr.c (revision 4911)
+++ hw/mthca/mthca_mr.c (working copy)
@@ -497,7 +497,7 @@ int mthca_fmr_alloc(struct mthca_dev *de
 
might_sleep();
 
-   if (mr-attr.page_size  12 || mr-attr.page_size = 32)
+   if (mr-attr.page_shift  12 || mr-attr.page_shift = 32)
return -EINVAL;
 
/* For Arbel, all MTTs must fit in the same page. */
@@ -549,7 +549,7 @@ int mthca_fmr_alloc(struct mthca_dev *de
   MTHCA_MPT_FLAG_REGION  |
   access);
 
-   mpt_entry-page_size = cpu_to_be32(mr-attr.page_size - 12);
+   mpt_entry-page_size = cpu_to_be32(mr-attr.page_shift - 12);
mpt_entry-key   = cpu_to_be32(key);
mpt_entry-pd= cpu_to_be32(pd);
memset(mpt_entry-start, 0,

Index: ulp/sdp/sdp_conn.c
===
--- ulp/sdp/sdp_conn.c  (revision 4911)
+++ ulp/sdp/sdp_conn.c  (working copy)
@@ -1759,6 +1759,7 @@ static void sdp_device_init_one(struct i
/*
 * FMR allocation
 */
+   fmr_param_s.page_shift = PAGE_SHIFT;
fmr_param_s.pool_size = SDP_FMR_POOL_SIZE;
fmr_param_s.dirty_watermark = SDP_FMR_DIRTY_SIZE;
fmr_param_s.cache = 1;
Index: ulp/iser/iser_verbs.c
===
--- ulp/iser/iser_verbs.c   (revision 5033)
+++ ulp/iser/iser_verbs.c   (working copy)
@@ -150,6 +150,7 @@ int iser_create_ib_conn_res(struct iser_
 
p_iser_adaptor = p_iser_conn-p_adaptor;
 
+   params.page_shift= PAGE_SHIFT;
params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE;
params.pool_size = ISCSI_ISER_XMIT_CMDS_MAX;
params.dirty_watermark   = 32;

___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] [patch] userspace/management/diags/src/sminfo.c - cmdline processing fix

2006-01-17 Thread Sasha Khapyorsky

Hello Hal,

There is a small bug in sminfo's cmdline processing: it will segfault
when an option argument is missing (like 'sminfo -a'). A quick and dirty
fix is inlined.

The same problem exists with most diag tools, so I think we need to
rework the ARGBEGIN { ... } ARGEND stuff (actually remove it from
libibcommon, since it is used by the diag tools only). I can do it if
there are no objections.

Regards,
Sasha.


This is a quick fix for invalid ARGF() usage in sminfo.c.

Signed-off-by: Sasha Khapyorsky [EMAIL PROTECTED]

Index: diags/src/sminfo.c
===
--- diags/src/sminfo.c  (revision 5017)
+++ diags/src/sminfo.c  (working copy)
@@ -49,6 +49,8 @@
 
 #define IBERROR(fmt, args...)  iberror(__FUNCTION__, fmt, ## args)
 
+#define SAFE_ARGF() (*(argv+1) ? ARGF() : ( usage(), NULL ) )
+
 static void
 iberror(const char *fn, char *msg, ...)
 {
@@ -116,10 +118,10 @@
 
ARGBEGIN {
case 'C':
-   ca = ARGF();
+   ca = SAFE_ARGF();
break;
case 'P':
-   ca_port = strtoul(ARGF(), 0, 0);
+   ca_port = strtoul(SAFE_ARGF(), 0, 0);
break;
case 'd':
ibdebug++;
@@ -137,17 +139,17 @@
dest_type = IB_DEST_GUID;
break;
case 't':
-   timeout = strtoul(ARGF(), 0, 0);
+   timeout = strtoul(SAFE_ARGF(), 0, 0);
madrpc_set_timeout(timeout);
break;
case 'a':
-   act = strtoul(ARGF(), 0, 0);
+   act = strtoul(SAFE_ARGF(), 0, 0);
break;
case 's':
-   state = strtoul(ARGF(), 0, 0);
+   state = strtoul(SAFE_ARGF(), 0, 0);
break;
case 'p':
-   prio = strtoul(ARGF(), 0, 0);
+   prio = strtoul(SAFE_ARGF(), 0, 0);
break;
case 'V':
fprintf(stderr, %s %s\n, argv0, get_build_version() );
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] Re: [PATCH] enable the fmr pool user to set the pagesize

2006-01-17 Thread Michael S. Tsirkin

Quoting Or Gerlitz [EMAIL PROTECTED]:
 A patch to convert the fmr consumers to the new api is below, if this 
 api change is accepted we will enhance iser code eg to fmr in 4K pages
 resolution.

Out of curiosity, why would you want to make the page size smaller than
PAGE_SIZE?  Bigger pages typically give you better performance; isn't that true
for iser?

-- 
MST
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] Re: [patch] userspace/management/diags/src/sminfo.c -cmdline processing fix

2006-01-17 Thread Michael S. Tsirkin

Quoting Sasha Khapyorsky [EMAIL PROTECTED]:
 Subject: [patch] userspace/management/diags/src/sminfo.c -cmdline processing 
 fix
 
 Hello Hal,
 
 There is small bug in sminfo's cmdline processing, this will segfault
 when option argument is missing (like 'sminfo -a'). The fast and dirty
 fix is inlined.
 
 The same problem exists with most diag tools, so I think we need to
 rework AGRBEGIN { ... } ARGEND stuff (actually remove it from
 libibcommon since it is used by diag tools only). I can do it if there
 are no objections.
 
 Regards,
 Sasha.
 
 
 This fast fix for invalid ARGF() usage in sminfo.c.
 
 Signed-off-by: Sasha Khapyorsky [EMAIL PROTECTED]

BTW, why aren't the diags using the standard getopt_long?
That would solve the problem above in a clean way and help us get rid of
the ARGxxx macros completely.

Hal?

-- 
MST
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] Re: [patch] userspace/management/diags/src/sminfo.c -cmdline processing fix

2006-01-17 Thread Sasha Khapyorsky

On 12:30 Tue 17 Jan , Michael S. Tsirkin wrote:
 
 BTW, why arent the diags using the standard getopt_long?
 That would solve the problem above in a clean way and help us get rid of
 the ARGxxx macros completely.

Agree.

Sasha.
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] RE: [patch] userspace/management/diags/src/sminfo.c -cmdline processing fix

2006-01-17 Thread Hal Rosenstock

Hi Michael,
 
I believe this is largely historical. I will put this on the list TODO for the 
diags and hopefully get to it in the not too distant future.
 
-- Hal



From: Michael S. Tsirkin [mailto:[EMAIL PROTECTED]
Sent: Tue 1/17/2006 5:30 AM
To: Sasha Khapyorsky
Cc: Hal Rosenstock; openib
Subject: Re: [patch] userspace/management/diags/src/sminfo.c -cmdline 
processing fix



Quoting Sasha Khapyorsky [EMAIL PROTECTED]:
 Subject: [patch] userspace/management/diags/src/sminfo.c -cmdline processing 
 fix

 Hello Hal,

 There is small bug in sminfo's cmdline processing, this will segfault
 when option argument is missing (like 'sminfo -a'). The fast and dirty
 fix is inlined.

 The same problem exists with most diag tools, so I think we need to
 rework AGRBEGIN { ... } ARGEND stuff (actually remove it from
 libibcommon since it is used by diag tools only). I can do it if there
 are no objections.

 Regards,
 Sasha.


 This fast fix for invalid ARGF() usage in sminfo.c.

 Signed-off-by: Sasha Khapyorsky [EMAIL PROTECTED]

BTW, why arent the diags using the standard getopt_long?
That would solve the problem above in a clean way and help us get rid of
the ARGxxx macros completely.

Hal?

--
MST


___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] Re: [PATCH] enable the fmr pool user to set the pagesize

2006-01-17 Thread Or Gerlitz


Michael S. Tsirkin wrote:

Out of curiosity, why would you want to make the page size smaller than
PAGE_SIZE?  Bigger pages typically give you better performance, isnt that true
for iser?


First just for the sake of clarity it is important to emphasize in the 
verbs api level the decoupling of the OS page notation to the page 
used by the HCA to map bunch of buffers to one network VA.


Second and indeed more important, from our experience, there are 
eventually IB consumers such as the Linux SCSI Mid-Layer which sometimes 
generate Scatter-Gather lists that are RDMA aligned when treated in a 
resolution different from the system PAGE_SHIFT. Example to that we saw 
with ia64 SLES9 SP1/2 kernels. So if you work in PAGE_SHIFT you can not 
produce one VA for many of the SG submitted by the mid-layer.


Or.


___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] Re: [PATCH] enable the fmr pool user to set the pagesize

2006-01-17 Thread Michael S. Tsirkin

Quoting Or Gerlitz [EMAIL PROTECTED]:
 Second and indeed more important, from our experience, there are 
 eventually IB consumers such as the Linux SCSI Mid-Layer which sometimes 
 generate Scatter-Gather lists that are RDMA aligned when treated in a 
 resolution different from the system PAGE_SHIFT. Example to that we saw 
 with ia64 SLES9 SP1/2 kernels. So if you work in PAGE_SHIFT you can not 
 produce one VA for many of the SG submitted by the mid-layer.

Interesting. Where does the mid-layer get the 4K (not PAGE_SIZE) aligned
buffers?  Any idea?


-- 
MST
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] Comments on ehca updates

2006-01-17 Thread Roland Dreier

Hi, I noticed that you checked in some ehca changes.  A couple of comments:

1) While most of the changes to your #includes are correct, like

-#include <hipz_fns_core.h>
+#include "hipz_fns_core.h"

since you should use "" instead of <> for includes in your own local
directory to make the kernel build work, things in the kernel's own
include/ directory should still use <>, so for example

-#include <linux/version.h>
+#include "linux/version.h"

Also, I never noticed this before but

-#include <ib_mad.h>
+#include "ib_mad.h"

should really just be #include <rdma/ib_mad.h>.

2) How can the changes like

@@ -75,7 +74,7 @@ int ehca_post_send(struct ib_qp *qp,
my_qp, qp-qp_num, send_wr, bad_send_wr);
 
/* LOCK the QUEUE */
-   spin_lock_irqsave(my_qp-spinlock_s, spin_flags);
+   spin_lock(my_qp-spinlock_s);

be correct?  ehca_post_send() is called directly as your device's
post_send method, which means that a consumer can call it from both
process and interrupt context.  So using plain spin_lock() can
deadlock if a process context call is interrupted by an interrupt
context call.

The same comment applies to your other changes like this, at least in
your post_recv and poll_cq methods.

 - R.
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] Re: Problems with dmcli on 64b hosts

2006-01-17 Thread Roland Dreier

Yael Are you planning to get this workaround in dmcli?

Not really -- I think the C version of the DM client that I posted
later is more useful.  By the way, I still need to integrate Alexander
Beyn's fix to that code, and I will do that soon.

Even better would be if someone ambitious created a DM tool that
automatically connects to the targets it discovers, etc.

 - R.
 
 
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] Re: Comments on ehca updates

2006-01-17 Thread Heiko J Schick


Hello Roland,

thanks for your comments. We will include
the changes in our code and then do a new OpenIB SVN check-in.

Mit freundlichen Gruessen / Kind Regards
Heiko Joerg Schick

IBM Deutschland Entwicklung GmbH
I/O Firmware Development II
Linux Infiniband Device Drivers

Schoenaicher Str. 220
71032 Boeblingen
E-Mail: [EMAIL PROTECTED]
External: 49-7031-16-0 x4219,  t/l: 120-4219






Roland Dreier [EMAIL PROTECTED]

01/17/2006 01:59 PM




To
Heiko J Schick/Germany/[EMAIL PROTECTED],
Christoph Raisch/Germany/[EMAIL PROTECTED], Marcus Eder/Germany/[EMAIL PROTECTED]


cc
openib-general@openib.org


Subject
Comments on ehca updates








Hi, I noticed that you checked in some ehca changes.
A couple of comments:

1) While most of the changes to your #includes are correct, like


-#include hipz_fns_core.h

+#include hipz_fns_core.h

since you should use  instead of  for includes in your
own local
directory to make the kernel build work, things in the kernel's own
include/ directory should still use , so for example


-#include linux/version.h

+#include linux/version.h

Also, I never noticed this before but


-#include ib_mad.h

+#include ib_mad.h

should really just be #include rdma/ib_mad.h.

2) How can the changes like

@@ -75,7 +74,7 @@ int ehca_post_send(struct ib_qp *qp,
 
   
my_qp, qp-qp_num, send_wr, bad_send_wr);
 
 /*
LOCK the QUEUE */
-
spin_lock_irqsave(my_qp-spinlock_s, spin_flags);
+
spin_lock(my_qp-spinlock_s);

be correct? ehca_post_send() is called directly as your device's
post_send method, which means that a consumer can call it from both
process and interrupt context. So using plain spin_lock() can
deadlock if a process context call is interrupted by an interrupt
context call.

The same comment applies to your other changes like this, at least in
your post_recv and poll_cq methods.

 - R.


___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] RE: [patch] userspace/management/diags/src/sminfo.c - cmdline processing fix

2006-01-17 Thread Hal Rosenstock

Hi Sasha,
 
Thanks. Applied.
 
I would welcome such a patch.
 
-- Hal



From: Sasha Khapyorsky [mailto:[EMAIL PROTECTED]
Sent: Tue 1/17/2006 5:10 AM
To: Hal Rosenstock
Cc: openib
Subject: [patch] userspace/management/diags/src/sminfo.c - cmdline processing 
fix



Hello Hal,

There is small bug in sminfo's cmdline processing, this will segfault
when option argument is missing (like 'sminfo -a'). The fast and dirty
fix is inlined.

The same problem exists with most diag tools, so I think we need to
rework AGRBEGIN { ... } ARGEND stuff (actually remove it from
libibcommon since it is used by diag tools only). I can do it if there
are no objections.

Regards,
Sasha.


This fast fix for invalid ARGF() usage in sminfo.c.

Signed-off-by: Sasha Khapyorsky [EMAIL PROTECTED]

Index: diags/src/sminfo.c
===
--- diags/src/sminfo.c  (revision 5017)
+++ diags/src/sminfo.c  (working copy)
@@ -49,6 +49,8 @@

 #define IBERROR(fmt, args...)  iberror(__FUNCTION__, fmt, ## args)

+#define SAFE_ARGF() (*(argv+1) ? ARGF() : ( usage(), NULL ) )
+
 static void
 iberror(const char *fn, char *msg, ...)
 {
@@ -116,10 +118,10 @@

ARGBEGIN {
case 'C':
-   ca = ARGF();
+   ca = SAFE_ARGF();
break;
case 'P':
-   ca_port = strtoul(ARGF(), 0, 0);
+   ca_port = strtoul(SAFE_ARGF(), 0, 0);
break;
case 'd':
ibdebug++;
@@ -137,17 +139,17 @@
dest_type = IB_DEST_GUID;
break;
case 't':
-   timeout = strtoul(ARGF(), 0, 0);
+   timeout = strtoul(SAFE_ARGF(), 0, 0);
madrpc_set_timeout(timeout);
break;
case 'a':
-   act = strtoul(ARGF(), 0, 0);
+   act = strtoul(SAFE_ARGF(), 0, 0);
break;
case 's':
-   state = strtoul(ARGF(), 0, 0);
+   state = strtoul(SAFE_ARGF(), 0, 0);
break;
case 'p':
-   prio = strtoul(ARGF(), 0, 0);
+   prio = strtoul(SAFE_ARGF(), 0, 0);
break;
case 'V':
fprintf(stderr, %s %s\n, argv0, get_build_version() );


___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] Unknown symbol ip_dev_find (2.6.15.1 kernel)

2006-01-17 Thread Steven Wooding

Hi,

I was updating my kernel and openib drivers (haven't done so for a
couple of months) and I've got stuck on the following problem.

When you do make modules_install you get the following warnings at the end:

WARNING: /lib/modules/2.6.15.1/kernel/drivers/infiniband/ulp/sdp/ib_sdp.ko
needs unknown symbol ip_dev_find
WARNING: /lib/modules/2.6.15.1/kernel/drivers/infiniband/core/ib_at.ko
needs unknown symbol ip_dev_find
WARNING: /lib/modules/2.6.15.1/kernel/drivers/infiniband/core/ib_addr.ko
needs unknown symbol ip_dev_find

I tried to reboot anyway, but these modules do indeed fail to load due
to this problem.

I notice this was fixed for 2.6.14 with a patch that exported the
ip_dev_find symbol. Do we need one for 2.6.15.1 or have I missed a
step out of my installation process?

Thanks for the help.

Cheers,


Steve.
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] RE: [RFC] DAT 2.0 immediate data proposal

2006-01-17 Thread Kanevsky, Arkady




Arlin,
a few things need to be addressed.

1. correlation with local and remote 
invalidate
This potentially affects both DAT_DTOs and post 
operations

2. Need a precise definition of CONFIRM_FLAG 
in a transport-independent fashion.
What guarantees DAT Provider "provides" on successful 
local completion?
Remote end guarantee?

My understanding what you are trying to do is create 2 
models one IB and one for iWARP.
So for IB Consumers will use CONFIRM_FLAG and for iWARP 
IMMED_FLAG.
Provider will indicate in Provider_attr which model it 
supports.

The issue I have with it is that I do not see a model 
that Consumer can use to create
a transport independent code.
It looks like Immed_flag can be made transport 
independent. But with "sender" specifying
the behavior a protocol extension is needed for IB. IB 
will always deliver Immediate data
in the header not a payload and remote Provider can 
control how it is delivered to a Consumer.
But this means that there is no need for DTO_flags for 
Send side. Instead it can be
used for Recv side or controlled purely by 
Provider.

3. Need to define error behavior. for new operations, 
async errors, EP behavior.

4. Need to define DAT_Provider attributes for immediate 
data and dto_flags behavior

5. Does Solicited_wait completion_flag value now 
applicable for RDMA_write for immediate data?

6. Is dto_completion_data xfer_length include 
immediate_data size or not?

7. What memory privileges are needed for a recv buffer for 
immediate data?

8. SRQ interaction?

9. What happens if a buffer for a recv operation that is NOT 
recv_immed is matched for an incoming recv/rdma_write op?

10. Change dat_ep_post_write_immed to 
dat_ep_post_rdma_write_immed to be consistent with current
terminology.

11. Need to cleanup operation description to make it 
clear that Send|RDMA_write and immediate data part
is a single atomic operation. The current "followed by" 
language is misleading.
Make it explicit that there is a single local DTO 
completion and single remote DTO completion.

12. Is your intention that post_recv_immed can ONLY 
accept immediate data and is not
capable of receiving any message?

13. size should be num_segments for 
dat_ep_post_recv_immed()

Arkady






Arkady Kanevsky 
email: [EMAIL PROTECTED]
Network 
Appliance Inc. 
phone: 781-768-5395
1601 
Trapelo Rd. - Suite 16.Fax: 
781-895-1195
Waltham, MA 
02451 
central phone: 781-768-5300


  
  
  From: Arlin Davis 
  [mailto:[EMAIL PROTECTED] Sent: Monday, January 16, 2006 
  5:55 PMTo: Kanevsky, Arkady; Lentini, JamesCc: 
  [EMAIL PROTECTED]; openib-general@openib.orgSubject: 
  [RFC] DAT 2.0 immediate data proposal
  
  
  Arkady,
  
  The attached proposal adds 
  immediate data options as standard APIs instead of extensions for the 
  following calls. 
  
  dat_ep_post_send_immed()
  dat_ep_post_recv_immed()
  dat_ep_post_write_immed()
  
  The patch should be ready by 
  tomorrow.
  
  Thanks,
  
  -arlin
  
  
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Re: [openib-general] Unknown symbol ip_dev_find (2.6.15.1 kernel)

2006-01-17 Thread James Lentini


 I notice this was fixed for 2.6.14 with a patch that exported the
 ip_dev_find symbol. Do we need one for 2.6.15.1 or have I missed a
 step out of my installation process?

The same fix is necessary. You can apply the 2.6.14 patch to 2.6.15.1.
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] RE: [RFC] DAT 2.0 extension proposal

2006-01-17 Thread Kanevsky, Arkady




Arlin,

1. Does it mean that existing DAT providers will have 
to be modified so they report
DAT_NOT_IMPLEMENTED for each 
extension?

2. Why is there DAT_INVALID in 
DAT_DTOS?

3. Do you want to use DAT_EXTENSION_DATA or 
DAT_EXT_DATA?

4. The proposed operations are operation on EP and they 
are DTOs.
Why not define DAT_DTO_EXT_OP instead of 
DAT_EXT_OP?

MY concern is that if these are not DTO then we have a 
new event stream type
for "extensions" and we need to define rules for this 
event stream including
ordering rules and interactions with other event 
streams, provider attributes
for stream mixing and so on...

If we restrict extensions to DTO operation 
extensions we avoid all these issues
and simplify APIs. On the negative side these extension 
are restrictive.

5. Memory protection extension for atomic 
operations

6. error returns for extensions?

Arkady






Arkady Kanevsky 
email: [EMAIL PROTECTED]
Network 
Appliance Inc. 
phone: 781-768-5395
1601 
Trapelo Rd. - Suite 16.Fax: 
781-895-1195
Waltham, MA 
02451 
central phone: 781-768-5300


  
  
  From: Davis, Arlin R 
  [mailto:[EMAIL PROTECTED] Sent: Monday, January 16, 2006 
  5:55 PMTo: Kanevsky, Arkady; Lentini, JamesCc: 
  [EMAIL PROTECTED]; openib-general@openib.orgSubject: 
  [RFC] DAT 2.0 extension proposal
  
  
  Arkady,
  
  The attached proposal adds generic 
  DTO extensions and provider specific atomic operations as follow. 
  
  
  dat_ep_post_cmp_and_swap()
  dat_ep_post_fetch_and_add()
  
  The patch should be ready by 
  tomorrow.
  
  Thanks,
  
  -arlin
  
  
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] Send packets above ibumad and waiting for a response

2006-01-17 Thread Ofer Gigi









Hi,

I am trying to write a test that will send/receive packets directly
above IBUMAD.



Does anyone have a chunk of code that does this?



Thanks a lot in advance!

Ofer






___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

RE: [openib-general] Send packets above ibumad and waiting for aresponse

2006-01-17 Thread Hal Rosenstock

Hi Ofer,
 
Are you referring to the kernel module or the library here ?
 
-- Hal
 


From: [EMAIL PROTECTED] on behalf of Ofer Gigi
Sent: Tue 1/17/2006 11:19 AM
To: openib-general@openib.org
Subject: [openib-general] Send packets above ibumad and waiting for aresponse


Hi,
I am trying to write a test that will send/receive packets directly above 
IBUMAD.
 
Does anyone have such a chunk of code that do this?
 
Thanks a lot in advance!
Ofer
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Re: [openib-general] [PATCH] ibsrpdm: use the proper HCA and port with non-default umad device

2006-01-17 Thread Roland Dreier

At long last I've integrated your patch into srptools.  I took this as
an excuse to check the DM package into svn under

https://openib.org/svn/gen2/trunk/src/userspace/srptools

as well, rather than passing tarballs around on the mailing list.

Thanks,
  Roland
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] RE: Missing file, trunk/src/linux-kernel/include/scsi/srp.h. in SVN5031

2006-01-17 Thread Bob Woodruff

Roland wrote,
However my reasoning could easily be all wrong -- what are the
advantages of having it in openib svn?

- R.

I guess one could go either way. The benefit of keeping it 
in SVN is that if any changes are needed, they are
in some database until they are pushed upstream. The benefit
of just having the kernel.org version being the latest is that
there is only one copy and no confusion as to what is the 
latest version. It seems odd though to have the .h file only 
in the kernel.org tree and the srp.c files in openib SVN. 
One could argue that once a component is accepted upstream,
that the kernel.org version is the latest, but then it makes tracking
in between kernel.org releases a bit more difficult.

Anyone else have a comment on this one ?

woody


___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

RE: [openib-general] [PATCH] Problem with directed route SMPs withbeginning or ending LID routed parts

2006-01-17 Thread Hal Rosenstock

Hi Ralph,
 
This is much simpler :-)
 
Thanks! Applied. 
 
I tested this both in an operational network with some different topologies as 
well as passing the previously failed compliance C14-11.
 
-- Hal



From: [EMAIL PROTECTED] on behalf of Ralph Campbell
Sent: Mon 1/16/2006 2:17 PM
To: openib-general@openib.org
Subject: [openib-general] [PATCH] Problem with directed route SMPs 
withbeginning or ending LID routed parts



OK.  Here is a much simplified patch which fixes the problem of a
directed route SMP with a beginning or ending LID routed part.

Signed-off-by: Ralph Campbell [EMAIL PROTECTED]

Index: core/mad.c
===
--- core/mad.c  (revision 5030)
+++ core/mad.c  (working copy)
@@ -665,7 +665,15 @@
struct ib_wc mad_wc;
struct ib_send_wr *send_wr = mad_send_wr-send_wr;

-   if (!smi_handle_dr_smp_send(smp, device-node_type, port_num)) {
+   /*
+* Directed route handling starts if the initial LID routed part of
+* a request or the ending LID routed part of a response is empty.
+* If we are at the start of the LID routed part, don't update the
+* hop_ptr or hop_cnt.  See section 14.2.2, Vol 1 IB spec.
+*/
+   if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) ==
+IB_LID_PERMISSIVE &&
+   !smi_handle_dr_smp_send(smp, device->node_type, port_num)) {
ret = -EINVAL;
printk(KERN_ERR PFX Invalid directed route\n);
goto out;

--
Ralph Campbell [EMAIL PROTECTED]

___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general


___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Re: [openib-general] Re: [PATCH] enable the fmr pool user to set the pagesize

2006-01-17 Thread Dan Bar Dov

On 1/17/06, Michael S. Tsirkin [EMAIL PROTECTED] wrote:
 Quoting Or Gerlitz [EMAIL PROTECTED]:
  Second and indeed more important, from our experience, there are
  eventually IB consumers such as the Linux SCSI Mid-Layer which sometimes
  generate Scatter-Gather lists that are RDMA aligned when treated in a
  resolution different from the system PAGE_SHIFT. Example to that we saw
  with ia64 SLES9 SP1/2 kernels. So if you work in PAGE_SHIFT you can not
  produce one VA for many of the SG submitted by the mid-layer.

 Interesting. Where does the mid-layer get the 4K (not PAGE_SIZE) aligned
 buffers?  Any idea?

Not really. We suspect a different allocation unit in the buffer cache
or some file systems not doing what we think they do..
We saw it also on Itanium with 16K page size that was sending 4K sg elements.
What it made obvious is that the memory registration restrictions are
the HCA restrictions, and those have nothing to do with kernel
restrictions, yet the driver code relies on kernel restrictions.

Dan

 --
 MST
 ___
 openib-general mailing list
 openib-general@openib.org
 http://openib.org/mailman/listinfo/openib-general

 To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Re: [openib-general] [PATCH] race in pingpong -e

2006-01-17 Thread Roland Dreier

Thanks, applied.
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] Re: [PATCH] ipoib: path-ah

2006-01-17 Thread Roland Dreier

Thanks, applied.
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] Re: [PATCH] ipoib: pkt_queue

2006-01-17 Thread Roland Dreier

  -while (!skb_queue_empty(mcast-pkt_queue))
  +while (!skb_queue_empty(mcast-pkt_queue)) {
  +spin_lock_irqsave(priv-tx_lock, flags);
  +++priv-stats.tx_dropped;
  +spin_unlock_irqrestore(priv-tx_lock, flags);
   dev_kfree_skb_any(skb_dequeue(mcast-pkt_queue));
  +}

Any reason to drop the lock every time around this loop?  Would it
make more sense to count the number of packets and then just add it in
after the loop?

  +spin_lock_irq(priv-tx_lock);
   while (!skb_queue_empty(mcast-pkt_queue)) {
   struct sk_buff *skb = skb_dequeue(mcast-pkt_queue);
  +spin_unlock_irq(priv-tx_lock);

Again, why are we dropping the lock every time through this loop?  Is
it just to reduce the lock hold time here?

 - R.
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] Re: [PATCH] uverbs: flush scheduled_work

2006-01-17 Thread Roland Dreier

Thanks, applied.
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] Re: [PATCH] ipoib: pkt_queue

2006-01-17 Thread Michael S. Tsirkin

Quoting r. Roland Dreier [EMAIL PROTECTED]:
 Subject: Re: [PATCH] ipoib: pkt_queue
 
   -  while (!skb_queue_empty(mcast-pkt_queue))
   +  while (!skb_queue_empty(mcast-pkt_queue)) {
   +  spin_lock_irqsave(priv-tx_lock, flags);
   +  ++priv-stats.tx_dropped;
   +  spin_unlock_irqrestore(priv-tx_lock, flags);
  dev_kfree_skb_any(skb_dequeue(mcast-pkt_queue));
   +  }
 
 Any reason to drop the lock every time around this loop?  Would it
 make more sense to count the number of packets and then just add it in
 after the loop?

Makes sense.

   +  spin_lock_irq(priv-tx_lock);
  while (!skb_queue_empty(mcast-pkt_queue)) {
  struct sk_buff *skb = skb_dequeue(mcast-pkt_queue);
   +  spin_unlock_irq(priv-tx_lock);
 
 Again, why are we dropping the lock every time through this loop?  Is
 it just to reduce the lock hold time here?

We seem to be doing operations that can't be called under
tx_lock a few lines below.

-- 
MST
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] Re: [PATCH] ipoib: pkt_queue

2006-01-17 Thread Roland Dreier

Michael We seem to be doing operations that cant be called under
Michael tx_lock a few lines below.

Do you mean dev_queue_xmit()?  Can that call directly back into our
xmit function (I honestly don't know the locking rules here)?

 - R.
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Re: [openib-general] Re: [PATCH] ipoib: pkt_queue

2006-01-17 Thread Michael S. Tsirkin

Quoting r. Roland Dreier [EMAIL PROTECTED]:
 Subject: [openib-general] Re: [PATCH] ipoib: pkt_queue
 
 Michael We seem to be doing operations that cant be called under
 Michael tx_lock a few lines below.
 
 Do you mean dev_queue_xmit()?  Can that call directly back into our
 xmit function (I honestly don't know the locking rules here)?

Yes, exactly.

-- 
MST
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] Re: Re: [PATCH] ipoib: pkt_queue

2006-01-17 Thread Michael S. Tsirkin

Quoting r. Michael S. Tsirkin [EMAIL PROTECTED]:
 Subject: Re: Re: [PATCH] ipoib: pkt_queue

 Quoting r. Roland Dreier [EMAIL PROTECTED]:
  Subject: [openib-general] Re: [PATCH] ipoib: pkt_queue

  Michael We seem to be doing operations that cant be called under
  Michael tx_lock a few lines below.

  Do you mean dev_queue_xmit()?  Can that call directly back into our
  xmit function (I honestly don't know the locking rules here)?

 Yes, exactly.

Here it is from net/core/dev.c

/**
 *  dev_queue_xmit - transmit a buffer
 *  @skb: buffer to transmit
 *
 *  Queue a buffer for transmission to a network device. The caller must
 *  have set the device and priority and built the buffer before calling
 *  this function. The function can be called from an interrupt.
 *
 *  A negative errno code is returned on a failure. A success does not
 *  guarantee the frame will be transmitted as it may be dropped due
 *  to congestion or traffic shaping.
 *
 * 
---
 *  I notice this method can also return errors from the queue disciplines,
 *  including NET_XMIT_DROP, which is a positive value.  So, errors can also
 *  be positive.
 *
 *  Regardless of the return value, the skb is consumed, so it is currently
 *  difficult to retry a send to this method.  (You can bump the ref count
 *  before sending to hold a reference for retry if you are careful.)
 *
 *  When calling this method, interrupts MUST be enabled.  This is because
 *  the BH enable code must have IRQs enabled so that it will not deadlock.
 *  --BLG
 */

int dev_queue_xmit(struct sk_buff *skb)
{
struct net_device *dev = skb-dev;
struct Qdisc *q;
int rc = -ENOMEM;

if (skb_shinfo(skb)-frag_list 
!(dev-features  NETIF_F_FRAGLIST) 
__skb_linearize(skb, GFP_ATOMIC))
goto out_kfree_skb;

/* Fragmented skb is linearized if device does not support SG,
 * or if at least one of fragments is in highmem and device
 * does not support DMA from it.
 */
if (skb_shinfo(skb)-nr_frags 
(!(dev-features  NETIF_F_SG) || illegal_highdma(dev, skb)) 
__skb_linearize(skb, GFP_ATOMIC))
goto out_kfree_skb;

/* If packet is not checksummed and device does not support
 * checksumming for this protocol, complete checksumming here.
 */
if (skb-ip_summed == CHECKSUM_HW 
(!(dev-features  (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) 
 (!(dev-features  NETIF_F_IP_CSUM) ||
  skb-protocol != htons(ETH_P_IP
if (skb_checksum_help(skb, 0))
goto out_kfree_skb;

spin_lock_prefetch(dev-queue_lock);

/* Disable soft irqs for various locks below. Also 
 * stops preemption for RCU. 
 */
local_bh_disable(); 

/* Updates of qdisc are serialized by queue_lock. 
 * The struct Qdisc which is pointed to by qdisc is now a 
 * rcu structure - it may be accessed without acquiring 
 * a lock (but the structure may be stale.) The freeing of the
 * qdisc will be deferred until it's known that there are no 
 * more references to it.
 * 
 * If the qdisc has an enqueue function, we still need to 
 * hold the queue_lock before calling it, since queue_lock
 * also serializes access to the device queue.
 */

q = rcu_dereference(dev-qdisc);
#ifdef CONFIG_NET_CLS_ACT
skb-tc_verd = SET_TC_AT(skb-tc_verd,AT_EGRESS);
#endif
if (q-enqueue) {
/* Grab device queue */
spin_lock(dev-queue_lock);

rc = q-enqueue(skb, q);

qdisc_run(dev);

spin_unlock(dev-queue_lock);
rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
goto out;
}

/* The device has no queue. Common case for software devices:
   loopback, all the sorts of tunnels...

   Really, it is unlikely that xmit_lock protection is necessary here.
   (f.e. loopback and IP tunnels are clean ignoring statistics
   counters.)
   However, it is possible, that they rely on protection
   made by us here.

   Check this and shot the lock. It is not prone from deadlocks.
   Either shot noqueue qdisc, it is even simpler 8)
 */
if (dev-flags  IFF_UP) {
int cpu = smp_processor_id(); /* ok because BHs are off */

if (dev-xmit_lock_owner != cpu) {

HARD_TX_LOCK(dev, cpu);

if (!netif_queue_stopped(dev)) {
if (netdev_nit)
dev_queue_xmit_nit(skb, dev);

[openib-general] Re: [PATCH] ipoib: pkt_queue

2006-01-17 Thread Michael S. Tsirkin

Quoting r. Roland Dreier [EMAIL PROTECTED]:
 Any reason to drop the lock every time around this loop?  Would it
 make more sense to count the number of packets and then just add it in
 after the loop?

Is this better?

--

Protect accesses to mcast-pkt_queue by tx_lock.
Count multicast packets removed from pkt_queue as dropped.

Signed-off-by: Michael S. Tsirkin [EMAIL PROTECTED]

Index: openib/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
===
--- openib.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c  2006-01-15 
16:14:00.0 +0200
+++ openib/drivers/infiniband/ulp/ipoib/ipoib_multicast.c   2006-01-17 
21:11:39.0 +0200
@@ -97,6 +97,7 @@ static void ipoib_mcast_free(struct ipoi
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_neigh *neigh, *tmp;
unsigned long flags;
+   int tx_dropped = 0;
 
ipoib_dbg_mcast(netdev_priv(dev),
deleting multicast group  IPOIB_GID_FMT \n,
@@ -123,8 +124,13 @@ static void ipoib_mcast_free(struct ipoi
if (mcast-ah)
ipoib_put_ah(mcast-ah);
 
-   while (!skb_queue_empty(mcast-pkt_queue))
+   while (!skb_queue_empty(mcast-pkt_queue)) {
+   ++tx_dropped;
dev_kfree_skb_any(skb_dequeue(mcast-pkt_queue));
+   }
+   spin_lock_irqsave(priv-tx_lock, flags);
+   priv-stats.tx_dropped += tx_dropped;
+   spin_unlock_irqrestore(priv-tx_lock, flags);
 
kfree(mcast);
 }
@@ -276,8 +282,10 @@ static int ipoib_mcast_join_finish(struc
}
 
/* actually send any queued packets */
+   spin_lock_irq(priv-tx_lock);
while (!skb_queue_empty(mcast-pkt_queue)) {
struct sk_buff *skb = skb_dequeue(mcast-pkt_queue);
+   spin_unlock_irq(priv-tx_lock);
 
skb-dev = dev;
 
@@ -288,7 +296,9 @@ static int ipoib_mcast_join_finish(struc
 
if (dev_queue_xmit(skb))
ipoib_warn(priv, dev_queue_xmit failed to requeue 
packet\n);
+   spin_lock_irq(priv-tx_lock);
}
+   spin_unlock_irq(priv-tx_lock);
 
return 0;
 }
@@ -300,6 +310,7 @@ ipoib_mcast_sendonly_join_complete(int s
 {
struct ipoib_mcast *mcast = mcast_ptr;
struct net_device *dev = mcast-dev;
+   struct ipoib_dev_priv *priv = netdev_priv(dev);
 
if (!status)
ipoib_mcast_join_finish(mcast, mcmember);
@@ -310,8 +321,12 @@ ipoib_mcast_sendonly_join_complete(int s
IPOIB_GID_ARG(mcast-mcmember.mgid), 
status);
 
/* Flush out any queued packets */
-   while (!skb_queue_empty(mcast-pkt_queue))
+   spin_lock_irq(priv-tx_lock);
+   while (!skb_queue_empty(mcast-pkt_queue)) {
+   ++priv-stats.tx_dropped;
dev_kfree_skb_any(skb_dequeue(mcast-pkt_queue));
+   }
+   spin_unlock_irq(priv-tx_lock);
 
/* Clear the busy flag so we try again */
clear_bit(IPOIB_MCAST_FLAG_BUSY, mcast-flags);
@@ -687,6 +702,7 @@ void ipoib_mcast_send(struct net_device 
if (!mcast) {
ipoib_warn(priv, unable to allocate memory for 
   multicast structure\n);
+   ++priv-stats.tx_dropped;
dev_kfree_skb_any(skb);
goto out;
}
@@ -700,8 +716,10 @@ void ipoib_mcast_send(struct net_device 
if (!mcast-ah) {
if (skb_queue_len(mcast-pkt_queue)  IPOIB_MAX_MCAST_QUEUE)
skb_queue_tail(mcast-pkt_queue, skb);
-   else
+   else {
+   ++priv-stats.tx_dropped;
dev_kfree_skb_any(skb);
+   }
 
if (mcast-query)
ipoib_dbg_mcast(priv, no address vector, 

-- 
MST
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] ipoib_mcast_send.patch

2006-01-17 Thread Michael S. Tsirkin

Quoting r. Michael S. Tsirkin [EMAIL PROTECTED]:
 Subject: Re: Re: ipoib: outstanding patches
 
 Quoting Roland Dreier [EMAIL PROTECTED]:
   ipoib_mcast_send.patch
  
  Could we reuse the IPOIB_MCAST_RUN bit rather than adding a new bit?
  It seems that we could kill mcast_mutex and replace uses with
  priv-lock instead -- I don't see anything that sleeps inside mcast_mutex.
 
 Yes, I now believe that we should be able to do it this way.

Something like this?

---

Fix the following race scenario:
Device is up.
Port event or set mcast list triggers ipoib_mcast_stop_thread,
this cancels the query and waits on mcast done completion.
Completion is called and done is set.
Meanwhile, ipoib_mcast_send arrives and starts a new query,
re-initializing done.

Further, there's an additional issue that I saw in testing:
ipoib_mcast_send may get called when priv-broadcast is NULL
(e.g. if the device was downed and then upped internally because
of a port event).
If this happens and the sendonly join request gets completed before
priv-broadcast is set, we get an oops.


Do not send multicasts if mcast thread is stopped or if
priv-broadcast is not set.

Signed-off-by: Michael S. Tsirkin [EMAIL PROTECTED]

Index: openib/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
===
--- openib.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c  2006-01-17 
21:13:43.0 +0200
+++ openib/drivers/infiniband/ulp/ipoib/ipoib_multicast.c   2006-01-17 
21:18:36.0 +0200
@@ -55,8 +55,6 @@ MODULE_PARM_DESC(mcast_debug_level,
 Enable multicast debug tracing if  0);
 #endif
 
-static DEFINE_MUTEX(mcast_mutex);
-
 /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
 struct ipoib_mcast {
struct ib_sa_mcmember_rec mcmember;
@@ -385,10 +383,10 @@ static void ipoib_mcast_join_complete(in
 
if (!status  !ipoib_mcast_join_finish(mcast, mcmember)) {
mcast-backoff = 1;
-   mutex_lock(mcast_mutex);
+   spin_lock_irq(priv-lock);
if (test_bit(IPOIB_MCAST_RUN, priv-flags))
queue_work(ipoib_workqueue, priv-mcast_task);
-   mutex_unlock(mcast_mutex);
+   spin_unlock_irq(priv-lock);
complete(mcast-done);
return;
}
@@ -418,7 +416,7 @@ static void ipoib_mcast_join_complete(in
 
mcast-query = NULL;
 
-   mutex_lock(mcast_mutex);
+   spin_lock_irq(priv-lock);
if (test_bit(IPOIB_MCAST_RUN, priv-flags)) {
if (status == -ETIMEDOUT)
queue_work(ipoib_workqueue, priv-mcast_task);
@@ -427,7 +425,7 @@ static void ipoib_mcast_join_complete(in
   mcast-backoff * HZ);
} else
complete(mcast-done);
-   mutex_unlock(mcast_mutex);
+   spin_unlock_irq(priv-lock);
 
return;
 }
@@ -482,12 +480,12 @@ static void ipoib_mcast_join(struct net_
if (mcast-backoff  IPOIB_MAX_BACKOFF_SECONDS)
mcast-backoff = IPOIB_MAX_BACKOFF_SECONDS;
 
-   mutex_lock(mcast_mutex);
+   spin_lock_irq(priv-lock);
if (test_bit(IPOIB_MCAST_RUN, priv-flags))
queue_delayed_work(ipoib_workqueue,
   priv-mcast_task,
   mcast-backoff * HZ);
-   mutex_unlock(mcast_mutex);
+   spin_unlock_irq(priv-lock);
} else
mcast-query_id = ret;
 }
@@ -520,11 +518,11 @@ void ipoib_mcast_join_task(void *dev_ptr
priv-broadcast = ipoib_mcast_alloc(dev, 1);
if (!priv-broadcast) {
ipoib_warn(priv, failed to allocate broadcast 
group\n);
-   mutex_lock(mcast_mutex);
+   spin_lock_irq(priv-lock);
if (test_bit(IPOIB_MCAST_RUN, priv-flags))
queue_delayed_work(ipoib_workqueue,
   priv-mcast_task, HZ);
-   mutex_unlock(mcast_mutex);
+   spin_unlock_irq(priv-lock);
return;
}
 
@@ -580,10 +578,10 @@ int ipoib_mcast_start_thread(struct net_
 
ipoib_dbg_mcast(priv, starting multicast thread\n);
 
-   mutex_lock(mcast_mutex);
+   spin_lock_irq(priv-lock);
if (!test_and_set_bit(IPOIB_MCAST_RUN, priv-flags))
queue_work(ipoib_workqueue, priv-mcast_task);
-   mutex_unlock(mcast_mutex);
+   spin_unlock_irq(priv-lock);
 
return 0;
 }
@@ -595,10 +593,10 @@ int ipoib_mcast_stop_thread(struct net_d
 
ipoib_dbg_mcast(priv, stopping multicast thread\n);
 
-   mutex_lock(mcast_mutex);
+   spin_lock_irq(priv-lock);
clear_bit(IPOIB_MCAST_RUN, priv-flags);

Re: [openib-general] RE: [RFC] DAT 2.0 extension proposal

2006-01-17 Thread Arlin Davis


Kanevsky, Arkady wrote:


Arlin,
 
1. Does it mean that existing DAT providers will have to be modified 
so they report

DAT_NOT_IMPLEMENTED for each extension?


No.

During the open, a dat library built to support extensions makes a query 
call to verify that the provider supports extensions, and sets a 
global flag accordingly. This flag is checked via our single 
dat_extension call in dat_api. Take a look at the patch for all the details.


 
2. Why is there DAT_INVALID in DAT_DTOS?


no reason. I can get rid of it. I will go ahead and keep this in sync 
with the latest 1.3 (2.0) definitions.


 
3. Do you want to use DAT_EXTENSION_DATA or DAT_EXT_DATA?


sure.

 
4. The proposed operations are operation on EP and they are DTOs.

Why not define DAT_DTO_EXT_OP instead of DAT_EXT_OP?


Yes, it makes more sense if we decide to limit these extensions to DTO 
types.


 
MY concern is that if these are not DTO then we have a new event 
stream type
for extensions and we need to define rules for this event stream 
including
ordering rules and interactions with other event streams, provider 
attributes

for stream mixing and so on...
 
If we restrict extensions to DTO operation extension we avoid all 
these issues

and simplify APIs. On the negative side these extension are restrictive.



I  have no problem limiting this proposal and work to DTO extensions. 
However, we should get consensus on this.


 
5. Memory protection extension for atomic operations
 
6. error returns for extensions?


yes and yes;  I will work these into the next patch and update the proposal.

-arlin
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] Code with questionable license in OpenIB tree

2006-01-17 Thread Bryan O'Sullivan

Hi, Michael -

I have found some code in the OpenIB Subversion repo that appears to
have been committed by you, and which has Mellanox proprietary licenses
in the header files.

Most of the files in the src/userspace/imgen directory contain the
following boilerplate text:

 *  - Mellanox Confidential and Proprietary -
 *
 * Copyright (C) July 2002, Mellanox Technologies Ltd. ALL RIGHTS 
RESERVED.
 *
 * Except as specifically permitted herein, no portion of the 
information,
 * including but not limited to object code and source code, may be 
reproduced,
 * modified, distributed, republished or otherwise exploited in any 
form or by
 * any means for any purpose without the prior written permission of 
Mellanox
 * Technologies Ltd. Use of software subject to the terms and conditions
 * detailed in the file LICENSE.txt.

There is no LICENSE.txt file in that portion of the tree.  The only file
by that name anywhere in the tree is a copy of the Common Public License
in src/userspace/dapl/LICENSE.txt.  However, it is not at all clear that
this is the license that you intended to reference.

I would appreciate a modification of the licensing language on those
files, to something that is more in line with the rest of the openib.org
tree (i.e. open to free redistribution and modification).

Regards,

b

___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Re: [openib-general] RE: [RFC] DAT 2.0 extension proposal

2006-01-17 Thread Arlin Davis


Arlin Davis wrote:


Kanevsky, Arkady wrote:



5. Memory protection extension for atomic operations
 
6. error returns for extensions?



yes and yes;  I will work these into the next patch and update the 
proposal.


For error returns I am thinking about carving up the return type, adding 
a new mask, and extension get type macro. Suggestions on carving up the 
following? Carve into type or subtype? other suggestions?


type:  DAT_RETURN_CLASS   DAT_RETURN_TYPE   DAT_RETURN_SUBTYPE
bits:  31-30              29-16             15-0


-arlin
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] Re: ipoib_mcast_send.patch

2006-01-17 Thread Roland Dreier

Does this actually work?

  +if (!test_bit(IPOIB_MCAST_RUN, priv-flags) || !priv-broadcast) {
  +dev_kfree_skb_any(skb);
  +goto unlock;
  +}

It seems that this code at the end of ipoib_mcast_join_task() might
screw things up:

ipoib_dbg_mcast(priv, successfully joined all multicast groups\n);

clear_bit(IPOIB_MCAST_RUN, priv-flags);

Probably the semantics of IPOIB_MCAST_RUN need to change slightly.
I'm not sure this necessarily can be made to work -- maybe we just
need more than one bit of status information to handle everything.

Also should we count dropped packets here?

 - R.
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] Re: [PATCH] ipoib: pkt_queue

2006-01-17 Thread Roland Dreier

Yes, looks good ... committed
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] Re: [PATCH] enable the fmr pool user to set the page size

2006-01-17 Thread Roland Dreier

Seems reasonable.  Unfortunately we just missed the 2.6.16-rc1 window
so I think this should wait for the 2.6.17 window.

BTW, do you ever see the SCSI layer giving you 512 byte blocks?

 - R.
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Re: [openib-general] Re: ipoib_mcast_send.patch

2006-01-17 Thread Michael S. Tsirkin

Quoting Roland Dreier [EMAIL PROTECTED]:
 It seems that this code at the end of ipoib_mcast_join_task() might
 screw things up:
 
   ipoib_dbg_mcast(priv, successfully joined all multicast groups\n);
 
   clear_bit(IPOIB_MCAST_RUN, priv-flags);

Right. That probably was the reason I invented MCAST_STARTED.

 Probably the semantics of IPOIB_MCAST_RUN need to change slightly.
 I'm not sure this necessarily can be made to work -- maybe we just
 need more than one bit of status information to handle everything.

Kind of like what original patch in svn does?

-- 
MST
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Re: [openib-general] Re: ipoib_mcast_send.patch

2006-01-17 Thread Roland Dreier

Michael Kind of like what original patch in svn does?

Yeah -- my original question about reusing the MCAST_RUN bit was an
honest question -- and it seems the answer is probably, no, we need
another bit to make it work.

It seems that killing mcast_mutex might be a good, independent cleanup.
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Re: [openib-general] Re: ipoib_mcast_send.patch

2006-01-17 Thread Michael S. Tsirkin

Quoting r. Roland Dreier [EMAIL PROTECTED]:
 Subject: Re: [openib-general] Re: ipoib_mcast_send.patch
 
 Michael Kind of like what original patch in svn does?
 
 Yeah -- my original question about reusing the MCAST_RUN bit was an
 honest question -- and it seems the answer is probably, no, we need
 another bit to make it work.
 
 It seems that killing mcast_mutex might be a good, independent cleanup.

pkey_mutex too.

-- 
MST
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] [PATCH] fix minor typo in SDP

2006-01-17 Thread Ralph Campbell

This patch fixes a minor misspelling in SDP.

Signed-off-by: Ralph Campbell [EMAIL PROTECTED]

Index: ulp/sdp/sdp_inet.c
===
--- ulp/sdp/sdp_inet.c  (revision 5055)
+++ ulp/sdp/sdp_inet.c  (working copy)
@@ -836,7 +836,7 @@
 
/*
 * file and/or wait can be NULL, once poll is asleep and needs to
-* recheck the falgs on being woken.
+* recheck the flags on being woken.
 */
sk = sock-sk;
conn = sdp_sk(sk);
Index: ulp/sdp/sdp_recv.c
===
--- ulp/sdp/sdp_recv.c  (revision 5055)
+++ ulp/sdp/sdp_recv.c  (working copy)
@@ -1234,7 +1234,7 @@
sk = sock-sk;
conn = sdp_sk(sk);
 
-   sdp_dbg_data(conn, state %08x size %Zu pending %d falgs %08x,
+   sdp_dbg_data(conn, state %08x size %Zu pending %d flags %08x,
 conn-state, size, conn-byte_strm, flags);
sdp_dbg_data(conn, read IOCB %d addr %p users %d flags %08lx,
 req-ki_key, msg-msg_iov-iov_base,

-- 
Ralph Campbell [EMAIL PROTECTED]

___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] [PATCH 1 of 3] move destructor to struct neigh_parms

2006-01-17 Thread Michael S. Tsirkin

Quoting Michael S. Tsirkin [EMAIL PROTECTED]:
 Subject: [PATCH 1 of 3] move destructor to struct neigh_parms
 
 This is an alternative approach to the one presented in
 ipoib_all_neigh_issues_2.patch.
 
 ---
 
 Move destructor from neigh_ops (which is shared between devices)
 to neigh_parms which is not, so that multiple drivers can set
 it safely.
 
 Signed-off-by: Michael S. Tsirkin [EMAIL PROTECTED]
 
 Index: linux-2.6.15/net/core/neighbour.c
 ===
 --- linux-2.6.15.orig/net/core/neighbour.c2006-01-12 11:58:15.0 
 +0200
 +++ linux-2.6.15/net/core/neighbour.c 2006-01-12 20:10:00.0 +0200
 @@ -586,8 +586,8 @@ void neigh_destroy(struct neighbour *nei
   kfree(hh);
   }
  
 - if (neigh-ops  neigh-ops-destructor)
 - (neigh-ops-destructor)(neigh);
 + if (neigh-parms-neigh_destructor)
 + (neigh-parms-neigh_destructor)(neigh);
  
   skb_queue_purge(neigh-arp_queue);
  
 Index: linux-2.6.15/include/net/neighbour.h
 ===
 --- linux-2.6.15.orig/include/net/neighbour.h 2006-01-03 05:21:10.0 
 +0200
 +++ linux-2.6.15/include/net/neighbour.h  2006-01-12 20:09:27.0 
 +0200
 @@ -68,6 +68,7 @@ struct neigh_parms
   struct net_device *dev;
   struct neigh_parms *next;
   int (*neigh_setup)(struct neighbour *);
 + void(*neigh_destructor)(struct neighbour *);
   struct neigh_table *tbl;
  
   void*sysctl_table;
 @@ -145,7 +146,6 @@ struct neighbour
  struct neigh_ops
  {
   int family;
 - void(*destructor)(struct neighbour *);
   void(*solicit)(struct neighbour *, struct sk_buff*);
   void(*error_report)(struct neighbour *, struct 
 sk_buff*);
   int (*output)(struct sk_buff*);
 

Roland, what do you say to this approach?
We still could try this for 2.6.16, couldn't we?
It's small and the interface is unused in kernel except by us.

Otherwise for 2.6.16 and earlier we'll have to maintain the global list of
neighbours along the lines of ipoib_all_neigh_issues_2.patch.

-- 
MST
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] [PATCH 0/5] [RFC] Infiniband: connection abstraction

2006-01-17 Thread Sean Hefty

The following set of patches defines a connection abstraction for Infiniband and
other RDMA devices, and serves several purposes:

* It implements a connection protocol over Infiniband based on IP addressing.
This greatly simplifies clients wishing to establish connections over
Infiniband.

* It defines a connection abstraction that works over multiple RDMA devices.
The submitted implementation targets Infiniband, but has been tested over other
RDMA devices as well.

* It handles RDMA device insertion and removal on behalf of its clients.

The changes have been broken into 5 separate patches.  The basic purpose of each
patch is:

1. Provide common handling for marshalling data between userspace clients and
kernel mode Infiniband  drivers.

2. Extend the Infiniband CM to include private data comparisons as part of its
connection request matching process.

3. Provide an address translation service that maps IP addresses to Infiniband
addresses (GIDs).  This patch touches outside of the Infiniband core, so I'm
including the netdev mailing list.

4. Implement the kernel mode RDMA connection management agent.

5. Implement the userspace RDMA connection management agent kernel support
module.

Please copy the openib-general mailing list on any replies.

Thanks,
Sean

___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] [PATCH 1/5] [RFC] Infiniband: connection abstraction

2006-01-17 Thread Sean Hefty

The following patch provides common handling for marshalling data between
userspace clients and kernel mode Infiniband drivers.

Signed-off-by: Sean Hefty [EMAIL PROTECTED]

---

diff -uprN -X linux-2.6.git/Documentation/dontdiff 
linux-2.6.git/drivers/infiniband/core/Makefile 
linux-2.6.ib/drivers/infiniband/core/Makefile
--- linux-2.6.git/drivers/infiniband/core/Makefile  2006-01-16 
10:25:27.0 -0800
+++ linux-2.6.ib/drivers/infiniband/core/Makefile   2006-01-16 
15:34:15.0 -0800
@@ -16,4 +16,5 @@ ib_umad-y :=  user_mad.o
 
 ib_ucm-y :=ucm.o
 
-ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o
+ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \
+   uverbs_marshall.o
diff -uprN -X linux-2.6.git/Documentation/dontdiff 
linux-2.6.git/drivers/infiniband/core/ucm.c 
linux-2.6.ib/drivers/infiniband/core/ucm.c
--- linux-2.6.git/drivers/infiniband/core/ucm.c 2006-01-16 10:25:26.0 
-0800
+++ linux-2.6.ib/drivers/infiniband/core/ucm.c  2006-01-16 15:34:15.0 
-0800
@@ -30,7 +30,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: ucm.c 2594 2005-06-13 19:46:02Z libor $
+ * $Id: ucm.c 4311 2005-12-05 18:42:01Z sean.hefty $
  */
 #include linux/init.h
 #include linux/fs.h
@@ -48,6 +48,7 @@
 
 #include rdma/ib_cm.h
 #include rdma/ib_user_cm.h
+#include rdma/ib_marshall.h
 
 MODULE_AUTHOR(Libor Michalek);
 MODULE_DESCRIPTION(InfiniBand userspace Connection Manager access);
@@ -203,36 +204,6 @@ error:
return NULL;
 }
 
-static void ib_ucm_event_path_get(struct ib_ucm_path_rec *upath,
- struct ib_sa_path_rec  *kpath)
-{
-   if (!kpath || !upath)
-   return;
-
-   memcpy(upath-dgid, kpath-dgid.raw, sizeof *upath-dgid);
-   memcpy(upath-sgid, kpath-sgid.raw, sizeof *upath-sgid);
-
-   upath-dlid = kpath-dlid;
-   upath-slid = kpath-slid;
-   upath-raw_traffic  = kpath-raw_traffic;
-   upath-flow_label   = kpath-flow_label;
-   upath-hop_limit= kpath-hop_limit;
-   upath-traffic_class= kpath-traffic_class;
-   upath-reversible   = kpath-reversible;
-   upath-numb_path= kpath-numb_path;
-   upath-pkey = kpath-pkey;
-   upath-sl   = kpath-sl;
-   upath-mtu_selector = kpath-mtu_selector;
-   upath-mtu  = kpath-mtu;
-   upath-rate_selector= kpath-rate_selector;
-   upath-rate = kpath-rate;
-   upath-packet_life_time = kpath-packet_life_time;
-   upath-preference   = kpath-preference;
-
-   upath-packet_life_time_selector =
-   kpath-packet_life_time_selector;
-}
-
 static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq,
 struct ib_cm_req_event_param *kreq)
 {
@@ -251,8 +222,10 @@ static void ib_ucm_event_req_get(struct 
ureq-srq= kreq-srq;
ureq-port   = kreq-port;
 
-   ib_ucm_event_path_get(ureq-primary_path, kreq-primary_path);
-   ib_ucm_event_path_get(ureq-alternate_path, kreq-alternate_path);
+   ib_copy_path_rec_to_user(ureq-primary_path, kreq-primary_path);
+   if (kreq-alternate_path)
+   ib_copy_path_rec_to_user(ureq-alternate_path,
+kreq-alternate_path);
 }
 
 static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep,
@@ -322,8 +295,8 @@ static int ib_ucm_event_process(struct i
info  = evt-param.rej_rcvd.ari;
break;
case IB_CM_LAP_RECEIVED:
-   ib_ucm_event_path_get(uvt-resp.u.lap_resp.path,
- evt-param.lap_rcvd.alternate_path);
+   ib_copy_path_rec_to_user(uvt-resp.u.lap_resp.path,
+evt-param.lap_rcvd.alternate_path);
uvt-data_len = IB_CM_LAP_PRIVATE_DATA_SIZE;
uvt-resp.present = IB_UCM_PRES_ALTERNATE;
break;
@@ -635,65 +608,11 @@ static ssize_t ib_ucm_attr_id(struct ib_
return result;
 }
 
-static void ib_ucm_copy_ah_attr(struct ib_ucm_ah_attr *dest_attr,
-   struct ib_ah_attr *src_attr)
-{
-   memcpy(dest_attr-grh_dgid, src_attr-grh.dgid.raw,
-  sizeof src_attr-grh.dgid);
-   dest_attr-grh_flow_label = src_attr-grh.flow_label;
-   dest_attr-grh_sgid_index = src_attr-grh.sgid_index;
-   dest_attr-grh_hop_limit = src_attr-grh.hop_limit;
-   dest_attr-grh_traffic_class = src_attr-grh.traffic_class;
-
-   dest_attr-dlid = src_attr-dlid;
-   dest_attr-sl = src_attr-sl;
-   dest_attr-src_path_bits = src_attr-src_path_bits;
-   dest_attr-static_rate = src_attr-static_rate;
-   dest_attr-is_global = (src_attr-ah_flags

[openib-general] RE: [PATCH 2/5] [RFC] Infiniband: connection abstraction

2006-01-17 Thread Sean Hefty

The following patch extends matching connection requests to listens in the
Infiniband CM to include private data.

Signed-off-by: Sean Hefty [EMAIL PROTECTED]

---

diff -uprN -X linux-2.6.git/Documentation/dontdiff 
linux-2.6.git/drivers/infiniband/core/cm.c 
linux-2.6.ib/drivers/infiniband/core/cm.c
--- linux-2.6.git/drivers/infiniband/core/cm.c  2006-01-16 10:25:26.0 
-0800
+++ linux-2.6.ib/drivers/infiniband/core/cm.c   2006-01-16 16:03:35.0 
-0800
@@ -32,7 +32,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: cm.c 2821 2005-07-08 17:07:28Z sean.hefty $
+ * $Id: cm.c 4311 2005-12-05 18:42:01Z sean.hefty $
  */
 #include linux/dma-mapping.h
 #include linux/err.h
@@ -130,6 +130,7 @@ struct cm_id_private {
/* todo: use alternate port on send failure */
struct cm_av av;
struct cm_av alt_av;
+   struct ib_cm_private_data_compare *compare_data;
 
void *private_data;
__be64 tid;
@@ -355,6 +356,40 @@ static struct cm_id_private * cm_acquire
return cm_id_priv;
 }
 
+static void cm_mask_compare_data(u8 *dst, u8 *src, u8 *mask)
+{
+   int i;
+
+   for (i = 0; i < IB_CM_PRIVATE_DATA_COMPARE_SIZE; i++)
+   dst[i] = src[i] & mask[i];
+}
+
+static int cm_compare_data(struct ib_cm_private_data_compare *src_data,
+  struct ib_cm_private_data_compare *dst_data)
+{
+   u8 src[IB_CM_PRIVATE_DATA_COMPARE_SIZE];
+   u8 dst[IB_CM_PRIVATE_DATA_COMPARE_SIZE];
+
+   if (!src_data || !dst_data)
+   return 0;
+   
+   cm_mask_compare_data(src, src_data-data, dst_data-mask);
+   cm_mask_compare_data(dst, dst_data-data, src_data-mask);
+   return memcmp(src, dst, IB_CM_PRIVATE_DATA_COMPARE_SIZE);
+}
+
+static int cm_compare_private_data(u8 *private_data,
+  struct ib_cm_private_data_compare *dst_data)
+{
+   u8 src[IB_CM_PRIVATE_DATA_COMPARE_SIZE];
+
+   if (!dst_data)
+   return 0;
+   
+   cm_mask_compare_data(src, private_data, dst_data-mask);
+   return memcmp(src, dst_data-data, IB_CM_PRIVATE_DATA_COMPARE_SIZE);
+}
+
 static struct cm_id_private * cm_insert_listen(struct cm_id_private 
*cm_id_priv)
 {
struct rb_node **link = cm.listen_service_table.rb_node;
@@ -362,14 +397,18 @@ static struct cm_id_private * cm_insert_
struct cm_id_private *cur_cm_id_priv;
__be64 service_id = cm_id_priv-id.service_id;
__be64 service_mask = cm_id_priv-id.service_mask;
+   int data_cmp;
 
while (*link) {
parent = *link;
cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
  service_node);
+   data_cmp = cm_compare_data(cm_id_priv-compare_data,
+  cur_cm_id_priv-compare_data);
if ((cur_cm_id_priv-id.service_mask  service_id) ==
(service_mask  cur_cm_id_priv-id.service_id) 
-   (cm_id_priv-id.device == cur_cm_id_priv-id.device))
+   (cm_id_priv-id.device == cur_cm_id_priv-id.device) 
+   !data_cmp)
return cur_cm_id_priv;
 
if (cm_id_priv-id.device  cur_cm_id_priv-id.device)
@@ -378,6 +417,10 @@ static struct cm_id_private * cm_insert_
link = (*link)-rb_right;
else if (service_id  cur_cm_id_priv-id.service_id)
link = (*link)-rb_left;
+   else if (service_id  cur_cm_id_priv-id.service_id)
+   link = (*link)-rb_right;
+   else if (data_cmp  0)
+   link = (*link)-rb_left;
else
link = (*link)-rb_right;
}
@@ -387,16 +430,20 @@ static struct cm_id_private * cm_insert_
 }
 
 static struct cm_id_private * cm_find_listen(struct ib_device *device,
-__be64 service_id)
+__be64 service_id,
+u8 *private_data)
 {
struct rb_node *node = cm.listen_service_table.rb_node;
struct cm_id_private *cm_id_priv;
+   int data_cmp;
 
while (node) {
cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
+   data_cmp = cm_compare_private_data(private_data,
+  cm_id_priv-compare_data);
if ((cm_id_priv-id.service_mask  service_id) ==
 cm_id_priv-id.service_id 
-   (cm_id_priv-id.device == device))
+   (cm_id_priv-id.device == device)  !data_cmp)
return cm_id_priv;
 
if (device  cm_id_priv-id.device)
@@ -405,6 +452,10 @@ static struct cm_id_private * cm_find_li
node = node-rb_right;

[openib-general] [PATCH 3/5] [RFC] Infiniband: connection abstraction

2006-01-17 Thread Sean Hefty

The following provides an address translation service that maps IP addresses
to Infiniband addresses (GIDs) using IPoIB.

Signed-off-by: Sean Hefty [EMAIL PROTECTED]

---

diff -uprN -X linux-2.6.git/Documentation/dontdiff 
linux-2.6.git/drivers/infiniband/core/addr.c 
linux-2.6.ib/drivers/infiniband/core/addr.c
--- linux-2.6.git/drivers/infiniband/core/addr.c1969-12-31 
16:00:00.0 -0800
+++ linux-2.6.ib/drivers/infiniband/core/addr.c 2006-01-16 16:14:24.0 
-0800
@@ -0,0 +1,356 @@
+/*
+ * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
+ * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
+ * Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the Common Public License 1.0 a copy of which is
+ *available from the Open Source Initiative, see
+ *http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the The BSD License a copy of which is
+ *available from the Open Source Initiative, see
+ *http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the GNU General Public License (GPL) Version 2 a
+ *copy of which is available from the Open Source Initiative, see
+ *http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+#include linux/inetdevice.h
+#include linux/workqueue.h
+#include net/arp.h
+#include net/neighbour.h
+#include net/route.h
+#include rdma/ib_addr.h
+
+MODULE_AUTHOR("Sean Hefty");
+MODULE_DESCRIPTION("IB Address Translation");
+MODULE_LICENSE("Dual BSD/GPL");
+
+struct addr_req {
+   struct list_head list;
+   struct sockaddr src_addr;
+   struct sockaddr dst_addr;
+   struct rdma_dev_addr *addr;
+   void *context;
+   void (*callback)(int status, struct sockaddr *src_addr,
+struct rdma_dev_addr *addr, void *context);
+   unsigned long timeout;
+   int status;
+};
+
+static void process_req(void *data);
+
+static DECLARE_MUTEX(mutex);
+static LIST_HEAD(req_list);
+static DECLARE_WORK(work, process_req, NULL);
+struct workqueue_struct *rdma_wq;
+EXPORT_SYMBOL(rdma_wq);
+
+static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
+unsigned char *dst_dev_addr)
+{
+   switch (dev-type) {
+   case ARPHRD_INFINIBAND:
+   dev_addr-dev_type = IB_NODE_CA;
+   break;
+   default:
+   return -EADDRNOTAVAIL;
+   }
+
+   memcpy(dev_addr-src_dev_addr, dev-dev_addr, MAX_ADDR_LEN);
+   memcpy(dev_addr-broadcast, dev-broadcast, MAX_ADDR_LEN);
+   if (dst_dev_addr)
+   memcpy(dev_addr-dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
+   return 0;
+}
+
+int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
+{
+   struct net_device *dev;
+   u32 ip = ((struct sockaddr_in *) addr)-sin_addr.s_addr;
+   int ret;
+
+   dev = ip_dev_find(ip);
+   if (!dev)
+   return -EADDRNOTAVAIL;
+
+   ret = copy_addr(dev_addr, dev, NULL);
+   dev_put(dev);
+   return ret;
+}
+EXPORT_SYMBOL(rdma_translate_ip);
+
+static void set_timeout(unsigned long time)
+{
+   unsigned long delay;
+
+   cancel_delayed_work(work);
+
+   delay = time - jiffies;
+   if ((long)delay <= 0)
+   delay = 1;
+
+   queue_delayed_work(rdma_wq, work, delay);
+}
+
+static void queue_req(struct addr_req *req)
+{
+   struct addr_req *temp_req;
+
+   down(mutex);
+   list_for_each_entry_reverse(temp_req, req_list, list) {
+   if (time_after(req-timeout, temp_req-timeout))
+   break;
+   }
+
+   list_add(req-list, temp_req-list);
+
+   if (req_list.next == req-list)
+   set_timeout(req-timeout);
+   up(mutex);
+}
+
+static void addr_send_arp(struct sockaddr_in *dst_in)
+{
+   struct rtable *rt;
+   struct flowi fl;
+   u32 dst_ip = dst_in-sin_addr.s_addr;
+
+   memset(fl, 0, sizeof fl);
+   fl.nl_u.ip4_u.daddr = dst_ip;
+   if (ip_route_output_key(rt, fl))
+   return;
+
+   arp_send(ARPOP_REQUEST, ETH_P_ARP, rt-rt_gateway, rt-idev-dev,
+rt-rt_src, NULL, rt-idev-dev-dev_addr, NULL);
+   ip_rt_put(rt);
+}
+
+static int addr_resolve_remote(struct sockaddr_in *src_in,
+  struct sockaddr_in *dst_in,
+  struct rdma_dev_addr *addr)
+{
+   u32 src_ip

[openib-general] [PATCH 4/5] [RFC] Infiniband: connection abstraction

2006-01-17 Thread Sean Hefty

The following patch implements a kernel mode connection management agent
over Infiniband that connects based on IP addresses.

The agent defines a generic RDMA connection abstraction to support clients
wanting to connect over different RDMA devices.

It also handles RDMA device hotplug events on behalf of clients.

- Signed-off-by: Sean Hefty [EMAIL PROTECTED]

---

diff -uprN -X linux-2.6.git/Documentation/dontdiff 
linux-2.6.git/drivers/infiniband/core/cma.c 
linux-2.6.ib/drivers/infiniband/core/cma.c
--- linux-2.6.git/drivers/infiniband/core/cma.c 1969-12-31 16:00:00.0 
-0800
+++ linux-2.6.ib/drivers/infiniband/core/cma.c  2006-01-16 16:17:34.0 
-0800
@@ -0,0 +1,1639 @@
+/*
+ * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
+ * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
+ * Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the Common Public License 1.0 a copy of which is
+ *available from the Open Source Initiative, see
+ *http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the The BSD License a copy of which is
+ *available from the Open Source Initiative, see
+ *http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the GNU General Public License (GPL) Version 2 a
+ *copy of which is available from the Open Source Initiative, see
+ *http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ */
+#include linux/in.h
+#include linux/in6.h
+#include linux/random.h
+#include rdma/rdma_cm.h
+#include rdma/ib_cache.h
+#include rdma/ib_cm.h
+#include rdma/ib_sa.h
+
+MODULE_AUTHOR("Guy German");
+MODULE_DESCRIPTION("Generic RDMA CM Agent");
+MODULE_LICENSE("Dual BSD/GPL");
+
+#define CMA_CM_RESPONSE_TIMEOUT 20
+#define CMA_MAX_CM_RETRIES 3
+
+static void cma_add_one(struct ib_device *device);
+static void cma_remove_one(struct ib_device *device);
+
+static struct ib_client cma_client = {
+   .name   = cma,
+   .add= cma_add_one,
+   .remove = cma_remove_one
+};
+
+static LIST_HEAD(dev_list);
+static LIST_HEAD(listen_any_list);
+static DECLARE_MUTEX(mutex);
+
+struct cma_device {
+   struct list_headlist;
+   struct ib_device*device;
+   __be64  node_guid;
+   wait_queue_head_t   wait;
+   atomic_trefcount;
+   struct list_headid_list;
+};
+
+enum cma_state {
+   CMA_IDLE,
+   CMA_ADDR_QUERY,
+   CMA_ADDR_RESOLVED,
+   CMA_ROUTE_QUERY,
+   CMA_ROUTE_RESOLVED,
+   CMA_CONNECT,
+   CMA_ADDR_BOUND,
+   CMA_LISTEN,
+   CMA_DEVICE_REMOVAL,
+   CMA_DESTROYING
+};
+
+/*
+ * Device removal can occur at anytime, so we need extra handling to
+ * serialize notifying the user of device removal with other callbacks.
+ * We do this by disabling removal notification while a callback is in process,
+ * and reporting it after the callback completes.
+ */
+struct rdma_id_private {
+   struct rdma_cm_id   id;
+
+   struct list_headlist;
+   struct list_headlisten_list;
+   struct cma_device   *cma_dev;
+
+   enum cma_state  state;
+   spinlock_t  lock;
+   wait_queue_head_t   wait;
+   atomic_trefcount;
+   wait_queue_head_t   wait_remove;
+   atomic_tdev_remove;
+
+   int backlog;
+   int timeout_ms;
+   struct ib_sa_query  *query;
+   int query_id;
+   struct ib_cm_id *cm_id;
+
+   u32 seq_num;
+   u32 qp_num;
+   enum ib_qp_type qp_type;
+   u8  srq;
+};
+
+struct cma_work {
+   struct work_struct  work;
+   struct rdma_id_private  *id;
+};
+
+union cma_ip_addr {
+   struct in6_addr ip6;
+   struct {
+   __u32 pad[3];
+   __u32 addr;
+   } ip4;
+};
+
+struct cma_hdr {
+   u8 cma_version;
+   u8 ip_version;  /* IP version: 7:4 */
+   __u16 port;
+   union cma_ip_addr src_addr;
+   union cma_ip_addr dst_addr;
+};
+
+struct sdp_hh {
+   u8 sdp_version;
+   u8 ip_version;  /* IP version: 7:4 */
+   u8 sdp_specific1[10];
+   __u16 port;
+   __u16 sdp_specific2;
+   union cma_ip_addr src_addr;
+   union cma_ip_addr dst_addr;
+};
+

[openib-general] Re: [PATCH 2/5] [RFC] Infiniband: connection abstraction

2006-01-17 Thread Stephen Hemminger

Minor nits.

On Tue, 17 Jan 2006 15:24:37 -0800
Sean Hefty [EMAIL PROTECTED] wrote:

 The following patch extends matching connection requests to listens in the
 Infiniband CM to include private data.
 
 Signed-off-by: Sean Hefty [EMAIL PROTECTED]
 
 ---

 +static void cm_mask_compare_data(u8 *dst, u8 *src, u8 *mask)

static void cm_mask_compare_data(u8 *dst, const u8 *src, u8 *mask)

but I would rename it to cm_mask_copy since it doesn't really do a compare.


 +{
 + int i;
 +
 + for (i = 0; i  IB_CM_PRIVATE_DATA_COMPARE_SIZE; i++)
 + dst[i] = src[i]  mask[i];
 +}
 +
 +static int cm_compare_data(struct ib_cm_private_data_compare *src_data,
 +struct ib_cm_private_data_compare *dst_data)

static int cm_compare_data(const struct ib_cm_private_data_compare *src,
                           const struct ib_cm_private_data_compare *dst)
Your data type names are getting too long 


<flamebait>
Also should infiniband exports be EXPORT_SYMBOL_GPL, to make
it clear that binary drivers for this are not allowed??
</flamebait>

-- 
Stephen Hemminger [EMAIL PROTECTED]
OSDL http://developer.osdl.org/~shemminger
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[openib-general] [PATCH 5/5] [RFC] Infiniband: connection abstraction

2006-01-17 Thread Sean Hefty

This patch adds the kernel component to support the userspace Infiniband/RDMA
connection agent library.

Signed-off-by: Sean Hefty [EMAIL PROTECTED]

---

diff -uprN -X linux-2.6.git/Documentation/dontdiff 
linux-2.6.git/drivers/infiniband/core/Makefile 
linux-2.6.ib/drivers/infiniband/core/Makefile
--- linux-2.6.git/drivers/infiniband/core/Makefile  2006-01-16 
16:58:58.0 -0800
+++ linux-2.6.ib/drivers/infiniband/core/Makefile   2006-01-16 
16:55:25.0 -0800
@@ -1,5 +1,5 @@
 obj-$(CONFIG_INFINIBAND) +=ib_core.o ib_mad.o ib_sa.o \
-   ib_cm.o ib_addr.o rdma_cm.o
+   ib_cm.o ib_addr.o rdma_cm.o rdma_ucm.o
 obj-$(CONFIG_INFINIBAND_USER_MAD) +=   ib_umad.o
 obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=ib_uverbs.o ib_ucm.o
 
@@ -14,6 +14,8 @@ ib_cm-y :=cm.o
 
 rdma_cm-y :=   cma.o
 
+rdma_ucm-y :=  ucma.o
+
 ib_addr-y :=   addr.o
 
 ib_umad-y :=   user_mad.o
diff -uprN -X linux-2.6.git/Documentation/dontdiff 
linux-2.6.git/drivers/infiniband/core/ucma.c 
linux-2.6.ib/drivers/infiniband/core/ucma.c
--- linux-2.6.git/drivers/infiniband/core/ucma.c1969-12-31 
16:00:00.0 -0800
+++ linux-2.6.ib/drivers/infiniband/core/ucma.c 2006-01-16 16:54:31.0 
-0800
@@ -0,0 +1,788 @@
+/*
+ * Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ *  - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ *  - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include linux/poll.h
+#include linux/idr.h
+#include linux/in.h
+#include linux/in6.h
+#include linux/miscdevice.h
+
+#include rdma/rdma_user_cm.h
+#include rdma/ib_marshall.h
+#include rdma/rdma_cm.h
+
+MODULE_AUTHOR("Sean Hefty");
+MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
+MODULE_LICENSE("Dual BSD/GPL");
+
+enum {
+   UCMA_MAX_BACKLOG= 128
+};
+
+struct ucma_file {
+   struct semaphoremutex;
+   struct file *filp;
+   struct list_headctxs;
+   struct list_headevents;
+   wait_queue_head_t   poll_wait;
+};
+
+struct ucma_context {
+   int id;
+   wait_queue_head_t   wait;
+   atomic_tref;
+   int events_reported;
+   int backlog;
+
+   struct ucma_file*file;
+   struct rdma_cm_id   *cm_id;
+   __u64   uid;
+
+   struct list_headevents;/* list of pending events. */
+   struct list_headfile_list; /* member in file ctx list */
+};
+
+struct ucma_event {
+   struct ucma_context *ctx;
+   struct list_headfile_list; /* member in file event list */
+   struct list_headctx_list;  /* member in ctx event list */
+   struct rdma_cm_id   *cm_id;
+   struct rdma_ucm_event_resp resp;
+};
+
+static DECLARE_MUTEX(ctx_mutex);
+static DEFINE_IDR(ctx_idr);
+
+static struct ucma_context* ucma_get_ctx(struct ucma_file *file, int id)
+{
+   struct ucma_context *ctx;
+
+   down(ctx_mutex);
+   ctx = idr_find(ctx_idr, id);
+   if (!ctx)
+   ctx = ERR_PTR(-ENOENT);
+   else if (ctx-file != file)
+   ctx = ERR_PTR(-EINVAL);
+   else
+   atomic_inc(ctx-ref);
+   up(ctx_mutex);
+
+   return ctx;
+}
+
+static void ucma_put_ctx(struct ucma_context *ctx)
+{
+   if (atomic_dec_and_test(ctx-ref))
+   wake_up(ctx-wait);
+}
+
+static void ucma_cleanup_events(struct ucma_context *ctx)
+{
+   struct ucma_event *uevent;
+
+   down(ctx-file-mutex);
+

[openib-general] RE: [PATCH 2/5] [RFC] Infiniband: connection abstraction

2006-01-17 Thread Sean Hefty

 +static void cm_mask_compare_data(u8 *dst, u8 *src, u8 *mask)

static void cm_mask_compare_data(u8 *dst, const u8 *src, u8 *mask)

but I would rename it to cm_mask_copy since it doesn't really do a compare.

I'll change this.  The function is masking the data to use in the comparison,
but I can see the confusion.

 +static int cm_compare_data(struct ib_cm_private_data_compare *src_data,
 +   struct ib_cm_private_data_compare *dst_data)

static int cm_compare_data(const struct ib_cm_private_data_compare *src,
                           const struct ib_cm_private_data_compare *dst)
Your data type names are getting too long 

I'll fix.

Thanks for the comments.

- Sean

___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Re: [openib-general] RE: [PATCH 2/5] [RFC] Infiniband: connection abstraction

2006-01-17 Thread Grant Grundler

On Tue, Jan 17, 2006 at 03:24:37PM -0800, Sean Hefty wrote:
 +static void cm_mask_compare_data(u8 *dst, u8 *src, u8 *mask)
 +{
 + int i;
 +
 + for (i = 0; i  IB_CM_PRIVATE_DATA_COMPARE_SIZE; i++)
 + dst[i] = src[i]  mask[i];
 +}

Is this code going to get invoked very often?

If so, can the mask operation use a native size since
IB_CM_PRIVATE_DATA_COMPARE_SIZE is hard coded to 64 bytes?

e.g. something like:
for (i = 0; i < IB_CM_PRIVATE_DATA_COMPARE_SIZE/sizeof(unsigned long); i++)
        ((unsigned long *)dst)[i] = ((unsigned long *)src)[i] &
                                    ((unsigned long *)mask)[i];

thanks,
grant

 +
 +static int cm_compare_data(struct ib_cm_private_data_compare *src_data,
 +struct ib_cm_private_data_compare *dst_data)
 +{
 + u8 src[IB_CM_PRIVATE_DATA_COMPARE_SIZE];
 + u8 dst[IB_CM_PRIVATE_DATA_COMPARE_SIZE];
 +
 + if (!src_data || !dst_data)
 + return 0;
 + 
 + cm_mask_compare_data(src, src_data-data, dst_data-mask);
 + cm_mask_compare_data(dst, dst_data-data, src_data-mask);
 + return memcmp(src, dst, IB_CM_PRIVATE_DATA_COMPARE_SIZE);
 +}
 +
 +static int cm_compare_private_data(u8 *private_data,
 +struct ib_cm_private_data_compare *dst_data)
 +{
 + u8 src[IB_CM_PRIVATE_DATA_COMPARE_SIZE];
 +
 + if (!dst_data)
 + return 0;
 + 
 + cm_mask_compare_data(src, private_data, dst_data-mask);
 + return memcmp(src, dst_data-data, IB_CM_PRIVATE_DATA_COMPARE_SIZE);
 +}
 +
  static struct cm_id_private * cm_insert_listen(struct cm_id_private 
 *cm_id_priv)
  {
   struct rb_node **link = cm.listen_service_table.rb_node;
 @@ -362,14 +397,18 @@ static struct cm_id_private * cm_insert_
   struct cm_id_private *cur_cm_id_priv;
   __be64 service_id = cm_id_priv-id.service_id;
   __be64 service_mask = cm_id_priv-id.service_mask;
 + int data_cmp;
  
   while (*link) {
   parent = *link;
   cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
 service_node);
 + data_cmp = cm_compare_data(cm_id_priv-compare_data,
 +cur_cm_id_priv-compare_data);
   if ((cur_cm_id_priv-id.service_mask  service_id) ==
   (service_mask  cur_cm_id_priv-id.service_id) 
 - (cm_id_priv-id.device == cur_cm_id_priv-id.device))
 + (cm_id_priv-id.device == cur_cm_id_priv-id.device) 
 + !data_cmp)
   return cur_cm_id_priv;
  
   if (cm_id_priv-id.device  cur_cm_id_priv-id.device)
 @@ -378,6 +417,10 @@ static struct cm_id_private * cm_insert_
   link = (*link)-rb_right;
   else if (service_id  cur_cm_id_priv-id.service_id)
   link = (*link)-rb_left;
 + else if (service_id  cur_cm_id_priv-id.service_id)
 + link = (*link)-rb_right;
 + else if (data_cmp  0)
 + link = (*link)-rb_left;
   else
   link = (*link)-rb_right;
   }
 @@ -387,16 +430,20 @@ static struct cm_id_private * cm_insert_
  }
  
  static struct cm_id_private * cm_find_listen(struct ib_device *device,
 -  __be64 service_id)
 +  __be64 service_id,
 +  u8 *private_data)
  {
   struct rb_node *node = cm.listen_service_table.rb_node;
   struct cm_id_private *cm_id_priv;
 + int data_cmp;
  
   while (node) {
   cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
 + data_cmp = cm_compare_private_data(private_data,
 +cm_id_priv-compare_data);
   if ((cm_id_priv-id.service_mask  service_id) ==
cm_id_priv-id.service_id 
 - (cm_id_priv-id.device == device))
 + (cm_id_priv-id.device == device)  !data_cmp)
   return cm_id_priv;
  
   if (device  cm_id_priv-id.device)
 @@ -405,6 +452,10 @@ static struct cm_id_private * cm_find_li
   node = node-rb_right;
   else if (service_id  cm_id_priv-id.service_id)
   node = node-rb_left;
 + else if (service_id  cm_id_priv-id.service_id)
 + node = node-rb_right;
 + else if (data_cmp  0)
 + node = node-rb_left;
   else
   node = node-rb_right;
   }
 @@ -728,15 +779,14 @@ retest:
   wait_event(cm_id_priv-wait, !atomic_read(cm_id_priv-refcount));
   while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
   cm_free_work(work);
 -

[openib-general] Re: [PATCH 2/5] [RFC] Infiniband: connection abstraction

2006-01-17 Thread Arjan van de Ven


 
 <flamebait>
 Also should infiniband exports be EXPORT_SYMBOL_GPL, to make
 it clear that binary drivers for this are not allowed??
 </flamebait>

the dual license text needs a bit of clarification I suspect to make
explicit that the "or BSD" part only applies when used entirely outside
the linux kernel. (that already is the case, just it's not explicit.
Making that explicit would be good).



___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

54 matches

Mail list logo