Re: [PATCH] IB/uverbs: Handle large number of entries in poll CQ

2010-12-08 Thread Roland Dreier
So I finally got around to applying this, and I ended up monkeying
around a fair bit, mostly because gcc (my version is 4.4.4) seems to end
up generating horrible code for non-constant designated initializers.
Please look this over before I send it to Linus (and Dan, if you don't
want to be author any more, let me know ;)

 - R.


In ib_uverbs_poll_cq() code there is a potential integer overflow if
userspace passes in a large cmd.ne.  The calls to kmalloc() would
allocate smaller buffers than intended, leading to memory corruption.
There is also an information leak if resp wasn't all used.
Unprivileged userspace may call this function, although only if an
RDMA device that uses this function is present.

Fix this by copying CQ entries one at a time, which avoids the
allocation entirely, and also by moving this copying into a function
that makes sure to initialize all memory copied to userspace.

Special thanks to Jason Gunthorpe jguntho...@obsidianresearch.com
for his help and advice.

Cc: sta...@kernel.org
Signed-off-by: Dan Carpenter erro...@gmail.com

[ Monkey around with things a bit to avoid bad code generation by gcc
  when designated initializers are used.  - Roland ]

Signed-off-by: Roland Dreier rola...@cisco.com
---
 drivers/infiniband/core/uverbs_cmd.c |  101 +++---
 1 files changed, 57 insertions(+), 44 deletions(-)

diff --git a/drivers/infiniband/core/uverbs_cmd.c 
b/drivers/infiniband/core/uverbs_cmd.c
index b342248..f8c6f4e 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -893,68 +893,81 @@ out:
return ret ? ret : in_len;
 }
 
+static int copy_wc_to_user(void __user *dest, struct ib_wc *wc)
+{
+   struct ib_uverbs_wc tmp;
+
+   tmp.wr_id   = wc->wr_id;
+   tmp.status  = wc->status;
+   tmp.opcode  = wc->opcode;
+   tmp.vendor_err  = wc->vendor_err;
+   tmp.byte_len= wc->byte_len;
+   tmp.ex.imm_data = (__u32 __force) wc->ex.imm_data;
+   tmp.qp_num  = wc->qp->qp_num;
+   tmp.src_qp  = wc->src_qp;
+   tmp.wc_flags= wc->wc_flags;
+   tmp.pkey_index  = wc->pkey_index;
+   tmp.slid= wc->slid;
+   tmp.sl  = wc->sl;
+   tmp.dlid_path_bits  = wc->dlid_path_bits;
+   tmp.port_num= wc->port_num;
+   tmp.reserved= wc->port_num;
+
+   if (copy_to_user(dest, &tmp, sizeof tmp))
+   return -EFAULT;
+
+   return 0;
+}
+
 ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
  const char __user *buf, int in_len,
  int out_len)
 {
struct ib_uverbs_poll_cq   cmd;
-   struct ib_uverbs_poll_cq_resp *resp;
+   struct ib_uverbs_poll_cq_resp  resp;
+   u8 __user *header_ptr;
+   u8 __user *data_ptr;
struct ib_cq  *cq;
-   struct ib_wc  *wc;
-   intret = 0;
-   inti;
-   intrsize;
+   struct ib_wc   wc;
+   intret;
 
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
 
-   wc = kmalloc(cmd.ne * sizeof *wc, GFP_KERNEL);
-   if (!wc)
-   return -ENOMEM;
-
-   rsize = sizeof *resp + cmd.ne * sizeof(struct ib_uverbs_wc);
-   resp = kmalloc(rsize, GFP_KERNEL);
-   if (!resp) {
-   ret = -ENOMEM;
-   goto out_wc;
-   }
-
cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
-   if (!cq) {
-   ret = -EINVAL;
-   goto out;
-   }
+   if (!cq)
+   return -EINVAL;
 
-   resp->count = ib_poll_cq(cq, cmd.ne, wc);
+   /* we copy a struct ib_uverbs_poll_cq_resp to user space */
+   header_ptr = (void __user *)(unsigned long) cmd.response;
+   data_ptr = header_ptr + sizeof resp;
 
-   put_cq_read(cq);
+   memset(&resp, 0, sizeof resp);
+   while (resp.count < cmd.ne) {
+   ret = ib_poll_cq(cq, 1, &wc);
+   if (ret < 0)
+   goto out_put;
+   if (!ret)
+   break;
+
+   ret = copy_wc_to_user(data_ptr, &wc);
+   if (ret)
+   goto out_put;
 
-   for (i = 0; i < resp->count; i++) {
-   resp->wc[i].wr_id  = wc[i].wr_id;
-   resp->wc[i].status = wc[i].status;
-   resp->wc[i].opcode = wc[i].opcode;
-   resp->wc[i].vendor_err = wc[i].vendor_err;
-   resp->wc[i].byte_len   = wc[i].byte_len;
-   resp->wc[i].ex.imm_data= (__u32 __force) wc[i].ex.imm_data;
-   resp->wc[i].qp_num = wc[i].qp->qp_num;
-   

Re: [PATCH] IB/uverbs: Handle large number of entries in poll CQ

2010-12-08 Thread Dan Carpenter
On Wed, Dec 08, 2010 at 03:56:09PM -0800, Roland Dreier wrote:
> + tmp.port_num= wc->port_num;
> + tmp.reserved= wc->port_num;

I would probably have put a zero in tmp.reserved.

Otherwise it looks good.

regards,
dan carpenter

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html