Re: [PATCH v2 10/15] opensm: Add opensm option to specify file name for extra torus-2QoS configuration information.

2010-06-10 Thread Sasha Khapyorsky
Hi Jim,

On 11:06 Wed 10 Mar , Jim Schutt wrote:
 
 Signed-off-by: Jim Schutt jasc...@sandia.gov
 ---
  opensm/include/opensm/osm_base.h   |   18 ++
  opensm/include/opensm/osm_subnet.h |5 +
  opensm/opensm/main.c   |9 +
  opensm/opensm/osm_subnet.c |1 +
  opensm/opensm/osm_torus.c  |2 +-

It breaks to apply at this point. It is because file
'opensm/opensm/osm_torus.c' doesn't exist in previous patches. Could you
please resend the patch series with files included? Thanks.

Sasha

  5 files changed, 34 insertions(+), 1 deletions(-)
 
 diff --git a/opensm/include/opensm/osm_base.h 
 b/opensm/include/opensm/osm_base.h
 index 4e9aaa9..8720c38 100644
 --- a/opensm/include/opensm/osm_base.h
 +++ b/opensm/include/opensm/osm_base.h
 @@ -277,6 +277,24 @@ BEGIN_C_DECLS
  #endif /* __WIN__ */
  /***/
  
 +/d* OpenSM: Base/OSM_DEFAULT_TORUS_CONF_FILE
 +* NAME
 +*OSM_DEFAULT_TORUS_CONF_FILE
 +*
 +* DESCRIPTION
 +*Specifies the default file name for extra torus-2QoS configuration
 +*
 +* SYNOPSIS
 +*/
 +#ifdef __WIN__
 +#define OSM_DEFAULT_TORUS_CONF_FILE strcat(GetOsmCachePath(), 
 osm-torus-2QoS.conf)
 +#elif defined(OPENSM_CONFIG_DIR)
 +#define OSM_DEFAULT_TORUS_CONF_FILE OPENSM_CONFIG_DIR /torus-2QoS.conf
 +#else
 +#define OSM_DEFAULT_TORUS_CONF_FILE /etc/opensm/torus-2QoS.conf
 +#endif /* __WIN__ */
 +/***/
 +
  /d* OpenSM: Base/OSM_DEFAULT_PREFIX_ROUTES_FILE
  * NAME
  *OSM_DEFAULT_PREFIX_ROUTES_FILE
 diff --git a/opensm/include/opensm/osm_subnet.h 
 b/opensm/include/opensm/osm_subnet.h
 index d74a57c..d2d9661 100644
 --- a/opensm/include/opensm/osm_subnet.h
 +++ b/opensm/include/opensm/osm_subnet.h
 @@ -201,6 +201,7 @@ typedef struct osm_subn_opt {
   char *guid_routing_order_file;
   char *sa_db_file;
   boolean_t sa_db_dump;
 + char *torus_conf_file;
   boolean_t do_mesh_analysis;
   boolean_t exit_on_fatal;
   boolean_t honor_guid2lid_file;
 @@ -418,6 +419,10 @@ typedef struct osm_subn_opt {
  *When TRUE causes OpenSM to dump SA DB at the end of every
  *light sweep regardless the current verbosity level.
  *
 +*torus_conf_file
 +*Name of the file with extra configuration info for torus-2QoS
 +*routing engine.
 +*
  *exit_on_fatal
  *If TRUE (default) - SM will exit on fatal subnet initialization
  *issues.
 diff --git a/opensm/opensm/main.c b/opensm/opensm/main.c
 index f396de4..578ae9f 100644
 --- a/opensm/opensm/main.c
 +++ b/opensm/opensm/main.c
 @@ -231,6 +231,10 @@ static void show_usage(void)
Set the order port guids will be routed for the 
 MinHop\n
and Up/Down routing algorithms to the guids provided 
 in the\n
given file (one to a line)\n\n);
 + printf(--torus_config path to file\n
 +  This option defines the file name for the extra 
 configuration\n
 +  info needed for the torus-2QoS routing engine.   The 
 default\n
 +  name is \'OSM_DEFAULT_TORUS_CONF_FILE\'\n\n);
   printf(--once, -o\n
This option causes OpenSM to configure the subnet\n
once, then exit.  Ports remain in the ACTIVE 
 state.\n\n);
 @@ -610,6 +614,7 @@ int main(int argc, char *argv[])
   {sm_sl, 1, NULL, 7},
   {retries, 1, NULL, 8},
   {log_prefix, 1, NULL, 9},
 + {torus_config, 1, NULL, 10},
   {NULL, 0, NULL, 0}  /* Required at the end of the array */
   };
  
 @@ -992,6 +997,10 @@ int main(int argc, char *argv[])
   SET_STR_OPT(opt.log_prefix, optarg);
   printf(Log prefix = %s\n, opt.log_prefix);
   break;
 + case 10:
 + SET_STR_OPT(opt.torus_conf_file, optarg);
 + printf(Torus-2QoS config file = %s\n, 
 opt.torus_conf_file);
 + break;
   case 'h':
   case '?':
   case ':':
 diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c
 index 55b9384..47aa529 100644
 --- a/opensm/opensm/osm_subnet.c
 +++ b/opensm/opensm/osm_subnet.c
 @@ -758,6 +758,7 @@ void osm_subn_set_default_opt(IN osm_subn_opt_t * p_opt)
   p_opt-guid_routing_order_file = NULL;
   p_opt-sa_db_file = NULL;
   p_opt-sa_db_dump = FALSE;
 + p_opt-torus_conf_file = strdup(OSM_DEFAULT_TORUS_CONF_FILE);
   p_opt-do_mesh_analysis = FALSE;
   p_opt-exit_on_fatal = TRUE;
   p_opt-enable_quirks = FALSE;
 diff --git a/opensm/opensm/osm_torus.c b/opensm/opensm/osm_torus.c
 index 7f80034..7c3b550 100644
 --- a/opensm/opensm/osm_torus.c
 +++ b/opensm/opensm/osm_torus.c
 @@ -9043,7 +9043,7 @@ int torus_build_lfts(void *context)
   torus-osm = ctx-osm;
   fabric-osm = ctx-osm;
  
 - if 

[PATCH v3] opensm/osmeventplugin: added new events to monitor SM

2010-06-10 Thread Yevgeny Kliteynik
Hi Sasha,

Adding new events that allow event plug-in to see
when SM finishes heavy sweep and routing configuration,
when it updates dump files, when it is no longer master,
and when SM port is down:

  OSM_EVENT_ID_HEAVY_SWEEP_DONE
  OSM_EVENT_ID_UCAST_ROUTING_DONE
  OSM_EVENT_ID_ENTERING_STANDBY
  OSM_EVENT_ID_SM_PORT_DOWN
  OSM_EVENT_ID_SA_DB_DUMPED

The last event is reported when SA DB is actually dumped.

Signed-off-by: Yevgeny Kliteynik klit...@dev.mellanox.co.il
---

Changes from V2:
  - reduced number of events that are reported
  - rebased to latest master

---
 opensm/include/opensm/osm_event_plugin.h   |7 ++-
 opensm/opensm/osm_state_mgr.c  |   16 +++-
 opensm/osmeventplugin/src/osmeventplugin.c |   15 +++
 3 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/opensm/include/opensm/osm_event_plugin.h 
b/opensm/include/opensm/osm_event_plugin.h
index 33d1920..a565123 100644
--- a/opensm/include/opensm/osm_event_plugin.h
+++ b/opensm/include/opensm/osm_event_plugin.h
@@ -72,7 +72,12 @@ typedef enum {
OSM_EVENT_ID_PORT_SELECT,
OSM_EVENT_ID_TRAP,
OSM_EVENT_ID_SUBNET_UP,
-   OSM_EVENT_ID_MAX
+   OSM_EVENT_ID_MAX,
+   OSM_EVENT_ID_HEAVY_SWEEP_DONE,
+   OSM_EVENT_ID_UCAST_ROUTING_DONE,
+   OSM_EVENT_ID_ENTERING_STANDBY,
+   OSM_EVENT_ID_SM_PORT_DOWN,
+   OSM_EVENT_ID_SA_DB_DUMPED
 } osm_epi_event_id_t;

 typedef struct osm_epi_port_id {
diff --git a/opensm/opensm/osm_state_mgr.c b/opensm/opensm/osm_state_mgr.c
index 81c8f54..3231ae9 100644
--- a/opensm/opensm/osm_state_mgr.c
+++ b/opensm/opensm/osm_state_mgr.c
@@ -1151,6 +1151,8 @@ static void do_sweep(osm_sm_t * sm)
if (!sm-p_subn-subnet_initialization_error) {
OSM_LOG_MSG_BOX(sm-p_log, OSM_LOG_VERBOSE,
REROUTE COMPLETE);
+   osm_opensm_report_event(sm-p_subn-p_osm,
+   OSM_EVENT_ID_UCAST_ROUTING_DONE, NULL);
return;
}
}
@@ -1185,6 +1187,8 @@ repeat_discovery:

/* Move to DISCOVERING state */
osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_DISCOVER);
+   osm_opensm_report_event(sm-p_subn-p_osm,
+   OSM_EVENT_ID_SM_PORT_DOWN, NULL);
return;
}

@@ -1205,6 +1209,8 @@ repeat_discovery:
ENTERING STANDBY STATE);
/* notify master SM about us */
osm_send_trap144(sm, 0);
+   osm_opensm_report_event(sm-p_subn-p_osm,
+   OSM_EVENT_ID_ENTERING_STANDBY, NULL);
return;
}

@@ -1212,6 +1218,9 @@ repeat_discovery:
if (sm-p_subn-force_heavy_sweep)
goto repeat_discovery;

+   osm_opensm_report_event(sm-p_subn-p_osm,
+   OSM_EVENT_ID_HEAVY_SWEEP_DONE, NULL);
+
OSM_LOG_MSG_BOX(sm-p_log, OSM_LOG_VERBOSE, HEAVY SWEEP COMPLETE);

/* If we are MASTER - get the highest remote_sm, and
@@ -1314,6 +1323,8 @@ repeat_discovery:

OSM_LOG_MSG_BOX(sm-p_log, OSM_LOG_VERBOSE,
SWITCHES CONFIGURED FOR UNICAST);
+   osm_opensm_report_event(sm-p_subn-p_osm,
+   OSM_EVENT_ID_UCAST_ROUTING_DONE, NULL);

if (!sm-p_subn-opt.disable_multicast) {
osm_mcast_mgr_process(sm);
@@ -1375,7 +1386,10 @@ repeat_discovery:

if (osm_log_is_active(sm-p_log, OSM_LOG_VERBOSE) ||
sm-p_subn-opt.sa_db_dump)
-   osm_sa_db_file_dump(sm-p_subn-p_osm);
+   if (!osm_sa_db_file_dump(sm-p_subn-p_osm))
+   osm_opensm_report_event(sm-p_subn-p_osm,
+   OSM_EVENT_ID_SA_DB_DUMPED, NULL);
+
}

/*
diff --git a/opensm/osmeventplugin/src/osmeventplugin.c 
b/opensm/osmeventplugin/src/osmeventplugin.c
index b4d9ce9..af68a5c 100644
--- a/opensm/osmeventplugin/src/osmeventplugin.c
+++ b/opensm/osmeventplugin/src/osmeventplugin.c
@@ -176,6 +176,21 @@ static void report(void *_log, osm_epi_event_id_t 
event_id, void *event_data)
case OSM_EVENT_ID_SUBNET_UP:
fprintf(log-log_file, Subnet up reported\n);
break;
+   case OSM_EVENT_ID_HEAVY_SWEEP_DONE:
+   fprintf(log-log_file, Heavy sweep completed\n);
+   break;
+   case OSM_EVENT_ID_UCAST_ROUTING_DONE:
+   fprintf(log-log_file, Unicast routing completed\n);
+   break;
+   case OSM_EVENT_ID_ENTERING_STANDBY:
+   fprintf(log-log_file, Entering stand-by state\n);
+   break;
+   case OSM_EVENT_ID_SM_PORT_DOWN:
+   fprintf(log-log_file, SM port is down\n);
+   break;
+   case OSM_EVENT_ID_SA_DB_DUMPED:
+   fprintf(log-log_file, SA 

[Patch v2] opensm/main.c: force stdout to be line-buffered

2010-06-10 Thread Yevgeny Kliteynik
When stdout is assigned to a terminal, it is line-buffered.
But when opensm's stdout is redirected to a file, stdout
becomes block-buffered, which means that '\n' won't cause
the buffer to be flushed.

Forcing stdout to always be line-buffered and to have a
more predictable behavior when used as "opensm > some_file".

Signed-off-by: Yevgeny Kliteynik klit...@dev.mellanox.co.il
---

Changes since V1:
  - replacing setlinebuf() with an ANSI C compliant setvbuf()
  - Note: similar patch for ibv_asyncwatch was accepted by Roland:

  http://www.mail-archive.com/linux-rdma@vger.kernel.org/msg04161.html

 opensm/opensm/main.c |3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/opensm/opensm/main.c b/opensm/opensm/main.c
index 0093aa7..6e6c733 100644
--- a/opensm/opensm/main.c
+++ b/opensm/opensm/main.c
@@ -618,6 +618,9 @@ int main(int argc, char *argv[])
{NULL, 0, NULL, 0}  /* Required at the end of the array */
};

+   /* force stdout to be line-buffered */
+   setvbuf(stdout, NULL, _IOLBF, 0);
+
/* Make sure that the opensm and complib were compiled using
   same modes (debug/free) */
if (osm_is_debug() != cl_is_debug()) {
-- 
1.5.1.4


--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH resend] opensm/osm_sa_path_record.c: adding wrapper for pr_rcv_get_path_parms()

2010-06-10 Thread Yevgeny Kliteynik
Adding non-static wrapper function for pr_rcv_get_path_parms()
function to enable calling path record calculation function from
outside this file.

Signed-off-by: Yevgeny Kliteynik klit...@dev.mellanox.co.il

---
 opensm/opensm/osm_sa_path_record.c |   12 
 1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/opensm/opensm/osm_sa_path_record.c 
b/opensm/opensm/osm_sa_path_record.c
index f0d7ca2..2897c7b 100644
--- a/opensm/opensm/osm_sa_path_record.c
+++ b/opensm/opensm/osm_sa_path_record.c
@@ -764,6 +764,18 @@ Exit:
return status;
 }

+ib_api_status_t osm_get_path_params(IN osm_sa_t * sa,
+   IN const osm_port_t * p_src_port,
+   IN const osm_port_t * p_dest_port,
+   IN const uint16_t dlid_ho,
+   OUT osm_path_parms_t * p_parms)
+{
+   ib_path_rec_t pr;
+   memset(pr, 0, sizeof(ib_path_rec_t));
+   return pr_rcv_get_path_parms(sa, pr,
+   p_src_port, p_dest_port, dlid_ho, 0, p_parms);
+}
+
 static void pr_rcv_build_pr(IN osm_sa_t * sa, IN const osm_port_t * p_src_port,
IN const osm_port_t * p_dest_port,
IN const ib_gid_t * p_dgid,
-- 
1.5.1.4


--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 10/15] opensm: Add opensm option to specify file name for extra torus-2QoS configuration information.

2010-06-10 Thread Jim Schutt


Hi Sasha,

Thanks for taking a look at this.


On Thu, 2010-06-10 at 05:25 -0600, Sasha Khapyorsky wrote:
 Hi Jim,
 
 On 11:06 Wed 10 Mar , Jim Schutt wrote:
  
  Signed-off-by: Jim Schutt jasc...@sandia.gov
  ---
   opensm/include/opensm/osm_base.h   |   18 ++
   opensm/include/opensm/osm_subnet.h |5 +
   opensm/opensm/main.c   |9 +
   opensm/opensm/osm_subnet.c |1 +
   opensm/opensm/osm_torus.c  |2 +-
 
 It breaks to apply at this point. It is because file
 'opensm/opensm/osm_torus.c' doesn't exist in previous patches. Could
 you
 please resend the patch series with files included? Thanks.

So 7/15 has the patch that adds osm_torus.c as a compressed attachment,
because the patch is so big.

I sent it that way because I was afraid it would otherwise be
rejected by vger.

So you want me to resend with that big patch inline?

Also, I have accumulated a few bug fixes to torus-2QoS
that I haven't posted yet.  I can

1) repost the patch series with no attachments, and
   add the bugfix patches at the end of series
2) repost a v3 patchset with these fixes merged.
3) do something else that you prefer.

Let me know?

-- Jim

 
 Sasha
 


--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] return no path when path does not exist

2010-06-10 Thread Eli Dorfman (Voltaire)

Return OSM_NO_PATH (instead of a port number) when the path does not exist.
This will also be reported as an error in the log.

Signed-off-by: Eli Dorfman e...@voltaire.com
---
 opensm/opensm/osm_switch.c |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/opensm/opensm/osm_switch.c b/opensm/opensm/osm_switch.c
index 311c4f7..b621852 100644
--- a/opensm/opensm/osm_switch.c
+++ b/opensm/opensm/osm_switch.c
@@ -628,6 +628,8 @@ uint8_t osm_switch_recommend_mcast_path(IN osm_switch_t * 
p_sw,
   a black hole that will destroy the Earth in a firey conflagration.
 */
least_hops = osm_switch_get_least_hops(p_sw, base_lid);
+   if (least_hops == OSM_NO_PATH)
+   return OSM_NO_PATH;
for (port_num = 1; port_num  num_ports; port_num++)
if (osm_switch_get_hop_count(p_sw, base_lid, port_num) ==
least_hops)
-- 
1.5.5

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


converting ib-mgmt scripts to executables

2010-06-10 Thread Hefty, Sean
Sasha,

Before we do the work, would there be any issue converting a couple of the 
ib-diag scripts to executables?  Specifically, we'd like to have ibchecknet and 
ibcheckerrors functionality available on Windows.

- Sean
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH/RFC] mlx4_core: module param to limit msix vec allocation

2010-06-10 Thread Arthur Kepner

The mlx4_core driver allocates 'nreq' msix vectors (and irqs), 
where:

  nreq = min_t(int, dev-caps.num_eqs - dev-caps.reserved_eqs,
   num_possible_cpus() + 1);

ConnectX HCAs support 512 event queues (4 reserved). On a system 
with enough processors, we get:

  mlx4_core 0006:01:00.0: Requested 508 vectors, but only 256 MSI-X vectors 
available, trying again

Further attempts (by other drivers) to allocate interrupts fail, 
because mlx4_core got 'em all.

How about this?

Signed-off-by: Arthur Kepner akep...@sgi.com

---

 main.c |8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index e3e0d54..0a316d0 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -68,6 +68,10 @@ static int msi_x = 1;
 module_param(msi_x, int, 0444);
 MODULE_PARM_DESC(msi_x, attempt to use MSI-X if nonzero);
 
+static int max_msi_x_vec = 64;
+module_param(max_msi_x_vec, int, 0444);
+MODULE_PARM_DESC(max_msi_x_vec, max MSI-X vectors we'll attempt to allocate);
+
 #else /* CONFIG_PCI_MSI */
 
 #define msi_x (0)
@@ -968,8 +972,10 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev)
int i;
 
if (msi_x) {
+   nreq = min_t(int, num_possible_cpus() + 1, max_msi_x_vec);
nreq = min_t(int, dev-caps.num_eqs - dev-caps.reserved_eqs,
-num_possible_cpus() + 1);
+nreq);
+
entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL);
if (!entries)
goto no_msi;
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 00/19 V4] mlx4 SRIOV support

2010-06-10 Thread Yevgeny Petrilin
Hello Roland,
This is another round of SRIOV support patches for mlx4.
Currently the support is for the mlx4_core and mlx4_en modules.
The main changes from previous round are:
1. Events on the Communication channel, no more polling.
2. Steering mechanism change for Ethernet traffic.

Thanks,
Yevgeny
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 01/19 V4] mlx4_core: identify function as pf or vf

2010-06-10 Thread Yevgeny Petrilin
Signed-off-by: Liran Liss lir...@mellanox.co.il
Signed-off-by: Yevgeny Petrilin yevge...@mellanox.co.il
---
 include/linux/mlx4/device.h |   19 +++
 1 files changed, 19 insertions(+), 0 deletions(-)

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 7a7f9c1..84de4a6 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -42,6 +42,9 @@
 enum {
MLX4_FLAG_MSI_X = 1  0,
MLX4_FLAG_OLD_PORT_CMDS = 1  1,
+   MLX4_FLAG_MASTER= 1  2,
+   MLX4_FLAG_SLAVE = 1  3,
+   MLX4_FLAG_SRIOV = 1  4,
 };
 
 enum {
@@ -376,6 +379,7 @@ struct mlx4_av {
 struct mlx4_dev {
struct pci_dev *pdev;
unsigned long   flags;
+   unsigned long   num_slaves;
struct mlx4_capscaps;
struct radix_tree_root  qp_table_tree;
u32 rev_id;
@@ -401,6 +405,21 @@ struct mlx4_init_port_param {
if (((type) == MLX4_PORT_TYPE_IB ? (dev)-caps.port_mask : \
 ~(dev)-caps.port_mask)  1  ((port) - 1))
 
+static inline int mlx4_is_slave(struct mlx4_dev *dev)
+{
+   return dev-flags  MLX4_FLAG_SLAVE;
+}
+
+static inline int mlx4_is_master(struct mlx4_dev *dev)
+{
+   return dev-flags  MLX4_FLAG_MASTER;
+}
+
+static inline int mlx4_is_mfunc(struct mlx4_dev *dev)
+{
+   return dev-flags  (MLX4_FLAG_MASTER | MLX4_FLAG_SLAVE);
+}
+
 int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
   struct mlx4_buf *buf);
 void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 02/19 V4] mlx4_core: add multi-function communication channel

2010-06-10 Thread Yevgeny Petrilin
The communication channel consists of 2 registers per vf (a slave function)
that are shared with the pf (the master function), as well as a new command for
inter-function memory copying (only exposed to the master).

The communication channel is used to establish a Virtual HCA Command Register
(vHCR) in each slave function, which allows it to pass FW commands to the master
function for execution.
The slave also uses the vHCR to pull slave-specific events from the master.

Signed-off-by: Liran Liss lir...@mellanox.co.il
Signed-off-by: Yevgeny Petrilin yevge...@mellanox.co.il
---
 drivers/net/mlx4/cmd.c  |  746 ++-
 drivers/net/mlx4/en_port.h  |5 -
 drivers/net/mlx4/eq.c   |   89 +-
 drivers/net/mlx4/fw.c   |8 +
 drivers/net/mlx4/main.c |   17 +-
 drivers/net/mlx4/mlx4.h |   72 -
 include/linux/mlx4/cmd.h|   12 +-
 include/linux/mlx4/device.h |3 +-
 8 files changed, 921 insertions(+), 31 deletions(-)

diff --git a/drivers/net/mlx4/cmd.c b/drivers/net/mlx4/cmd.c
index 23cee7b..672e13b 100644
--- a/drivers/net/mlx4/cmd.c
+++ b/drivers/net/mlx4/cmd.c
@@ -141,6 +141,46 @@ static int mlx4_status_to_errno(u8 status)
return trans_table[status];
 }
 
+static int comm_pending(struct mlx4_dev *dev)
+{
+   struct mlx4_priv *priv = mlx4_priv(dev);
+   u32 status = readl(priv-mfunc.comm-slave_read);
+
+   return (swab32(status)  30) != priv-cmd.comm_toggle;
+}
+
+int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param, unsigned long 
timeout)
+{
+   struct mlx4_priv *priv = mlx4_priv(dev);
+   unsigned long end;
+   u32 val;
+
+   /* First, verify that the master reports correct status */
+   if (comm_pending(dev)) {
+   mlx4_warn(dev, Communication channel is not idle\n);
+   return -EAGAIN;
+   }
+
+   /* Write command */
+   if (cmd == MLX4_COMM_CMD_RESET)
+   priv-cmd.comm_toggle = 0;
+   else if (++priv-cmd.comm_toggle  2)
+   priv-cmd.comm_toggle = 1;
+   val = param | (cmd  16) | (priv-cmd.comm_toggle  30);
+   __raw_writel((__force u32) cpu_to_be32(val), 
priv-mfunc.comm-slave_write);
+   wmb();
+
+   end = msecs_to_jiffies(timeout) + jiffies;
+   while (comm_pending(dev)  time_before(jiffies, end))
+   cond_resched();
+
+   if (comm_pending(dev)) {
+   mlx4_warn(dev, Communication channel timed out\n);
+   return -ETIMEDOUT;
+   }
+   return 0;
+}
+
 static int cmd_pending(struct mlx4_dev *dev)
 {
u32 status = readl(mlx4_priv(dev)-cmd.hcr + HCR_STATUS_OFFSET);
@@ -208,6 +248,33 @@ out:
return ret;
 }
 
+static int mlx4_slave_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 
*out_param,
+int out_is_imm, u32 in_modifier, u8 op_modifier,
+u16 op, unsigned long timeout)
+{
+   struct mlx4_priv *priv = mlx4_priv(dev);
+   struct mlx4_vhcr *vhcr = priv-mfunc.vhcr;
+   int ret;
+
+   down(priv-cmd.poll_sem);
+   vhcr-in_param = in_param;
+   vhcr-out_param = out_param ? *out_param : 0;
+   vhcr-in_modifier = in_modifier;
+   vhcr-timeout = timeout;
+   vhcr-op = op;
+   vhcr-token = CMD_POLL_TOKEN;
+   vhcr-op_modifier = op_modifier;
+   vhcr-errno = 0;
+   ret = mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_POST, 0, MLX4_COMM_TIME + 
timeout);
+   if (!ret) {
+   if (out_is_imm)
+   *out_param = vhcr-out_param;
+   ret = vhcr-errno;
+   }
+   up(priv-cmd.poll_sem);
+   return ret;
+}
+
 static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
 int out_is_imm, u32 in_modifier, u8 op_modifier,
 u16 op, unsigned long timeout)
@@ -315,12 +382,646 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 
*out_param,
if (mlx4_priv(dev)-cmd.use_events)
return mlx4_cmd_wait(dev, in_param, out_param, out_is_imm,
 in_modifier, op_modifier, op, timeout);
+
+   if (mlx4_is_slave(dev))
+   return mlx4_slave_cmd_poll(dev, in_param, out_param, out_is_imm,
+in_modifier, op_modifier, op, timeout);
else
return mlx4_cmd_poll(dev, in_param, out_param, out_is_imm,
 in_modifier, op_modifier, op, timeout);
 }
 EXPORT_SYMBOL_GPL(__mlx4_cmd);
 
+static int mlx4_ACCESS_MEM(struct mlx4_dev *dev, u64 master_addr,
+  int slave, u64 slave_addr,
+  int size, int is_read)
+{
+   u64 in_param;
+   u64 out_param;
+
+   if ((slave_addr  0xfff) | (master_addr  0xfff) |
+   (slave  ~0x7f) | (size  0xff)) {
+   mlx4_err(dev, Bad access mem params - slave_addr:0x%llx 
+ master_addr:0x%llx 

[PATCH 03/19 V4] mlx4_core: add WRITE_MTT support

2010-06-10 Thread Yevgeny Petrilin
Used by vfs to modify mtts, since they cannot access in-memory mtts directly.

Signed-off-by: Liran Liss lir...@mellanox.co.il
Signed-off-by: Yevgeny Petrilin yevge...@mellanox.co.il
---
 drivers/net/mlx4/cmd.c  |   10 +-
 drivers/net/mlx4/mlx4.h |5 +++
 drivers/net/mlx4/mr.c   |   67 ++
 3 files changed, 74 insertions(+), 8 deletions(-)

diff --git a/drivers/net/mlx4/cmd.c b/drivers/net/mlx4/cmd.c
index 672e13b..eac3b21 100644
--- a/drivers/net/mlx4/cmd.c
+++ b/drivers/net/mlx4/cmd.c
@@ -37,8 +37,6 @@
 #include linux/pci.h
 #include linux/errno.h
 
-#include linux/mlx4/cmd.h
-
 #include asm/io.h
 
 #include mlx4.h
@@ -498,6 +496,14 @@ static struct mlx4_cmd_info {
.wrapper = NULL
},
{
+   .opcode = MLX4_CMD_WRITE_MTT,
+   .has_inbox = true,
+   .has_outbox = false,
+   .out_is_imm = false,
+   .verify = NULL, /* need verifier */
+   .wrapper = mlx4_WRITE_MTT_wrapper
+   },
+   {
.opcode = MLX4_CMD_SYNC_TPT,
.has_inbox = true,
.has_outbox = false,
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index fac5d6e..71b191e 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -46,6 +46,7 @@
 #include linux/mlx4/device.h
 #include linux/mlx4/driver.h
 #include linux/mlx4/doorbell.h
+#include linux/mlx4/cmd.h
 
 #define DRV_NAME   mlx4_core
 #define PFXDRV_NAME : 
@@ -420,6 +421,10 @@ void mlx4_cleanup_qp_table(struct mlx4_dev *dev);
 void mlx4_cleanup_srq_table(struct mlx4_dev *dev);
 void mlx4_cleanup_mcg_table(struct mlx4_dev *dev);
 
+int mlx4_WRITE_MTT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr 
*vhcr,
+struct mlx4_cmd_mailbox *inbox,
+struct mlx4_cmd_mailbox 
*outbox);
+
 void mlx4_start_catas_poll(struct mlx4_dev *dev);
 void mlx4_stop_catas_poll(struct mlx4_dev *dev);
 void mlx4_catas_init(void);
diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c
index 3dc69be..67c0539 100644
--- a/drivers/net/mlx4/mr.c
+++ b/drivers/net/mlx4/mr.c
@@ -263,6 +263,35 @@ static int mlx4_HW2SW_MPT(struct mlx4_dev *dev, struct 
mlx4_cmd_mailbox *mailbox
!mailbox, MLX4_CMD_HW2SW_MPT, 
MLX4_CMD_TIME_CLASS_B);
 }
 
+int mlx4_WRITE_MTT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr 
*vhcr,
+struct mlx4_cmd_mailbox *inbox,
+struct mlx4_cmd_mailbox 
*outbox)
+{
+   struct mlx4_mtt mtt;
+   u64 *page_list = inbox-buf;
+   int i;
+
+   /* Call the SW implementation of write_mtt:
+* - Prepare a dummy mtt struct
+* - Translate inbox contents to simple addresses in host endianess */
+   mtt.first_seg = 0;
+   mtt.order = 0;
+   mtt.page_shift = 0;
+   for (i = 0; i  vhcr-in_modifier; ++i)
+   page_list[i + 2] = be64_to_cpu(page_list[i + 2])  ~1ULL;
+   vhcr-errno = mlx4_write_mtt(dev, mtt, be64_to_cpu(page_list[0]),
+   vhcr-in_modifier,
+   page_list + 2);
+   return 0;
+}
+
+static int mlx4_WRITE_MTT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox 
*mailbox,
+ int num_entries)
+{
+   return mlx4_cmd(dev, mailbox-dma, num_entries, 0, MLX4_CMD_WRITE_MTT,
+   MLX4_CMD_TIME_CLASS_A);
+}
+
 int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
  int npages, int page_shift, struct mlx4_mr *mr)
 {
@@ -414,24 +443,50 @@ static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, 
struct mlx4_mtt *mtt,
 int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
   int start_index, int npages, u64 *page_list)
 {
+   struct mlx4_cmd_mailbox *mailbox = NULL;
int chunk;
-   int err;
+   int err = 0;
+   __be64 *inbox = NULL;
+   int i;
 
if (mtt-order  0)
return -EINVAL;
 
+   if (mlx4_is_slave(dev)) {
+   mailbox = mlx4_alloc_cmd_mailbox(dev);
+   if (IS_ERR(mailbox))
+   return PTR_ERR(mailbox);
+   inbox = mailbox-buf;
+   }
+
while (npages  0) {
-   chunk = min_t(int, PAGE_SIZE / sizeof(u64), npages);
-   err = mlx4_write_mtt_chunk(dev, mtt, start_index, chunk, 
page_list);
+   if (mlx4_is_slave(dev)) {
+   int s = mtt-first_seg * dev-caps.mtts_per_seg + 
start_index;
+   chunk = min_t(int, MLX4_MAILBOX_SIZE / sizeof(u64) - 
dev-caps.mtts_per_seg, npages);
+   if (s / (PAGE_SIZE / sizeof (u64)) !=
+   (s + chunk - 1) / (PAGE_SIZE / sizeof (u64)))
+  

[PATCH 04/19 V4] mlx4_core: add slave resource allocation

2010-06-10 Thread Yevgeny Petrilin
All QPs/CQs/SRQs/MPTs/MTTs are allocated from shared pools, which are owned by
the master. In addition, all backing ICM memory for these objects is managed by
the master.
To allow slaves to allocate resources, ICM allocation is separated from the rest
of the object state, which is held at the slave.
Slaves can then reserve resource ranges and allocate ICM over the comm channel.

Signed-off-by: Liran Liss lir...@mellanox.co.il
Signed-off-by: Yevgeny Petrilin yevge...@mellanox.co.il
---
 drivers/net/mlx4/cmd.c   |  110 +
 drivers/net/mlx4/cq.c|   91 +---
 drivers/net/mlx4/mlx4.h  |   27 
 drivers/net/mlx4/mr.c|  125 ++
 drivers/net/mlx4/qp.c|  151 +-
 drivers/net/mlx4/srq.c   |   88 ---
 include/linux/mlx4/cmd.h |2 +
 7 files changed, 496 insertions(+), 98 deletions(-)

diff --git a/drivers/net/mlx4/cmd.c b/drivers/net/mlx4/cmd.c
index eac3b21..3c95da7 100644
--- a/drivers/net/mlx4/cmd.c
+++ b/drivers/net/mlx4/cmd.c
@@ -418,6 +418,100 @@ static int mlx4_ACCESS_MEM(struct mlx4_dev *dev, u64 
master_addr,
   MLX4_CMD_TIME_CLASS_A);
 }
 
+static int mlx4_RESOURCE_wrapper(struct mlx4_dev *dev, int slave, struct 
mlx4_vhcr *vhcr,
+  struct mlx4_cmd_mailbox 
*inbox,
+  struct mlx4_cmd_mailbox 
*outbox)
+{
+   u32 param1 = *((u32 *) vhcr-in_param);
+   u32 param2 = *(((u32 *) vhcr-in_param) + 1);
+   int ret;
+
+#if 0
+   char *res[] = {QP, CQ, SRQ, MPT, MTT};
+   mlx4_warn(dev, resource wrapper - %s (mode: %s) type:%s param1:%d 
param2:%d\n,
+   vhcr-op == MLX4_CMD_ALLOC_RES ? allocate : free,
+   vhcr-op_modifier == ICM_RESERVE ? reserve :
+   (vhcr-op_modifier == ICM_ALLOC ? alloc : 
reserve+alloc),
+   res[vhcr-in_modifier], param1, param2);
+#endif
+
+   vhcr-errno = 0;
+   switch (vhcr-in_modifier) {
+   case RES_QP:
+   switch (vhcr-op_modifier) {
+   case ICM_RESERVE:
+   if (vhcr-op == MLX4_CMD_ALLOC_RES) {
+   vhcr-errno = mlx4_qp_reserve_range(dev, 
param1, param2, ret);
+   if (!vhcr-errno)
+   vhcr-out_param = ret;
+   } else {
+   mlx4_qp_release_range(dev, param1, param2);
+   }
+   break;
+   case ICM_ALLOC:
+   if (vhcr-op == MLX4_CMD_ALLOC_RES)
+   vhcr-errno = mlx4_qp_alloc_icm(dev, param1);
+   else
+   mlx4_qp_free_icm(dev, param1);
+   break;
+   default:
+   vhcr-errno = -EINVAL;
+   }
+   break;
+   case RES_CQ:
+   if (vhcr-op == MLX4_CMD_ALLOC_RES) {
+   vhcr-errno = mlx4_cq_alloc_icm(dev, ret);
+   if (!vhcr-errno)
+   vhcr-out_param = ret;
+   } else
+   mlx4_cq_free_icm(dev, param1);
+   break;
+   case RES_SRQ:
+   if (vhcr-op == MLX4_CMD_ALLOC_RES) {
+   vhcr-errno = mlx4_srq_alloc_icm(dev, ret);
+   if (!vhcr-errno)
+   vhcr-out_param = ret;
+   } else
+   mlx4_srq_free_icm(dev, param1);
+   break;
+   case RES_MPT:
+   switch (vhcr-op_modifier) {
+   case ICM_RESERVE:
+   if (vhcr-op == MLX4_CMD_ALLOC_RES) {
+   ret = mlx4_mr_reserve(dev);
+   if (ret == -1)
+   vhcr-errno = -ENOMEM;
+   else
+   vhcr-out_param = ret;
+   } else
+   mlx4_mr_release(dev, param1);
+   break;
+   case ICM_ALLOC:
+   if (vhcr-op == MLX4_CMD_ALLOC_RES)
+   vhcr-errno = mlx4_mr_alloc_icm(dev, param1);
+   else
+   mlx4_mr_free_icm(dev, param1);
+   break;
+   default:
+   vhcr-errno = -EINVAL;
+   }
+   break;
+   case RES_MTT:
+   if (vhcr-op == MLX4_CMD_ALLOC_RES) {
+   ret = mlx4_alloc_mtt_range(dev, param1 /* order */);
+   if (ret == -1)
+   vhcr-errno = -ENOMEM;
+

[PATCH 05/19 V4] mlx4_core: add port para-virtualization

2010-06-10 Thread Yevgeny Petrilin
Ports are a shared resource among functions, so special behavior is needed here:
- Bring up ports if at least one function has done so.
- Bring down ports if all functions have done so.
- Aggregate IB port capabilities
- Set max MTU among functions for each Eth port
- Ensure steering is not broken for Eth ports.

Signed-off-by: Liran Liss lir...@mellanox.co.il
Signed-off-by: Yevgeny Petrilin yevge...@mellanox.co.il
---
 drivers/net/mlx4/cmd.c   |   58 +++
 drivers/net/mlx4/en_netdev.c |   32 +++---
 drivers/net/mlx4/en_port.c   |1 +
 drivers/net/mlx4/en_port.h   |   12 ++-
 drivers/net/mlx4/en_rx.c |   11 +--
 drivers/net/mlx4/fw.c|   79 ++-
 drivers/net/mlx4/main.c  |   22 +++--
 drivers/net/mlx4/mlx4.h  |   25 +-
 drivers/net/mlx4/port.c  |  220 +
 include/linux/mlx4/cmd.h |2 +
 include/linux/mlx4/device.h  |5 +-
 11 files changed, 399 insertions(+), 68 deletions(-)

diff --git a/drivers/net/mlx4/cmd.c b/drivers/net/mlx4/cmd.c
index 3c95da7..6637d5a 100644
--- a/drivers/net/mlx4/cmd.c
+++ b/drivers/net/mlx4/cmd.c
@@ -506,6 +506,24 @@ static int mlx4_RESOURCE_wrapper(struct mlx4_dev *dev, int 
slave, struct mlx4_vh
} else
mlx4_free_mtt_range(dev, param1 /* first */, param2 /* 
order */);
break;
+   case RES_MAC:
+   switch (vhcr-op) {
+   case MLX4_CMD_ALLOC_RES:
+   ret = mlx4_register_mac(dev, vhcr-op_modifier,
+   vhcr-in_param, (int *) 
vhcr-out_param);
+   vhcr-errno = ret;
+   break;
+   case MLX4_CMD_FREE_RES:
+   mlx4_unregister_mac(dev, vhcr-op_modifier, 
vhcr-in_param);
+   break;
+   case MLX4_CMD_REPLACE_RES:
+   ret = mlx4_replace_mac(dev, vhcr-op_modifier,
+  vhcr-out_param, vhcr-in_param);
+   vhcr-errno = ret;
+   break;
+   default:
+   vhcr-errno = -EINVAL;
+   }
default:
vhcr-errno = -EINVAL;
}
@@ -541,6 +559,38 @@ static struct mlx4_cmd_info {
},
 
{
+   .opcode = MLX4_CMD_INIT_PORT,
+   .has_inbox = false,
+   .has_outbox = false,
+   .out_is_imm = false,
+   .verify = NULL,
+   .wrapper = mlx4_INIT_PORT_wrapper},
+   {
+   .opcode = MLX4_CMD_CLOSE_PORT,
+   .has_inbox = false,
+   .has_outbox = false,
+   .out_is_imm  = false,
+   .verify = NULL,
+   .wrapper = mlx4_CLOSE_PORT_wrapper
+   },
+   {
+   .opcode = MLX4_CMD_QUERY_PORT,
+   .has_inbox = false,
+   .has_outbox = true,
+   .out_is_imm = false,
+   .verify = NULL,
+   .wrapper = mlx4_QUERY_PORT_wrapper
+   },
+   {
+   .opcode = MLX4_CMD_SET_PORT,
+   .has_inbox = true,
+   .has_outbox = false,
+   .out_is_imm = false,
+   .verify = NULL,
+   .wrapper = mlx4_SET_PORT_wrapper
+   },
+
+   {
.opcode = MLX4_CMD_SW2HW_EQ,
.has_inbox = true,
.has_outbox = false,
@@ -574,6 +624,14 @@ static struct mlx4_cmd_info {
},
 
{
+   .opcode = MLX4_CMD_REPLACE_RES,
+   .has_inbox = false,
+   .has_outbox = false,
+   .out_is_imm = true,
+   .verify = NULL,
+   .wrapper = mlx4_RESOURCE_wrapper
+   },
+   {
.opcode = MLX4_CMD_SW2HW_MPT,
.has_inbox = true,
.has_outbox = false,
diff --git a/drivers/net/mlx4/en_netdev.c b/drivers/net/mlx4/en_netdev.c
index 96180c0..7389fa2 100644
--- a/drivers/net/mlx4/en_netdev.c
+++ b/drivers/net/mlx4/en_netdev.c
@@ -146,9 +146,8 @@ static void mlx4_en_do_set_mac(struct work_struct *work)
mutex_lock(mdev-state_lock);
if (priv-port_up) {
/* Remove old MAC and insert the new one */
-   mlx4_unregister_mac(mdev-dev, priv-port, priv-mac_index);
-   err = mlx4_register_mac(mdev-dev, priv-port,
-   priv-mac, priv-mac_index);
+   err = mlx4_replace_mac(mdev-dev, priv-port,
+  priv-base_qpn, priv-mac);
if (err)
en_err(priv, Failed changing HW MAC address\n);
} else
@@ -589,10 +588,19 @@ int mlx4_en_start_port(struct net_device *dev)
++rx_index;
}
 
+   /* Set port mac number */
+   en_dbg(DRV, priv, Setting mac for port %d\n, priv-port);
+   err = 

[PATCH 06/19 V4] mlx4_core: dispatch slave asynch events

2010-06-10 Thread Yevgeny Petrilin
Affiliated and unaffiliated asynch events are handled by a single EQ owned by
the master. A per-slave SW event queue is added to log and dispatch both 
slave-specific
events and events that apply to all slaves.

Signed-off-by: Liran Liss lir...@mellanox.co.il
Signed-off-by: Yevgeny Petrilin yevge...@mellanox.co.il
---
 drivers/net/mlx4/cmd.c  |   12 ++-
 drivers/net/mlx4/eq.c   |   92 +++---
 drivers/net/mlx4/mlx4.h |8 
 3 files changed, 105 insertions(+), 7 deletions(-)

diff --git a/drivers/net/mlx4/cmd.c b/drivers/net/mlx4/cmd.c
index 6637d5a..ce467ca 100644
--- a/drivers/net/mlx4/cmd.c
+++ b/drivers/net/mlx4/cmd.c
@@ -622,6 +622,14 @@ static struct mlx4_cmd_info {
.verify = NULL,
.wrapper = mlx4_RESOURCE_wrapper
},
+   {
+   .opcode = MLX4_CMD_GET_EVENT,
+   .has_inbox = false,
+   .has_outbox = false,
+   .out_is_imm = true,
+   .verify = NULL,
+   .wrapper = mlx4_GET_EVENT_wrapper
+   },
 
{
.opcode = MLX4_CMD_REPLACE_RES,
@@ -1168,8 +1176,10 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
if (!priv-mfunc.master.slave_state)
goto err_comm;
 
-   for (i = 0; i  dev-num_slaves; ++i)
+   for (i = 0; i  dev-num_slaves; ++i) {
priv-mfunc.master.slave_state[i].last_cmd = 
MLX4_COMM_CMD_RESET;
+   spin_lock_init(priv-mfunc.master.slave_state[i].lock);
+   }
 
INIT_DELAYED_WORK(priv-mfunc.comm_work, 
mlx4_master_poll_comm);
priv-mfunc.comm_wq = 
create_singlethread_workqueue(mlx4_comm);
diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c
index 3c1aa18..e0cba6f 100644
--- a/drivers/net/mlx4/eq.c
+++ b/drivers/net/mlx4/eq.c
@@ -161,6 +161,61 @@ static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq)
return !!(eqe-owner  0x80) ^ !!(eq-cons_index  eq-nent) ? NULL : 
eqe;
 }
 
+void mlx4_slave_event(struct mlx4_dev *dev, int slave, u8 type, u8 port, u32 
param)
+{
+   struct mlx4_priv *priv = mlx4_priv(dev);
+   struct mlx4_slave_state *ctx = priv-mfunc.master.slave_state[slave];
+   unsigned long flags;
+
+   if (ctx-last_cmd != MLX4_COMM_CMD_VHCR_POST) {
+   mlx4_warn(dev, received event for inactive slave:%d\n, slave);
+   return;
+   }
+
+   /* Unconditionally add the new event - during overflows, we drop the
+* oldest events */
+   spin_lock_irqsave(ctx-lock, flags);
+   ctx-eq[ctx-eq_pi  MLX4_MFUNC_EQE_MASK].type = type;
+   ctx-eq[ctx-eq_pi  MLX4_MFUNC_EQE_MASK].port = port;
+   ctx-eq[ctx-eq_pi  MLX4_MFUNC_EQE_MASK].param = param;
+   ++ctx-eq_pi;
+   spin_unlock_irqrestore(ctx-lock, flags);
+}
+
+static void mlx4_slave_event_all(struct mlx4_dev *dev, u8 type, u8 port, u32 
param)
+{
+   struct mlx4_priv *priv = mlx4_priv(dev);
+   int i;
+
+   for (i = 0; i  dev-num_slaves; ++i)
+   if (priv-mfunc.master.slave_state[i].last_cmd == 
MLX4_COMM_CMD_VHCR_POST)
+   mlx4_slave_event(dev, i, type, port, param);
+}
+
+int mlx4_GET_EVENT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr 
*vhcr,
+struct mlx4_cmd_mailbox *inbox,
+struct mlx4_cmd_mailbox 
*outbox)
+{
+   struct mlx4_priv *priv = mlx4_priv(dev);
+   struct mlx4_slave_state *ctx = priv-mfunc.master.slave_state[slave];
+   unsigned long flags;
+
+   spin_lock_irqsave(ctx-lock, flags);
+   if (ctx-eq_ci == ctx-eq_pi) {
+   vhcr-out_param = MLX4_EVENT_TYPE_NONE;
+   } else if ((u16) (ctx-eq_pi - ctx-eq_ci)  MLX4_MFUNC_MAX_EQES) {
+   ctx-eq_ci = ctx-eq_pi - MLX4_MFUNC_MAX_EQES;
+   vhcr-out_param = MLX4_EVENT_TYPE_EQ_OVERFLOW;
+   } else {
+   vhcr-out_param = ctx-eq[ctx-eq_ci  
MLX4_MFUNC_EQE_MASK].type |
+ ((u64) ctx-eq[ctx-eq_ci  
MLX4_MFUNC_EQE_MASK].port  8) |
+ ((u64) ctx-eq[ctx-eq_ci  
MLX4_MFUNC_EQE_MASK].param  32);
+   ++ctx-eq_ci;
+   }
+   spin_unlock_irqrestore(ctx-lock, flags);
+   return 0;
+}
+
 static int mlx4_GET_EVENT(struct mlx4_dev *dev, struct mlx4_slave_eqe *eqe)
 {
int ret;
@@ -206,14 +261,26 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct 
mlx4_eq *eq)
case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
-   mlx4_qp_event(dev, be32_to_cpu(eqe-event.qp.qpn)  
0xff,
- eqe-type);
+   if (mlx4_is_master(dev)) {
+   /* TODO: forward only to slave owning the QP 

[PATCH 08/19 V4] mlx4_core: multi-function resource setup

2010-06-10 Thread Yevgeny Petrilin
Only master function needs to configure eq asynch events, and initialize 
resource
allocators.
Only the master function polls for fatal events.

Signed-off-by: Liran Liss lir...@mellanox.co.il
Signed-off-by: Yevgeny Petrilin yevge...@mellanox.co.il
---
 drivers/net/mlx4/cq.c   |4 +++
 drivers/net/mlx4/eq.c   |   52 +-
 drivers/net/mlx4/intf.c |6 +++-
 drivers/net/mlx4/main.c |   40 ++---
 drivers/net/mlx4/mcg.c  |6 +
 drivers/net/mlx4/mr.c   |6 +
 drivers/net/mlx4/pd.c   |7 -
 drivers/net/mlx4/qp.c   |   28 +-
 drivers/net/mlx4/srq.c  |4 +++
 include/linux/mlx4/device.h |1 +
 10 files changed, 109 insertions(+), 45 deletions(-)

diff --git a/drivers/net/mlx4/cq.c b/drivers/net/mlx4/cq.c
index c896f71..89af831 100644
--- a/drivers/net/mlx4/cq.c
+++ b/drivers/net/mlx4/cq.c
@@ -352,6 +352,8 @@ int mlx4_init_cq_table(struct mlx4_dev *dev)
 
spin_lock_init(cq_table-lock);
INIT_RADIX_TREE(cq_table-tree, GFP_ATOMIC);
+   if (mlx4_is_slave(dev))
+   return 0;
 
err = mlx4_bitmap_init(cq_table-bitmap, dev-caps.num_cqs,
   dev-caps.num_cqs - 1, dev-caps.reserved_cqs, 
0);
@@ -363,6 +365,8 @@ int mlx4_init_cq_table(struct mlx4_dev *dev)
 
 void mlx4_cleanup_cq_table(struct mlx4_dev *dev)
 {
+   if (mlx4_is_slave(dev))
+   return;
/* Nothing to do to clean up radix_tree */
mlx4_bitmap_cleanup(mlx4_priv(dev)-cq_table.bitmap);
 }
diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c
index e0cba6f..1cb692d 100644
--- a/drivers/net/mlx4/eq.c
+++ b/drivers/net/mlx4/eq.c
@@ -699,6 +699,7 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
struct mlx4_priv *priv = mlx4_priv(dev);
int err;
int i;
+   u32 round_eqs = roundup_pow_of_two(dev-caps.num_eqs);
 
priv-eq_table.uar_map = kcalloc(sizeof *priv-eq_table.uar_map,
 mlx4_num_eq_uar(dev), GFP_KERNEL);
@@ -707,33 +708,38 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
goto err_out_free;
}
 
-   err = mlx4_bitmap_init(priv-eq_table.bitmap, dev-caps.num_eqs,
-  dev-caps.num_eqs - 1, dev-caps.reserved_eqs, 
0);
+   err = mlx4_bitmap_init(priv-eq_table.bitmap, round_eqs, round_eqs - 1,
+  dev-caps.reserved_eqs, round_eqs - 
dev-caps.num_eqs);
if (err)
goto err_out_free;
 
for (i = 0; i  mlx4_num_eq_uar(dev); ++i)
priv-eq_table.uar_map[i] = NULL;
 
-   err = mlx4_map_clr_int(dev);
-   if (err)
-   goto err_out_bitmap;
+   if (!mlx4_is_slave(dev)) {
+   err = mlx4_map_clr_int(dev);
+   if (err)
+   goto err_out_bitmap;
 
-   priv-eq_table.clr_mask =
-   swab32(1  (priv-eq_table.inta_pin  31));
-   priv-eq_table.clr_int  = priv-clr_base +
-   (priv-eq_table.inta_pin  32 ? 4 : 0);
+   priv-eq_table.clr_mask =
+   swab32(1  (priv-eq_table.inta_pin  31));
+   priv-eq_table.clr_int  = priv-clr_base +
+   (priv-eq_table.inta_pin  32 ? 4 : 0);
+   }
 
priv-eq_table.irq_names =
kmalloc(MLX4_IRQNAME_SIZE * (dev-caps.num_comp_vectors + 1),
GFP_KERNEL);
if (!priv-eq_table.irq_names) {
err = -ENOMEM;
-   goto err_out_bitmap;
+   i = 0;
+   goto err_out_unmap;
}
 
for (i = 0; i  dev-caps.num_comp_vectors; ++i) {
-   err = mlx4_create_eq(dev, dev-caps.num_cqs + 
MLX4_NUM_SPARE_EQE,
+   err = mlx4_create_eq(dev, dev-caps.num_cqs -
+ dev-caps.reserved_cqs +
+ MLX4_NUM_SPARE_EQE,
 (dev-flags  MLX4_FLAG_MSI_X) ? i : 0,
 priv-eq_table.eq[i]);
if (err) {
@@ -791,11 +797,13 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
priv-eq_table.have_irq = 1;
}
 
-   err = mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0,
- priv-eq_table.eq[dev-caps.num_comp_vectors].eqn);
-   if (err)
-   mlx4_warn(dev, MAP_EQ for async EQ %d failed (%d)\n,
-  priv-eq_table.eq[dev-caps.num_comp_vectors].eqn, 
err);
+   if (!mlx4_is_slave(dev)) { /* hw async events cannot be shared */
+   err = mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0,
+ 
priv-eq_table.eq[dev-caps.num_comp_vectors].eqn);
+   if (err)
+   mlx4_warn(dev, MAP_EQ for async EQ %d failed (%d)\n,
+  

[PATCH 09/19 V4] mlx4_core: boot sriov

2010-06-10 Thread Yevgeny Petrilin
Add virtual function device IDs. Set the VF flag in the device private data.
Establish comm channel when sriov is enabled, and boot vfs through it.
Each slave gets one msi-X for completions, the master also gets one for
async events.

Signed-off-by: Liran Liss lir...@mellanox.co.il
Signed-off-by: Yevgeny Petrilin yevge...@mellanox.co.il
---
 drivers/net/mlx4/eq.c   |5 +-
 drivers/net/mlx4/main.c |  307 +++---
 drivers/net/mlx4/mlx4.h |4 +
 3 files changed, 240 insertions(+), 76 deletions(-)

diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c
index 1cb692d..9126c8e 100644
--- a/drivers/net/mlx4/eq.c
+++ b/drivers/net/mlx4/eq.c
@@ -805,13 +805,14 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
   
priv-eq_table.eq[dev-caps.num_comp_vectors].eqn, err);
}
 
-   for (i = 0; i  dev-caps.num_comp_vectors + 1; ++i)
+   for (i = 0; i  dev-caps.num_comp_vectors + !(mlx4_is_slave(dev)); ++i)
eq_set_ci(priv-eq_table.eq[i], 1);
 
return 0;
 
 err_out_async:
-   mlx4_free_eq(dev, priv-eq_table.eq[dev-caps.num_comp_vectors]);
+   if (!mlx4_is_slave(dev))
+   mlx4_free_eq(dev, 
priv-eq_table.eq[dev-caps.num_comp_vectors]);
 
 err_out_comp:
i = dev-caps.num_comp_vectors;
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index f67f992..3331c33 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -74,6 +74,23 @@ MODULE_PARM_DESC(msi_x, attempt to use MSI-X if nonzero);
 
 #endif /* CONFIG_PCI_MSI */
 
+#ifdef CONFIG_PCI_IOV
+
+static int sr_iov;
+module_param(sr_iov, int, 0444);
+MODULE_PARM_DESC(sr_iov, enable #sr_iov functions if sr_iov  0);
+
+static int probe_vf;
+module_param(probe_vf, int, 0444);
+MODULE_PARM_DESC(probe_vf, number of vfs to probe by pf driver (sr_iov  0));
+
+#else /* CONFIG_PCI_IOV */
+
+#define sr_iov 0
+#define probe_vf 0
+
+#endif /* CONFIG_PCI_IOV */
+
 static char mlx4_version[] __devinitdata =
DRV_NAME : Mellanox ConnectX core driver v
DRV_VERSION  ( DRV_RELDATE )\n;
@@ -780,12 +797,56 @@ static void mlx4_free_icms(struct mlx4_dev *dev)
mlx4_free_icm(dev, priv-fw.aux_icm, 0);
 }
 
+static void mlx4_slave_exit(struct mlx4_dev *dev)
+{
+   struct mlx4_priv *priv = mlx4_priv(dev);
+
+   down(priv-cmd.poll_sem);
+   if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME))
+   mlx4_warn(dev, Failed to close slave function.\n);
+   up(priv-cmd.poll_sem);
+}
+
 static void mlx4_close_hca(struct mlx4_dev *dev)
 {
-   mlx4_CLOSE_HCA(dev, 0);
-   mlx4_free_icms(dev);
-   mlx4_UNMAP_FA(dev);
-   mlx4_free_icm(dev, mlx4_priv(dev)-fw.fw_icm, 0);
+   if (mlx4_is_slave(dev))
+   mlx4_slave_exit(dev);
+   else {
+   mlx4_CLOSE_HCA(dev, 0);
+   mlx4_free_icms(dev);
+   mlx4_UNMAP_FA(dev);
+   mlx4_free_icm(dev, mlx4_priv(dev)-fw.fw_icm, 0);
+   }
+}
+
+static int mlx4_init_slave(struct mlx4_dev *dev)
+{
+   struct mlx4_priv *priv = mlx4_priv(dev);
+   u64 dma = (u64) priv-mfunc.vhcr_dma;
+
+   down(priv-cmd.poll_sem);
+   mlx4_warn(dev, Sending reset\n);
+   if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME))
+   goto err;
+   mlx4_warn(dev, Sending vhcr0\n);
+   if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma  48,
+   MLX4_COMM_TIME))
+   goto err;
+   if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma  32,
+   MLX4_COMM_TIME))
+   goto err;
+   if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma  16,
+   MLX4_COMM_TIME))
+   goto err;
+   if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME))
+   goto err;
+   up(priv-cmd.poll_sem);
+   return 0;
+
+err:
+   mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0);
+   up(priv-cmd.poll_sem);
+   return -EIO;
 }
 
 static int mlx4_init_hca(struct mlx4_dev *dev)
@@ -799,51 +860,65 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
u64 icm_size;
int err;
 
-   err = mlx4_QUERY_FW(dev);
-   if (err) {
-   if (err == -EACCES)
-   mlx4_info(dev, non-primary physical function, 
skipping.\n);
-   else
-   mlx4_err(dev, QUERY_FW command failed, aborting.\n);
-   return err;
-   }
+   if (!mlx4_is_slave(dev)) {
+   err = mlx4_QUERY_FW(dev);
+   if (err) {
+   if (err == -EACCES)
+   mlx4_info(dev, non-primary physical function, 
skipping.\n);
+   else
+   mlx4_err(dev, QUERY_FW command failed, 
aborting.\n);
+   return err;
+   

[PATCH 10/19 V4] mlx4_core: Determine primary physical function

2010-06-10 Thread Yevgeny Petrilin
In multifunctional devices, only the primary function will succeed
in executing the QUERY_FW command; all others will fail with an -EACCES error.
Ownership on the device can also be claimed by reading a descriptor before
sw reset. If it is 0, no one claimed ownership on the device so far,
otherwise, you are not the owner.
A physical function that is not primary would behave as slave.

Signed-off-by: Yevgeny Petrilin yevge...@mellanox.co.il
---
 drivers/net/mlx4/fw.c   |4 
 drivers/net/mlx4/fw.h   |1 +
 drivers/net/mlx4/main.c |   32 ++--
 drivers/net/mlx4/mlx4.h |2 ++
 drivers/net/mlx4/reset.c|   33 +
 include/linux/mlx4/device.h |2 ++
 6 files changed, 72 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index dc0570f..d1427e5 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -309,6 +309,10 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct 
mlx4_dev_cap *dev_cap)
dev_cap-max_rdma_global = 1  (field  0x3f);
MLX4_GET(field, outbox, QUERY_DEV_CAP_ACK_DELAY_OFFSET);
dev_cap-local_ca_ack_delay = field  0x1f;
+   MLX4_GET(field, outbox, QUERY_DEV_CAP_MTU_WIDTH_OFFSET);
+   dev_cap-pf_num = field;
+   if (dev_cap-pf_num  1)
+   dev-flags |= MLX4_FLAG_MASTER;
MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET);
dev_cap-num_ports = field  0xf;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET);
diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h
index d066c69..a9d7e55 100644
--- a/drivers/net/mlx4/fw.h
+++ b/drivers/net/mlx4/fw.h
@@ -64,6 +64,7 @@ struct mlx4_dev_cap {
int max_responder_per_qp;
int max_rdma_global;
int local_ca_ack_delay;
+   int pf_num;
int num_ports;
u32 max_msg_sz;
int ib_mtu[MLX4_MAX_PORTS + 1];
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index 3331c33..9dca6f4 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -191,6 +191,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct 
mlx4_dev_cap *dev_cap)
return -ENODEV;
}
 
+   dev-caps.pf_num = dev_cap-pf_num;
dev-caps.num_ports  = dev_cap-num_ports;
for (i = 1; i = dev-caps.num_ports; ++i) {
dev-caps.vl_cap[i] = dev_cap-max_vl[i];
@@ -1296,6 +1297,19 @@ static int __mlx4_init_one(struct pci_dev *pdev, const 
struct pci_device_id *id)
 
/* We reset the device and enable SRIOV only for physical devices */
if (!mlx4_is_slave(dev)) {
+   /* Claim ownership on the device,
+* if already taken, act as slave*/
+   err = mlx4_get_ownership(dev);
+   if (err) {
+   if (err  0) {
+   goto err_free_dev;
+   } else {
+   err = 0;
+   dev-flags |= MLX4_FLAG_SLAVE;
+   goto slave_start;
+   }
+   }
+
/*
 * Now reset the HCA before we touch the PCI capabilities or
 * attempt a firmware command, since a boot ROM may have left
@@ -1317,6 +1331,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, const 
struct pci_device_id *id)
}
}
 
+slave_start:
if (mlx4_cmd_init(dev)) {
mlx4_err(dev, Failed to init command interface, aborting.\n);
goto err_sriov;
@@ -1332,8 +1347,17 @@ static int __mlx4_init_one(struct pci_dev *pdev, const 
struct pci_device_id *id)
}
 
err = mlx4_init_hca(dev);
-   if (err)
-   goto err_cmd;
+   if (err) {
+   if (err == -EACCES) {
+   /* Not primary Physical function
+* Running in slave mode */
+   mlx4_cmd_cleanup(dev);
+   dev-flags |= MLX4_FLAG_SLAVE;
+   dev-flags = ~MLX4_FLAG_MASTER;
+   goto slave_start;
+   } else
+   goto err_cmd;
+   }
 
/* In master functions, the communication channel must be initialized 
after obtaining
 * its address from fw */
@@ -1422,6 +1446,8 @@ err_sriov:
pci_disable_sriov(pdev);
 
 err_free_dev:
+   if (!mlx4_is_slave(dev))
+   mlx4_free_ownership(dev);
kfree(priv);
 
 err_release_regions:
@@ -1490,6 +1516,8 @@ static void mlx4_remove_one(struct pci_dev *pdev)
pci_disable_sriov(pdev);
}
 
+   if (!mlx4_is_slave(dev))
+   mlx4_free_ownership(dev);
kfree(priv);
pci_release_regions(pdev);
pci_disable_device(pdev);
diff --git a/drivers/net/mlx4/mlx4.h 

[PATCH 11/19 V4] mlx4_core: Activating ports according to function number

2010-06-10 Thread Yevgeny Petrilin
In devices with multiple physical functions, each function activates
only one port, according to the function number.
Even functions activate port 1, odd functions activate port2.
For every virtual function we query the FW to determine which physical function it
belongs to, as all the functions are served by the master function.

Signed-off-by: Yevgeny Petrilin yevge...@mellanox.co.il
---
 drivers/net/mlx4/cmd.c  |8 
 drivers/net/mlx4/fw.c   |   36 ++--
 drivers/net/mlx4/fw.h   |1 +
 drivers/net/mlx4/main.c |   19 ---
 drivers/net/mlx4/mlx4.h |3 +++
 drivers/net/mlx4/port.c |9 +
 include/linux/mlx4/cmd.h|1 +
 include/linux/mlx4/device.h |6 +++---
 8 files changed, 67 insertions(+), 16 deletions(-)

diff --git a/drivers/net/mlx4/cmd.c b/drivers/net/mlx4/cmd.c
index a4722e2..b25e40e 100644
--- a/drivers/net/mlx4/cmd.c
+++ b/drivers/net/mlx4/cmd.c
@@ -423,9 +423,11 @@ static int mlx4_RESOURCE_wrapper(struct mlx4_dev *dev, int 
slave, struct mlx4_vh
   struct mlx4_cmd_mailbox 
*inbox,
   struct mlx4_cmd_mailbox 
*outbox)
 {
+   struct mlx4_priv *priv = mlx4_priv(dev);
u32 param1 = *((u32 *) vhcr-in_param);
u32 param2 = *(((u32 *) vhcr-in_param) + 1);
int ret;
+   u8 pf_num = priv-mfunc.master.slave_state[slave].pf_num;
 
 #if 0
char *res[] = {QP, CQ, SRQ, MPT, MTT};
@@ -508,6 +510,7 @@ static int mlx4_RESOURCE_wrapper(struct mlx4_dev *dev, int 
slave, struct mlx4_vh
mlx4_free_mtt_range(dev, param1 /* first */, param2 /* 
order */);
break;
case RES_MAC:
+   vhcr-in_param |= (u64) (pf_num)  48;
switch (vhcr-op) {
case MLX4_CMD_ALLOC_RES:
ret = mlx4_register_mac(dev, vhcr-op_modifier,
@@ -1096,6 +1099,11 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int 
slave, u8 cmd, u16 para
if (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR2)
goto reset_slave;
slave_state[slave].vhcr_dma |= param;
+   if (mlx4_QUERY_FUNC(dev, slave, slave_state[slave].pf_num)) {
+   mlx4_err(dev, Failed to determine physical function 
+ number for slave %d\n, slave);
+   goto reset_slave;
+   }
break;
case MLX4_COMM_CMD_VHCR_POST:
if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) 
diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index d1427e5..55377c0 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -152,9 +152,13 @@ int mlx4_QUERY_SLAVE_CAP_wrapper(struct mlx4_dev *dev, int 
slave, struct mlx4_vh
 
memcpy(caps, dev-caps, sizeof *caps);
 
+   /* Ports are activated according to physical function number */
+   mlx4_set_port_mask(dev, caps, slave);
+
/* PDs have the same range in every guest; the distinction is in the 
msbs,
 * which contains the guest ID (vf + 1) */
caps-pd_base = slave + 1;
+   caps-function = slave;
 
/* All other resources are allocated by the master, but we still report
 * 'num' and 'reserved' capabilities as follows:
@@ -596,6 +600,7 @@ int mlx4_QUERY_FW(struct mlx4_dev *dev)
 
 #define QUERY_FW_OUT_SIZE 0x100
 #define QUERY_FW_VER_OFFSET0x00
+#define QUERY_FW_PPF_ID   0x09
 #define QUERY_FW_CMD_IF_REV_OFFSET 0x0a
 #define QUERY_FW_MAX_CMD_OFFSET0x0f
 #define QUERY_FW_ERR_START_OFFSET  0x30
@@ -628,6 +633,9 @@ int mlx4_QUERY_FW(struct mlx4_dev *dev)
((fw_ver  0xull)  16) |
((fw_ver  0xull)  16);
 
+   MLX4_GET(lg, outbox, QUERY_FW_PPF_ID);
+   dev-caps.function = lg;
+
MLX4_GET(cmd_if_rev, outbox, QUERY_FW_CMD_IF_REV_OFFSET);
if (cmd_if_rev  MLX4_COMMAND_INTERFACE_MIN_REV ||
cmd_if_rev  MLX4_COMMAND_INTERFACE_MAX_REV) {
@@ -938,7 +946,8 @@ int mlx4_INIT_PORT(struct mlx4_dev *dev, int port)
mlx4_free_cmd_mailbox(dev, mailbox);
} else {
if (mlx4_is_master(dev))
-   err = mlx4_common_init_port(dev, 0, port);
+   err = mlx4_common_init_port(dev, dev-caps.function,
+   port);
else
err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT,
   MLX4_CMD_TIME_CLASS_A);
@@ -978,7 +987,7 @@ int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int 
slave, struct mlx4_vhcr *v
 int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port)
 {
if (mlx4_is_master(dev))
-   return mlx4_common_close_port(dev, 0, port);
+   return 

[PATCH 2.6.35 1/3] RDMA/cxgb4: Don't call abort_connection() for active connect failures.

2010-06-10 Thread Steve Wise
Signed-off-by: Steve Wise sw...@opengridcomputing.com
---

 drivers/infiniband/hw/cxgb4/cm.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 30ce0a8..3e15a07 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -969,7 +969,8 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct 
sk_buff *skb)
goto err;
goto out;
 err:
-   abort_connection(ep, skb, GFP_KERNEL);
+   state_set(ep-com, ABORTING);
+   send_abort(ep, skb, GFP_KERNEL);
 out:
connect_reply_upcall(ep, err);
return;

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2.6.35 2/3] RDMA/cxgb4: Support variable sized work requests.

2010-06-10 Thread Steve Wise
T4 EQ entries are in multiples of 64B.  Currently the RDMA SQ and RQ
use fixed sized entries composed of 4 EQ entries for the SQ and 2 EQ
entries for the RQ.  For optimal latency with small IO, we need to
change this so the HW only needs to DMA the EQ entries actually used by
a given work request.

Implementation:

- add wq_pidx counter to track where we are in the EQ.  cidx/pidx are
used for the sw sq/rq tracking and flow control.

- the variable part of work requests is the SGL.  Add new functions to
build the SGL and/or immediate data directly in the EQ memory wrapping
when needed.

- adjust the min burst size for the EQ contexts to 64B.

Signed-off-by: Steve Wise sw...@opengridcomputing.com
---

 drivers/infiniband/hw/cxgb4/qp.c |  220 --
 drivers/infiniband/hw/cxgb4/t4.h |   32 +++---
 2 files changed, 130 insertions(+), 122 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 0c28ed1..7d87fe5 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -162,7 +162,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq 
*wq,
res-u.sqrq.dcaen_to_eqsize = cpu_to_be32(
V_FW_RI_RES_WR_DCAEN(0) |
V_FW_RI_RES_WR_DCACPU(0) |
-   V_FW_RI_RES_WR_FBMIN(3) |
+   V_FW_RI_RES_WR_FBMIN(2) |
V_FW_RI_RES_WR_FBMAX(3) |
V_FW_RI_RES_WR_CIDXFTHRESHO(0) |
V_FW_RI_RES_WR_CIDXFTHRESH(0) |
@@ -185,7 +185,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq 
*wq,
res-u.sqrq.dcaen_to_eqsize = cpu_to_be32(
V_FW_RI_RES_WR_DCAEN(0) |
V_FW_RI_RES_WR_DCACPU(0) |
-   V_FW_RI_RES_WR_FBMIN(3) |
+   V_FW_RI_RES_WR_FBMIN(2) |
V_FW_RI_RES_WR_FBMAX(3) |
V_FW_RI_RES_WR_CIDXFTHRESHO(0) |
V_FW_RI_RES_WR_CIDXFTHRESH(0) |
@@ -235,12 +235,78 @@ err1:
return -ENOMEM;
 }
 
-static int build_rdma_send(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
+static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp,
+ struct ib_send_wr *wr, int max, u32 *plenp)
 {
+   u8 *dstp, *srcp;
+   u32 plen = 0;
int i;
+   int rem, len;
+
+   dstp = (u8 *)immdp-data;
+   for (i = 0; i  wr-num_sge; i++) {
+   if ((plen + wr-sg_list[i].length)  max)
+   return -EMSGSIZE;
+   srcp = (u8 *)(unsigned long)wr-sg_list[i].addr;
+   plen += wr-sg_list[i].length;
+   rem = wr-sg_list[i].length;
+   while (rem) {
+   if (dstp == (u8 *)sq-queue[sq-size])
+   dstp = (u8 *)sq-queue;
+   if (rem = (u8 *)sq-queue[sq-size] - dstp)
+   len = rem;
+   else
+   len = (u8 *)sq-queue[sq-size] - dstp;
+   memcpy(dstp, srcp, len);
+   dstp += len;
+   srcp += len;
+   rem -= len;
+   }
+   }
+   immdp-op = FW_RI_DATA_IMMD;
+   immdp-r1 = 0;
+   immdp-r2 = 0;
+   immdp-immdlen = cpu_to_be32(plen);
+   *plenp = plen;
+   return 0;
+}
+
+static int build_isgl(__be64 *queue_start, __be64 *queue_end,
+ struct fw_ri_isgl *isglp, struct ib_sge *sg_list,
+ int num_sge, u32 *plenp)
+
+{
+   int i;
+   u32 plen = 0;
+   __be64 *flitp = (__be64 *)isglp-sge;
+
+   for (i = 0; i  num_sge; i++) {
+   if ((plen + sg_list[i].length)  plen)
+   return -EMSGSIZE;
+   plen += sg_list[i].length;
+   *flitp = cpu_to_be64(((u64)sg_list[i].lkey  32) |
+sg_list[i].length);
+   if (++flitp == queue_end)
+   flitp = queue_start;
+   *flitp = cpu_to_be64(sg_list[i].addr);
+   if (++flitp == queue_end)
+   flitp = queue_start;
+   }
+   isglp-op = FW_RI_DATA_ISGL;
+   isglp-r1 = 0;
+   isglp-nsge = cpu_to_be16(num_sge);
+   isglp-r2 = 0;
+   if (plenp)
+   *plenp = plen;
+   return 0;
+}
+
+static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
+  struct ib_send_wr *wr, u8 *len16)
+{
u32 plen;
int size;
-   u8 *datap;
+   int ret;
 
if (wr-num_sge  T4_MAX_SEND_SGE)
return -EINVAL;
@@ -267,43 +333,23 @@ static int build_rdma_send(union t4_wr *wqe, struct 
ib_send_wr *wr, u8 *len16)
default:
return -EINVAL;
}
+
plen = 0;
if (wr-num_sge) {
if (wr-send_flags  IB_SEND_INLINE) {
-   datap = (u8 *)wqe-send.u.immd_src[0].data;
-   for 

[PATCH 2.6.35 3/3] RDMA/cxgb4: Avoid false GTS CIDX_INC overflows.

2010-06-10 Thread Steve Wise
The T4 IQ hw design assumes CIDX_INC credits will be returned on a regular
basis and always before the CIDX counter crosses over the PIDX counter.
For RDMA CQs, however, returning CIDX_INC credits is only needed and
desired when and if the CQ is armed for notification.  This can lead
to a GTS write returning credits that causes the HW to reject the
credit update because it causes CIDX to pass PIDX.  Once this happens,
the CIDX/PIDX counters get out of whack and an application can miss a
notification and get stuck blocked awaiting a notification.

To avoid this, we allocate the HW IQ at 2x the requested size.
This seems to avoid the false overflow failures.  If we see more issues
with this, then we'll have to add code in the poll path to return credits
periodically (like when the amount reaches 1/2 the queue depth).  I would
like to avoid this as it adds a PCI write transaction for applications
that never arm the CQ (like most MPIs).

Signed-off-by: Steve Wise sw...@opengridcomputing.com
---

 drivers/infiniband/hw/cxgb4/cq.c |   25 -
 1 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 2447f52..4311501 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -764,7 +764,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int 
entries,
struct c4iw_create_cq_resp uresp;
struct c4iw_ucontext *ucontext = NULL;
int ret;
-   size_t memsize;
+   size_t memsize, hwentries;
struct c4iw_mm_entry *mm, *mm2;
 
PDBG(%s ib_dev %p entries %d\n, __func__, ibdev, entries);
@@ -788,14 +788,29 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int 
entries,
 * entries must be multiple of 16 for HW.
 */
entries = roundup(entries, 16);
-   memsize = entries * sizeof *chp-cq.queue;
+
+   /*
+* Make actual HW queue 2x to avoid cdix_inc overflows.
+*/
+   hwentries = entries * 2;
+
+   /*
+* Make HW queue at least 64 entries so GTS updates aren't too
+* frequent.
+*/
+   if (hwentries  64)
+   hwentries = 64;
+
+   memsize = hwentries * sizeof *chp-cq.queue;
 
/*
 * memsize must be a multiple of the page size if its a user cq.
 */
-   if (ucontext)
+   if (ucontext) {
memsize = roundup(memsize, PAGE_SIZE);
-   chp-cq.size = entries;
+   hwentries = memsize / sizeof *chp-cq.queue;
+   }
+   chp-cq.size = hwentries;
chp-cq.memsize = memsize;
 
ret = create_cq(rhp-rdev, chp-cq,
@@ -805,7 +820,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int 
entries,
 
chp-rhp = rhp;
chp-cq.size--; /* status page */
-   chp-ibcq.cqe = chp-cq.size - 1;
+   chp-ibcq.cqe = entries - 2;
spin_lock_init(chp-lock);
atomic_set(chp-refcnt, 1);
init_waitqueue_head(chp-wait);

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 12/19 V4] mlx4_core: slave multicast support

2010-06-10 Thread Yevgeny Petrilin
Multicast table processing requires multiple related commands.
To keep things simple, low-level multicast handling is done only by the master;
a new virtual command is added to allow slaves to attach/detach QPs to multicast
groups at a higher abstraction level.
The multicast attachment mechanism is used both by IB and Ethernet,
so we need to specify for each multicast address (whether it is gid or mac)
its protocol.
For Ethernet addresses, their VEP number should be specified. This field is
set according to the device capabilities. Search and hash calculation are also done
according to this field.
All Ethernet ports now need to register to the multicast groups; we can no longer
use the default multicast queue per port because of the multiple clients per
port.

Signed-off-by: Liran Liss lir...@mellanox.co.il
Signed-off-by: Yevgeny Petrilin yevge...@mellanox.co.il
---
 drivers/infiniband/hw/mlx4/main.c |6 +-
 drivers/net/mlx4/cmd.c|8 +++
 drivers/net/mlx4/en_netdev.c  |   42 -
 drivers/net/mlx4/en_port.c|4 +-
 drivers/net/mlx4/en_port.h|5 ++
 drivers/net/mlx4/fw.c |4 +
 drivers/net/mlx4/fw.h |2 +
 drivers/net/mlx4/main.c   |2 +
 drivers/net/mlx4/mcg.c|  127 +++-
 drivers/net/mlx4/mlx4.h   |4 +
 drivers/net/mlx4/port.c   |4 +-
 include/linux/mlx4/cmd.h  |1 +
 include/linux/mlx4/device.h   |   14 -
 13 files changed, 196 insertions(+), 27 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/main.c 
b/drivers/infiniband/hw/mlx4/main.c
index 4e94e36..2c28f98 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -452,13 +452,15 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union 
ib_gid *gid, u16 lid)
return mlx4_multicast_attach(to_mdev(ibqp-device)-dev,
 to_mqp(ibqp)-mqp, gid-raw,
 !!(to_mqp(ibqp)-flags 
-   MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK));
+   MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
+MLX4_PROT_IB_IPV6);
 }
 
 static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 {
return mlx4_multicast_detach(to_mdev(ibqp-device)-dev,
-to_mqp(ibqp)-mqp, gid-raw);
+to_mqp(ibqp)-mqp, gid-raw,
+MLX4_PROT_IB_IPV6);
 }
 
 static int init_node_data(struct mlx4_ib_dev *dev)
diff --git a/drivers/net/mlx4/cmd.c b/drivers/net/mlx4/cmd.c
index b25e40e..0f45fde 100644
--- a/drivers/net/mlx4/cmd.c
+++ b/drivers/net/mlx4/cmd.c
@@ -903,6 +903,14 @@ static struct mlx4_cmd_info {
 
/* Native multicast commands are not available for guests */
{
+   .opcode = MLX4_CMD_MCAST_ATTACH,
+   .has_inbox = true,
+   .has_outbox = false,
+   .out_is_imm = false,
+   .verify = NULL,
+   .wrapper = mlx4_MCAST_wrapper
+   },
+   {
.opcode = MLX4_CMD_DIAG_RPRT,
.has_inbox = false,
.has_outbox = true,
diff --git a/drivers/net/mlx4/en_netdev.c b/drivers/net/mlx4/en_netdev.c
index 7389fa2..5ebe135 100644
--- a/drivers/net/mlx4/en_netdev.c
+++ b/drivers/net/mlx4/en_netdev.c
@@ -173,6 +173,7 @@ static void mlx4_en_cache_mclist(struct net_device *dev)
int mc_addrs_cnt = netdev_mc_count(dev);
int i;
 
+   mlx4_en_clear_list(dev);
mc_addrs = kmalloc(mc_addrs_cnt * ETH_ALEN, GFP_ATOMIC);
if (!mc_addrs) {
en_err(priv, failed to allocate multicast list\n);
@@ -203,6 +204,7 @@ static void mlx4_en_do_set_multicast(struct work_struct 
*work)
struct mlx4_en_dev *mdev = priv-mdev;
struct net_device *dev = priv-dev;
u64 mcast_addr = 0;
+   u8 mc_list[16] = {0};
int err;
 
mutex_lock(mdev-state_lock);
@@ -284,6 +286,14 @@ static void mlx4_en_do_set_multicast(struct work_struct 
*work)
if (err)
en_err(priv, Failed disabling multicast filter\n);
 
+   /* Detach our qp from all the multicast addresses */
+   for (i = 0; i  priv-mc_addrs_cnt; i++) {
+   memcpy(mc_list[10], priv-mc_addrs + i * ETH_ALEN,
+  ETH_ALEN);
+   mc_list[7] = (priv-port - 1)  4;
+   mlx4_multicast_detach(mdev-dev, 
priv-rss_map.indir_qp,
+ mc_list, MLX4_PROT_ETH);
+   }
/* Flush mcast filter and init it with broadcast address */
mlx4_SET_MCAST_FLTR(mdev-dev, priv-port, ETH_BCAST,
1, MLX4_MCAST_CONFIG);
@@ -294,6 +304,11 @@ static void 

Re: InfiniBand/RDMA merge plans for 2.6.35

2010-06-10 Thread Andy Grover

On 06/09/2010 11:08 AM, Roland Dreier wrote:

Please also pick up the 3-patch set Least attached vector support
from Yevgeny on 2010-5-13? RDS changes depend on these.

It's now post -rc2, so these obviously wait for 2.6.36 at best.

However, I haven't replied to these patches in detail but in general I
don't like this approach of pick a random vector since it is
non-deterministic and not likely to end up with an optimal result.


What is the optimal way to do this, if it isn't to spread CQs evenly 
across all available vectors? (least attached vector != random.)


I guess we'll just round-robin modulo caps.num_comp_vectors for now, but 
I do think this should be up to the hca, not the ulp, since the ULP has 
no visibility into other ulp's usage of vectors.


Regards -- Andy
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: converting ib-mgmt scripts to executables

2010-06-10 Thread Hefty, Sean
 As for ibchecknet, under what use case do you see people running it?  Could
 ibnetdiscover or iblinkinfo provide the same functionality?

I'll check on how it is being used and whether the other calls would work just 
as well.

Thanks,
- Sean
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: converting ib-mgmt scripts to executables

2010-06-10 Thread Ira Weiny
On Thu, 10 Jun 2010 09:04:01 -0700
Hefty, Sean sean.he...@intel.com wrote:

 Sasha,
 
 Before we do the work, would there be any issue converting a couple of the 
 ib-diag scripts to executables?  Specifically, we'd like to have ibchecknet 
 and ibcheckerrors functionality available on Windows.
 

Here at LLNL we were thinking it would be best to start removing some of the
scripts to reduce the confusion about what tools do.  For example, there is
some confusion among our users as to what ibcheckerrs, ibcheckerrors, and
ibqueryerrors do.

AFAICT ibcheckerrors and ibqueryerrors only differ in 3 respects.

   1) the formated output is different
   2) ibcheckerrors calls ibcheckerrs which ignores error counts which are
  below a threshold either hard coded or specified by a file.
   3) ibcheckerrs defaults to use AllPortSelect which may result in a faster
  scan. [*]
   
  [*] I have a patch for ibqueryerrors which querys AllPortSelect first
  and only issues individual queries if it sees errors.  However, frankly
  it did not seem to speed up the scan on our large clusters so I don't
  know if this is a big difference between ibcheckerrors and
  ibqueryerrors.)

2 is very easy to add to ibqueryerrors, I don't think 1 would be that hard,
and 3 is basically done.

As for ibchecknet, under what use case do you see people running it?  Could
ibnetdiscover or iblinkinfo provide the same functionality?

Ira

 - Sean
 --
 To unsubscribe from this list: send the line unsubscribe linux-rdma in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://*vger.kernel.org/majordomo-info.html
 
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 13/19 V4] mlx4_core: Giving Mac addresses for slave functions.

2010-06-10 Thread Yevgeny Petrilin
For physical slaves, the Mac address is retrieved from static configuration.
Virtual slaves get random Macs.

Signed-off-by: Yevgeny Petrilin yevge...@mellanox.co.il
---
 drivers/net/mlx4/fw.c |   64 +
 drivers/net/mlx4/fw.h |6 
 2 files changed, 70 insertions(+), 0 deletions(-)

diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index e53a392..773de63 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -32,6 +32,7 @@
  * SOFTWARE.
  */
 
+#include linux/etherdevice.h
 #include linux/mlx4/cmd.h
 #include linux/cache.h
 
@@ -136,6 +137,45 @@ int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct 
mlx4_mod_stat_cfg *cfg)
return err;
 }
 
+int mlx4_QUERY_VEP_CFG(struct mlx4_dev *dev, u8 vep_num,
+  struct mlx4_vep_cfg *cfg)
+{
+   int err;
+   u32 in_mod;
+   u64 output;
+
+#define QUERY_VEP_CFG_OPMOD3
+
+#define QUERY_VEP_CFG_INMOD(2  28)
+#define QUERY_VEP_CFG_INMOD_VEP_OFFSET 16
+
+#define QUERY_VEP_CFG_MAC_OFFSET   0x90
+#define QUERY_VEP_CFG_LINK_OFFSET  0xa0
+
+
+   in_mod = QUERY_VEP_CFG_INMOD | (vep_num  
QUERY_VEP_CFG_INMOD_VEP_OFFSET);
+
+   err = mlx4_cmd_imm(dev, 0, output, in_mod | QUERY_VEP_CFG_MAC_OFFSET,
+  QUERY_VEP_CFG_OPMOD, MLX4_CMD_MOD_STAT_CFG,
+  MLX4_CMD_TIME_CLASS_A);
+   if (err) {
+   mlx4_err(dev, Failed to retrieve mac for function %d\n, 
vep_num);
+   return err;
+   }
+   cfg-mac = output  0xULL;
+
+   err = mlx4_cmd_imm(dev, 0, output, in_mod | QUERY_VEP_CFG_LINK_OFFSET,
+  QUERY_VEP_CFG_OPMOD, MLX4_CMD_MOD_STAT_CFG,
+  MLX4_CMD_TIME_CLASS_A);
+   if (err) {
+   mlx4_err(dev, Failed to retrieve link for function %d\n, 
vep_num);
+   return err;
+   }
+   cfg-link = (output  32)  1;
+
+   return 0;
+}
+
 int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr 
*vhcr,
  struct 
mlx4_cmd_mailbox *inbox,
  struct 
mlx4_cmd_mailbox *outbox)
@@ -148,10 +188,34 @@ int mlx4_QUERY_SLAVE_CAP_wrapper(struct mlx4_dev *dev, 
int slave, struct mlx4_vh
   struct mlx4_cmd_mailbox 
*inbox,
   struct mlx4_cmd_mailbox 
*outbox)
 {
+   struct mlx4_priv *priv = mlx4_priv(dev);
+   struct mlx4_mfunc_master_ctx *master = priv-mfunc.master;
+   struct mlx4_slave_state *slave_st = master-slave_state[slave];
struct mlx4_caps *caps = outbox-buf;
+   struct mlx4_vep_cfg cfg;
+   u8 pf_num = slave_st-pf_num;
+   u8 rand_mac[6];
+   int i, j, err = 0;
 
memcpy(caps, dev-caps, sizeof *caps);
 
+   /* For physical functions Mac should be defined by fw */
+   if (pf_num == slave) {
+   err = mlx4_QUERY_VEP_CFG(dev, pf_num, cfg);
+   if (err)
+   mlx4_warn(dev, Failed to retreive mac address for vep 
%d\n, pf_num);
+   else
+   caps-def_mac[(pf_num  1) + 1] = cfg.mac;
+   }
+   if (pf_num != slave || err) {
+   for (i = 1; i = dev-caps.num_ports; ++i) {
+   random_ether_addr(rand_mac);
+   caps-def_mac[i] = 0;
+   for (j = 0; j  ETH_ALEN; j++)
+   caps-def_mac[i] |= ((u64)(rand_mac[1])  8 * 
j);
+   }
+   }
+
/* Ports are activated according to physical function number */
mlx4_set_port_mask(dev, caps, slave);
 
diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h
index d5c17cf..f8d49d0 100644
--- a/drivers/net/mlx4/fw.h
+++ b/drivers/net/mlx4/fw.h
@@ -43,6 +43,11 @@ struct mlx4_mod_stat_cfg {
u8 log_pg_sz_m;
 };
 
+struct mlx4_vep_cfg {
+   u64 mac;
+   u8  link;
+};
+
 struct mlx4_dev_cap {
int max_srq_sz;
int max_qp_sz;
@@ -180,6 +185,7 @@ int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm 
*icm);
 int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev);
 int mlx4_NOP(struct mlx4_dev *dev);
 int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg);
+int mlx4_QUERY_VEP_CFG(struct mlx4_dev *dev, u8 vep_num, struct mlx4_vep_cfg 
*cfg);
 int mlx4_QUERY_FUNC(struct mlx4_dev *dev, int func, u8 *pf_num);
 
 #endif /* MLX4_FW_H */
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 14/19 V4] mlx4_core: Managing common port configuration by master function

2010-06-10 Thread Yevgeny Petrilin
The Multicast filter configuration is done by the master,
that manages the filter which is common for all the functions.
The master holds a list of multicast addresses for all the
slaves, and adds them to the filter.
In case some slave wishes to flush the filter, only his addresses
are removed.
The VLAN filter is a bitwise OR of all the VLAN filters for all functions,
the result is a false-positive filter.
All port configuration is moved to the mlx4_core module

Signed-off-by: Yevgeny Petrilin yevge...@mellanox.co.il
---
 drivers/net/mlx4/cmd.c  |   41 +--
 drivers/net/mlx4/en_port.c  |  105 --
 drivers/net/mlx4/en_port.h  |   47 --
 drivers/net/mlx4/mlx4.h |   63 +
 drivers/net/mlx4/mlx4_en.h  |1 -
 drivers/net/mlx4/port.c |  326 +--
 include/linux/mlx4/device.h |6 +
 7 files changed, 416 insertions(+), 173 deletions(-)

diff --git a/drivers/net/mlx4/cmd.c b/drivers/net/mlx4/cmd.c
index 0f45fde..4cfa407 100644
--- a/drivers/net/mlx4/cmd.c
+++ b/drivers/net/mlx4/cmd.c
@@ -926,7 +926,7 @@ static struct mlx4_cmd_info {
.has_outbox = false,
.out_is_imm = false,
.verify = NULL,
-   .wrapper = NULL /* need wrapper*/
+   .wrapper = mlx4_SET_VLAN_FLTR_wrapper
},
{
.opcode = MLX4_CMD_SET_MCAST_FLTR,
@@ -934,7 +934,7 @@ static struct mlx4_cmd_info {
.has_outbox = false,
.out_is_imm = false,
.verify = NULL,
-   .wrapper = NULL /* need wrapper*/
+   .wrapper = mlx4_SET_MCAST_FLTR_wrapper
},
{
.opcode = MLX4_CMD_DUMP_ETH_STATS,
@@ -1170,7 +1170,8 @@ static void mlx4_master_poll_comm(struct work_struct 
*work)
 int mlx4_multi_func_init(struct mlx4_dev *dev)
 {
struct mlx4_priv *priv = mlx4_priv(dev);
-   int i;
+   struct mlx4_slave_state *s_state;
+   int i, port;
 
priv-mfunc.vhcr = dma_alloc_coherent((dev-pdev-dev), PAGE_SIZE,
priv-mfunc.vhcr_dma,
@@ -1202,16 +1203,27 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
goto err_comm;
 
for (i = 0; i  dev-num_slaves; ++i) {
-   priv-mfunc.master.slave_state[i].last_cmd = 
MLX4_COMM_CMD_RESET;
-   spin_lock_init(priv-mfunc.master.slave_state[i].lock);
+   s_state = priv-mfunc.master.slave_state[i];
+   s_state-last_cmd = MLX4_COMM_CMD_RESET;
+   for (port = 1; port = MLX4_MAX_PORTS; port++) {
+   s_state-vlan_filter[port] =
+   kzalloc(sizeof(struct mlx4_vlan_fltr),
+   GFP_KERNEL);
+   if (!s_state-vlan_filter[port]) {
+   if (--port)
+   
kfree(s_state-vlan_filter[port]);
+   goto err_slaves;
+   }
+   INIT_LIST_HEAD(s_state-mcast_filters[port]);
+   }
+   spin_lock_init(s_state-lock);
}
 
INIT_DELAYED_WORK(priv-mfunc.comm_work, 
mlx4_master_poll_comm);
priv-mfunc.comm_wq = 
create_singlethread_workqueue(mlx4_comm);
-   if (!priv-mfunc.comm_wq) {
-   kfree(priv-mfunc.master.slave_state);
-   goto err_comm;
-   }
+   if (!priv-mfunc.comm_wq)
+   goto err_slaves;
+
} else {
priv-cmd.comm_toggle = 0;
INIT_DELAYED_WORK(priv-mfunc.comm_work, 
mlx4_slave_async_eq_poll);
@@ -1221,6 +1233,12 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
}
return 0;
 
+err_slaves:
+   while (--i) {
+   for (port = 1; port = MLX4_MAX_PORTS; port++)
+   
kfree(priv-mfunc.master.slave_state[i].vlan_filter[port]);
+   }
+   kfree(priv-mfunc.master.slave_state);
 err_comm:
iounmap(priv-mfunc.comm);
 err_vhcr:
@@ -1269,9 +1287,14 @@ err_hcr:
 void mlx4_multi_func_cleanup(struct mlx4_dev *dev)
 {
struct mlx4_priv *priv = mlx4_priv(dev);
+   int i, port;
 
if (priv-mfunc.vhcr) {
destroy_workqueue(priv-mfunc.comm_wq);
+   for (i = 0; i  dev-num_slaves; i++) {
+   for (port = 1; port = MLX4_MAX_PORTS; port++)
+   
kfree(priv-mfunc.master.slave_state[i].vlan_filter[port]);
+   }
kfree(priv-mfunc.master.slave_state);
iounmap(priv-mfunc.comm);
dma_free_coherent((dev-pdev-dev), PAGE_SIZE,
diff --git a/drivers/net/mlx4/en_port.c b/drivers/net/mlx4/en_port.c
index 

[PATCH 16/19 V4] mlx4_en: querying link state

2010-06-10 Thread Yevgeny Petrilin
In a multifunction device, a certain function can initialize its port after some
other function has already done so. In that case a link event would not be
generated. Need to query the port to retrieve the port state.
Need to query the port to retrieve the port state.

Signed-off-by: Yevgeny Petrilin yevge...@mellanox.co.il
---
 drivers/net/mlx4/en_netdev.c |   10 ++
 drivers/net/mlx4/en_port.c   |   32 
 drivers/net/mlx4/en_port.h   |   13 +
 drivers/net/mlx4/mlx4_en.h   |8 
 4 files changed, 63 insertions(+), 0 deletions(-)

diff --git a/drivers/net/mlx4/en_netdev.c b/drivers/net/mlx4/en_netdev.c
index 5ebe135..d171945 100644
--- a/drivers/net/mlx4/en_netdev.c
+++ b/drivers/net/mlx4/en_netdev.c
@@ -219,6 +219,16 @@ static void mlx4_en_do_set_multicast(struct work_struct 
*work)
goto out;
}
 
+   if (!netif_carrier_ok(dev)) {
+   if (!mlx4_en_QUERY_PORT(mdev, priv-port)) {
+   if (priv-port_state.link_state) {
+   priv-last_link_state = MLX4_DEV_EVENT_PORT_UP;
+   netif_carrier_on(dev);
+   en_dbg(LINK, priv, Link Up\n);
+   }
+   }
+   }
+
/*
 * Promsicuous mode: disable all filters
 */
diff --git a/drivers/net/mlx4/en_port.c b/drivers/net/mlx4/en_port.c
index 2863a30..84cc32d 100644
--- a/drivers/net/mlx4/en_port.c
+++ b/drivers/net/mlx4/en_port.c
@@ -40,6 +40,38 @@
 #include en_port.h
 #include mlx4_en.h
 
+int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port)
+{
+   struct mlx4_en_query_port_context *qport_context;
+   struct mlx4_en_priv *priv = netdev_priv(mdev-pndev[port]);
+   struct mlx4_en_port_state *state = priv-port_state;
+   struct mlx4_cmd_mailbox *mailbox;
+   int err;
+
+   mailbox = mlx4_alloc_cmd_mailbox(mdev-dev);
+   if (IS_ERR(mailbox))
+   return PTR_ERR(mailbox);
+   memset(mailbox-buf, 0, sizeof(*qport_context));
+   err = mlx4_cmd_box(mdev-dev, 0, mailbox-dma, port, 0,
+  MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B);
+   if (err)
+   goto out;
+   qport_context = mailbox-buf;
+
+   /* This command is always accessed from Ethtool context
+* already synchronized, no need in locking */
+   state-link_state = !!(qport_context-link_up  MLX4_EN_LINK_UP_MASK);
+   if ((qport_context-link_speed  MLX4_EN_SPEED_MASK) ==
+   MLX4_EN_1G_SPEED)
+   state-link_speed = 1000;
+   else
+   state-link_speed = 1;
+   state-transciver = qport_context-transceiver;
+
+out:
+   mlx4_free_cmd_mailbox(mdev-dev, mailbox);
+   return err;
+}
 
 int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
 {
diff --git a/drivers/net/mlx4/en_port.h b/drivers/net/mlx4/en_port.h
index 40918ab..ecbab85 100644
--- a/drivers/net/mlx4/en_port.h
+++ b/drivers/net/mlx4/en_port.h
@@ -41,6 +41,19 @@ enum {
MLX4_MCAST_ENABLE   = 2,
 };
 
+struct mlx4_en_query_port_context {
+   u8 link_up;
+#define MLX4_EN_LINK_UP_MASK   0x80
+   u8 reserved;
+   __be16 mtu;
+   u8 reserved2;
+   u8 link_speed;
+#define MLX4_EN_SPEED_MASK 0x3
+#define MLX4_EN_1G_SPEED   0x2
+   u16 reserved3[5];
+   __be64 mac;
+   u8 transceiver;
+};
 
 struct mlx4_en_stat_out_mbox {
/* Received frames with a length of 64 octets */
diff --git a/drivers/net/mlx4/mlx4_en.h b/drivers/net/mlx4/mlx4_en.h
index 2204ec3..e0ce6c5 100644
--- a/drivers/net/mlx4/mlx4_en.h
+++ b/drivers/net/mlx4/mlx4_en.h
@@ -389,6 +389,12 @@ struct mlx4_en_rss_context {
__be32 rss_key[10];
 };
 
+struct mlx4_en_port_state {
+   int link_state;
+   int link_speed;
+   int transciver;
+};
+
 struct mlx4_en_pkt_stats {
unsigned long broadcast;
unsigned long rx_prio[8];
@@ -437,6 +443,7 @@ struct mlx4_en_priv {
struct vlan_group *vlgrp;
struct net_device_stats stats;
struct net_device_stats ret_stats;
+   struct mlx4_en_port_state port_state;
spinlock_t stats_lock;
 
unsigned long last_moder_packets;
@@ -562,6 +569,7 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, 
u32 base_qpn,
   u8 promisc);
 
 int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset);
+int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port);
 
 /*
  * Globals
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 18/19 V4] mlx4_core: setting MGM entry size to 512

2010-06-10 Thread Yevgeny Petrilin
Both Unicast and multicast addresses are using the same table

Signed-off-by: Yevgeny Petrilin yevge...@mellanox.co.il
---
 drivers/net/mlx4/mlx4.h |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 8530032..7a7f787 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -64,7 +64,7 @@ enum {
 };
 
 enum {
-   MLX4_MGM_ENTRY_SIZE =  0x100,
+   MLX4_MGM_ENTRY_SIZE =  0x200,
MLX4_QP_PER_MGM = 4 * (MLX4_MGM_ENTRY_SIZE / 16 - 2),
MLX4_MTT_ENTRY_PER_SEG  = 8
 };
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 19/19 V4] mlx4: Communication channel interrupts

2010-06-10 Thread Yevgeny Petrilin
The master function receives an interrupt each time one of the slaves writes
to the communication channel, and then handles the channel from a deferred task.
The slaves can now receive command completions and async events by interrupts.
They open an EQ and the master generates eqes and pushes them to the slave's
EQ using Firmware GEN_EQE command.

Signed-off-by: Yevgeny Petrilin yevge...@mellanox.co.il
---
 drivers/net/mlx4/cmd.c  |  258 +++---
 drivers/net/mlx4/eq.c   |  296 +++
 drivers/net/mlx4/main.c |   50 +++-
 drivers/net/mlx4/mlx4.h |   97 +--
 include/linux/mlx4/cmd.h|7 +-
 include/linux/mlx4/device.h |1 +
 6 files changed, 459 insertions(+), 250 deletions(-)

diff --git a/drivers/net/mlx4/cmd.c b/drivers/net/mlx4/cmd.c
index 7efa85f..083ae0f 100644
--- a/drivers/net/mlx4/cmd.c
+++ b/drivers/net/mlx4/cmd.c
@@ -148,19 +148,11 @@ static int comm_pending(struct mlx4_dev *dev)
return (swab32(status)  30) != priv-cmd.comm_toggle;
 }
 
-int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param, unsigned long 
timeout)
+static void mlx4_comm_cmd_post(struct mlx4_dev *dev, u8 cmd, u16 param)
 {
struct mlx4_priv *priv = mlx4_priv(dev);
-   unsigned long end;
u32 val;
 
-   /* First, verify that the master reports correct status */
-   if (comm_pending(dev)) {
-   mlx4_warn(dev, Communication channel is not idle\n);
-   return -EAGAIN;
-   }
-
-   /* Write command */
if (cmd == MLX4_COMM_CMD_RESET)
priv-cmd.comm_toggle = 0;
else if (++priv-cmd.comm_toggle  2)
@@ -168,6 +160,23 @@ int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param, 
unsigned long timeout
val = param | (cmd  16) | (priv-cmd.comm_toggle  30);
__raw_writel((__force u32) cpu_to_be32(val), 
priv-mfunc.comm-slave_write);
wmb();
+}
+
+int mlx4_comm_cmd_poll(struct mlx4_dev *dev, u8 cmd, u16 param, unsigned long 
timeout)
+{
+   struct mlx4_priv *priv = mlx4_priv(dev);
+   unsigned long end;
+   int err = 0;
+
+   /* First, verify that the master reports correct status */
+   if (comm_pending(dev)) {
+   mlx4_warn(dev, Communication channel is not idle\n);
+   return -EAGAIN;
+   }
+
+   /* Write command */
+   down(priv-cmd.poll_sem);
+   mlx4_comm_cmd_post(dev, cmd, param);
 
end = msecs_to_jiffies(timeout) + jiffies;
while (comm_pending(dev)  time_before(jiffies, end))
@@ -175,11 +184,57 @@ int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 
param, unsigned long timeout
 
if (comm_pending(dev)) {
mlx4_warn(dev, Communication channel timed out\n);
-   return -ETIMEDOUT;
+   err = -ETIMEDOUT;
}
+
+   up(priv-cmd.poll_sem);
return 0;
 }
 
+static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 op,
+ u16 param, unsigned long timeout)
+{
+   struct mlx4_cmd *cmd = mlx4_priv(dev)-cmd;
+   struct mlx4_cmd_context *context;
+   int err = 0;
+
+   down(cmd-event_sem);
+
+   spin_lock(cmd-context_lock);
+   BUG_ON(cmd-free_head  0);
+   context = cmd-context[cmd-free_head];
+   context-token += cmd-token_mask + 1;
+   cmd-free_head = context-next;
+   spin_unlock(cmd-context_lock);
+
+   init_completion(context-done);
+
+   mlx4_comm_cmd_post(dev, op, param);
+
+   if (!wait_for_completion_timeout(context-done, 
msecs_to_jiffies(timeout))) {
+   err = -EBUSY;
+   goto out;
+   }
+
+   err = context-result;
+
+out:
+   spin_lock(cmd-context_lock);
+   context-next = cmd-free_head;
+   cmd-free_head = context - cmd-context;
+   spin_unlock(cmd-context_lock);
+
+   up(cmd-event_sem);
+   return err;
+}
+
+int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param, unsigned long 
timeout)
+{
+   if (mlx4_priv(dev)-cmd.use_events)
+   return mlx4_comm_cmd_wait(dev, cmd, param, timeout);
+   return mlx4_comm_cmd_poll(dev, cmd, param, timeout);
+}
+
 static int cmd_pending(struct mlx4_dev *dev)
 {
u32 status = readl(mlx4_priv(dev)-cmd.hcr + HCR_STATUS_OFFSET);
@@ -247,15 +302,15 @@ out:
return ret;
 }
 
-static int mlx4_slave_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 
*out_param,
-int out_is_imm, u32 in_modifier, u8 op_modifier,
-u16 op, unsigned long timeout)
+static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
+ int out_is_imm, u32 in_modifier, u8 op_modifier,
+ u16 op, unsigned long timeout)
 {
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_vhcr *vhcr = priv-mfunc.vhcr;
int ret;
 
-   down(priv-cmd.poll_sem);
+   down(priv-cmd.slave_sem);
   

Re: [PATCH 2.6.35 2/3] RDMA/cxgb4: Support variable sized work requests.

2010-06-10 Thread Roland Dreier
  T4 EQ entries are in multiples of 64B.  Currently the RDMA SQ and RQ
  use fixed sized entries composed of 4 EQ entries for the SQ and 2 EQ
  entries for the RQ.  For optimial latency with small IO, we need to
  change this so the HW only needs to DMA the EQ entries actually used by
  a given work request.

This seems not to be a fix, just an optimization -- so at this point for
2.6.36 I think.  Or am I wrong?
-- 
Roland Dreier rola...@cisco.com || For corporate legal information go to:
http://www.cisco.com/web/about/doing_business/legal/cri/index.html
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2.6.35 2/3] RDMA/cxgb4: Support variable sized work requests.

2010-06-10 Thread Roland Dreier
Linus has been being a hard-ass this cycle about quieting things down
post -rc2.  So I'll hold off.
-- 
Roland Dreier rola...@cisco.com || For corporate legal information go to:
http://www.cisco.com/web/about/doing_business/legal/cri/index.html
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 17/19 V4] mlx4: Using mcg tables for Ethernet Unicast steering.

2010-06-10 Thread Yevgeny Petrilin
When there are multiple interfaces on the same physical port, the old steering
model (Mac steering) would not work. The reason is in the old model there could 
be
only one promiscuous QP per port.
With the new mechanism each interface can have promisc entries both for unicast 
and
multicast. A promisc QP is registered to all entries that belong to the same 
port and
also for the default entry.
In firmware where this feature is not supported, the Mac table steering is used.

Signed-off-by: Yevgeny Petrilin yevge...@mellanox.co.il
---
 drivers/net/mlx4/cmd.c   |   10 +-
 drivers/net/mlx4/en_netdev.c |   55 -
 drivers/net/mlx4/fw.c|3 +
 drivers/net/mlx4/main.c  |   67 +-
 drivers/net/mlx4/mcg.c   |  611 ++
 drivers/net/mlx4/mlx4.h  |   30 ++
 drivers/net/mlx4/mlx4_en.h   |1 +
 drivers/net/mlx4/port.c  |   91 ++-
 include/linux/mlx4/cmd.h |1 +
 include/linux/mlx4/device.h  |   12 +-
 10 files changed, 821 insertions(+), 60 deletions(-)

diff --git a/drivers/net/mlx4/cmd.c b/drivers/net/mlx4/cmd.c
index 660d001..7efa85f 100644
--- a/drivers/net/mlx4/cmd.c
+++ b/drivers/net/mlx4/cmd.c
@@ -514,7 +514,7 @@ static int mlx4_RESOURCE_wrapper(struct mlx4_dev *dev, int 
slave, struct mlx4_vh
switch (vhcr-op) {
case MLX4_CMD_ALLOC_RES:
ret = mlx4_register_mac(dev, vhcr-op_modifier,
-   vhcr-in_param, (int *) 
vhcr-out_param);
+   vhcr-in_param, (int *) 
vhcr-out_param, 1);
vhcr-errno = ret;
break;
case MLX4_CMD_FREE_RES:
@@ -937,6 +937,14 @@ static struct mlx4_cmd_info {
.wrapper = mlx4_MCAST_wrapper
},
{
+   .opcode = MLX4_CMD_PROMISC,
+   .has_inbox = false,
+   .has_outbox = false,
+   .out_is_imm = false,
+   .verify = NULL,
+   .wrapper = mlx4_PROMISC_wrapper
+   },
+   {
.opcode = MLX4_CMD_DIAG_RPRT,
.has_inbox = false,
.has_outbox = true,
diff --git a/drivers/net/mlx4/en_netdev.c b/drivers/net/mlx4/en_netdev.c
index d171945..6dd47e8 100644
--- a/drivers/net/mlx4/en_netdev.c
+++ b/drivers/net/mlx4/en_netdev.c
@@ -240,8 +240,12 @@ static void mlx4_en_do_set_multicast(struct work_struct 
*work)
priv-flags |= MLX4_EN_FLAG_PROMISC;
 
/* Enable promiscouos mode */
-   err = mlx4_SET_PORT_qpn_calc(mdev-dev, priv-port,
-priv-base_qpn, 1);
+   if (!mdev-dev-caps.vep_uc_steering)
+   err = mlx4_SET_PORT_qpn_calc(mdev-dev, 
priv-port,
+priv-base_qpn, 1);
+   else
+   err = mlx4_unicast_promisc_add(mdev-dev, 
priv-base_qpn,
+  priv-port - 1);
if (err)
en_err(priv, Failed enabling 
 promiscous mode\n);
@@ -253,6 +257,15 @@ static void mlx4_en_do_set_multicast(struct work_struct 
*work)
en_err(priv, Failed disabling 
 multicast filter\n);
 
+   /* Add the default qp number as multicast promisc */
+   if (!(priv-flags  MLX4_EN_FLAG_MC_PROMISC)) {
+   err = mlx4_multicast_promisc_add(mdev-dev, 
priv-base_qpn,
+priv-port - 
1);
+   if (err)
+   en_err(priv, Failed entering multicast 
promisc mode\n);
+   priv-flags |= MLX4_EN_FLAG_MC_PROMISC;
+   }
+
/* Disable port VLAN filter */
err = mlx4_SET_VLAN_FLTR(mdev-dev, priv-port, NULL);
if (err)
@@ -271,11 +284,24 @@ static void mlx4_en_do_set_multicast(struct work_struct 
*work)
priv-flags = ~MLX4_EN_FLAG_PROMISC;
 
/* Disable promiscouos mode */
-   err = mlx4_SET_PORT_qpn_calc(mdev-dev, priv-port,
-priv-base_qpn, 0);
+   if (!mdev-dev-caps.vep_uc_steering)
+   err = mlx4_SET_PORT_qpn_calc(mdev-dev, priv-port,
+priv-base_qpn, 0);
+   else
+   err = mlx4_unicast_promisc_remove(mdev-dev, 
priv-base_qpn,
+ priv-port - 1);
if (err)

Re: [PATCH 2.6.35 2/3] RDMA/cxgb4: Support variable sized work requests.

2010-06-10 Thread Steve Wise

Roland Dreier wrote:

  T4 EQ entries are in multiples of 64B.  Currently the RDMA SQ and RQ
  use fixed sized entries composed of 4 EQ entries for the SQ and 2 EQ
  entries for the RQ.  For optimial latency with small IO, we need to
  change this so the HW only needs to DMA the EQ entries actually used by
  a given work request.

This seems not to be a fix, just an optimization -- so at this point for
2.6.36 I think.  Or am I wrong?
  


You are correct...I was hoping since iw_cxgb4 is new to 2.6.35, we could 
still get this in.


But if you disagree, then 2.6.36...


--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: InfiniBand/RDMA merge plans for 2.6.35

2010-06-10 Thread Roland Dreier
   However, I haven't replied to these patches in detail but in general I
   don't like this approach of pick a random vector since it is
   non-deterministic and not likely to end up with an optimal result.

  What is the optimal way to do this, if it isn't to spread CQs evenly
  across all available vectors? (least attached vector != random.)

Since there is no way to know whether a given vector has a bunch of CQs
that generate very few events or maybe a single CQ that generates a
heavy load of events, the number of attached CQs is really pretty
useless as a basis to decide.  I think it's much better to try and
attach your CQ to a vector that is directed at the CPU where you want to
process the work.

 - R.
-- 
Roland Dreier rola...@cisco.com || For corporate legal information go to:
http://www.cisco.com/web/about/doing_business/legal/cri/index.html
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 15/19 V4] mlx4_core: Adding VEP number in resource allocation

2010-06-10 Thread Yevgeny Petrilin
The firmware should be aware of the function a resource is opened on.
The function number is passed in the lower bits of the input parameter.
Non multi-function firmware masks these bits.

Signed-off-by: Yevgeny Petrilin yevge...@mellanox.co.il
---
 drivers/net/mlx4/cmd.c |   58 ++-
 1 files changed, 42 insertions(+), 16 deletions(-)

diff --git a/drivers/net/mlx4/cmd.c b/drivers/net/mlx4/cmd.c
index 4cfa407..660d001 100644
--- a/drivers/net/mlx4/cmd.c
+++ b/drivers/net/mlx4/cmd.c
@@ -534,6 +534,32 @@ static int mlx4_RESOURCE_wrapper(struct mlx4_dev *dev, int 
slave, struct mlx4_vh
return 0;
 }
 
+static int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave,
+   struct mlx4_vhcr *vhcr,
+   struct mlx4_cmd_mailbox *inbox,
+   struct mlx4_cmd_mailbox *outbox)
+{
+   u64 in_param = inbox ? inbox-dma : vhcr-in_param;
+
+   in_param |= (u64) slave;
+   return mlx4_cmd(dev, in_param, vhcr-in_modifier,
+   vhcr-op_modifier, vhcr-op, MLX4_CMD_TIME_CLASS_C);
+}
+
+static int mlx4_DMA_outbox_wrapper(struct mlx4_dev *dev, int slave,
+  struct mlx4_vhcr *vhcr,
+  struct mlx4_cmd_mailbox *inbox,
+  struct mlx4_cmd_mailbox *outbox)
+{
+   u64 in_param = inbox ? inbox-dma : vhcr-in_param;
+   u64 out_param = outbox ? outbox-dma : vhcr-out_param;
+
+   in_param |= (u64) slave;
+   return mlx4_cmd_box(dev, in_param, out_param,
+   vhcr-in_modifier, vhcr-op_modifier, vhcr-op,
+   MLX4_CMD_TIME_CLASS_C);
+}
+
 static struct mlx4_cmd_info {
u16 opcode;
bool has_inbox;
@@ -608,7 +634,7 @@ static struct mlx4_cmd_info {
.has_outbox = false,
.out_is_imm = false,
.verify = NULL, /*need verifier */
-   .wrapper = NULL
+   .wrapper = mlx4_DMA_wrapper
},
{
.opcode = MLX4_CMD_NOP,
@@ -657,7 +683,7 @@ static struct mlx4_cmd_info {
.has_outbox = false,
.out_is_imm = false,
.verify = NULL,
-   .wrapper = NULL
+   .wrapper = mlx4_DMA_wrapper
},
{
.opcode = MLX4_CMD_QUERY_MPT,
@@ -703,10 +729,10 @@ static struct mlx4_cmd_info {
{
.opcode = MLX4_CMD_HW2SW_EQ,
.has_inbox = false,
-   .has_outbox = false,
+   .has_outbox = true,
.out_is_imm = false,
.verify = NULL, /* need verifier */
-   .wrapper = NULL
+   .wrapper = mlx4_DMA_outbox_wrapper
},
{
.opcode = MLX4_CMD_QUERY_EQ,
@@ -722,7 +748,7 @@ static struct mlx4_cmd_info {
.has_outbox = false,
.out_is_imm = false,
.verify = NULL, /* need verifier */
-   .wrapper = NULL
+   .wrapper = mlx4_DMA_wrapper
},
{
.opcode = MLX4_CMD_HW2SW_CQ,
@@ -730,7 +756,7 @@ static struct mlx4_cmd_info {
.has_outbox = false,
.out_is_imm = false,
.verify = NULL, /* need verifier */
-   .wrapper = NULL
+   .wrapper = mlx4_DMA_wrapper
},
{
.opcode = MLX4_CMD_QUERY_CQ,
@@ -754,7 +780,7 @@ static struct mlx4_cmd_info {
.has_outbox = false,
.out_is_imm = false,
.verify = NULL, /* need verifier */
-   .wrapper = NULL
+   .wrapper = mlx4_DMA_wrapper
},
{
.opcode = MLX4_CMD_HW2SW_SRQ,
@@ -786,7 +812,7 @@ static struct mlx4_cmd_info {
.has_outbox = false,
.out_is_imm = false,
.verify = NULL, /* need verifier */
-   .wrapper = NULL
+   .wrapper = mlx4_DMA_wrapper
},
{
.opcode = MLX4_CMD_INIT2RTR_QP,
@@ -794,7 +820,7 @@ static struct mlx4_cmd_info {
.has_outbox = false,
.out_is_imm = false,
.verify = NULL, /* need verifier */
-   .wrapper = NULL
+   .wrapper = mlx4_DMA_wrapper
},
{
.opcode = MLX4_CMD_RTR2RTS_QP,
@@ -802,7 +828,7 @@ static struct mlx4_cmd_info {
.has_outbox = false,
.out_is_imm = false,
.verify = NULL, /* need verifier */
-   .wrapper = NULL
+   .wrapper = mlx4_DMA_wrapper
},
{
.opcode = MLX4_CMD_RTS2RTS_QP,
@@ -810,7 +836,7 @@ static struct mlx4_cmd_info {
.has_outbox = false,
.out_is_imm = false,
.verify = NULL, /* need verifier */
-   .wrapper = NULL
+ 

[PATCH] ibqueryerrors.c: Optimize by querying AllPortSelect first

2010-06-10 Thread Ira Weiny

From: Ira Weiny wei...@hera2.llnl.gov
Date: Thu, 6 May 2010 13:49:55 -0700
Subject: [PATCH] ibqueryerrors.c: Optimize by querying AllPortSelect first

If errors are seen with AllPortSelect query individual ports for more
details.

Signed-off-by: Ira Weiny wei...@llnl.gov
---
 infiniband-diags/src/ibqueryerrors.c |   55 +++--
 1 files changed, 38 insertions(+), 17 deletions(-)

diff --git a/infiniband-diags/src/ibqueryerrors.c 
b/infiniband-diags/src/ibqueryerrors.c
index f04e47f..e0b1c0b 100644
--- a/infiniband-diags/src/ibqueryerrors.c
+++ b/infiniband-diags/src/ibqueryerrors.c
@@ -245,9 +245,9 @@ static int query_and_dump(char *buf, size_t size, 
ib_portid_t * portid,
return n;
 }
 
-static void print_results(ib_portid_t * portid, char *node_name,
- ibnd_node_t * node, uint8_t * pc, int portnum,
- int *header_printed)
+static int print_results(ib_portid_t * portid, char *node_name,
+ibnd_node_t * node, uint8_t * pc, int portnum,
+int *header_printed)
 {
char buf[1024];
char *str = buf;
@@ -311,11 +311,16 @@ static void print_results(ib_portid_t * portid, char 
*node_name,
*header_printed = 1;
}
 
-   printf("   GUID 0x%" PRIx64 " port %d:%s\n", node->guid,
-  portnum, str);
-   if (port_config)
+   if (portnum == 0xFF)
+   printf("   GUID 0x%" PRIx64 " port ALL:%s\n",
+  node->guid, str);
+   else
+   printf("   GUID 0x%" PRIx64 " port %d:%s\n",
+  node->guid, portnum, str);
+   if (portnum != 0xFF && port_config)
print_port_config(node_name, node, portnum);
}
+   return (n);
 }
 
 static int query_cap_mask(ib_portid_t * portid, char *node_name, int portnum,
@@ -339,8 +344,8 @@ static int query_cap_mask(ib_portid_t * portid, char 
*node_name, int portnum,
return 0;
 }
 
-static void print_port(ib_portid_t * portid, uint16_t cap_mask, char 
*node_name,
-  ibnd_node_t * node, int portnum, int *header_printed)
+static int print_port(ib_portid_t * portid, uint16_t cap_mask, char *node_name,
+ ibnd_node_t * node, int portnum, int *header_printed)
 {
uint8_t pc[1024];
 
@@ -350,14 +355,15 @@ static void print_port(ib_portid_t * portid, uint16_t 
cap_mask, char *node_name,
   IB_GSI_PORT_COUNTERS, ibmad_port)) {
IBWARN("IB_GSI_PORT_COUNTERS query failed on %s, %s port %d",
   node_name, portid2str(portid), portnum);
-   return;
+   return (0);
}
if (!(cap_mask & 0x1000)) {
/* if PortCounters:PortXmitWait not supported clear this 
counter */
uint32_t foo = 0;
mad_encode_field(pc, IB_PC_XMT_WAIT_F, foo);
}
-   print_results(portid, node_name, node, pc, portnum, header_printed);
+   return (print_results(portid, node_name, node, pc, portnum,
+ header_printed));
 }
 
 static void clear_port(ib_portid_t * portid, uint16_t cap_mask,
@@ -425,6 +431,27 @@ void print_node(ibnd_node_t * node, void *user_data)
 
node_name = remap_node_name(node_name_map, node->guid, node->nodedesc);
 
+   if (node->type == IB_NODE_SWITCH) {
+   ib_portid_set(portid, node->smalid, 0, 0);
+   p = 0;
+   } else {
+   for (p = 1; p <= node->numports; p++) {
+   if (node->ports[p]) {
+   ib_portid_set(portid,
+ node->ports[p]->base_lid,
+ 0, 0);
+   break;
+   }
+   }
+   }
+   if ((query_cap_mask(portid, node_name, p, &cap_mask) == 0) &&
+   (cap_mask & 0x100)) {
+   all_port_sup = 1;
+   if (!print_port(portid, cap_mask, node_name, node,
+   0xFF, header_printed))
+   goto clear;
+   }
+
for (p = startport; p <= node->numports; p++) {
if (node->ports[p]) {
if (node->type == IB_NODE_SWITCH)
@@ -433,13 +460,6 @@ void print_node(ibnd_node_t * node, void *user_data)
ib_portid_set(portid, node->ports[p]->base_lid,
  0, 0);
 
-   if (query_cap_mask(portid, node_name, p, &cap_mask) <
-   0)
-   continue;
-
-   if (cap_mask & 0x100)
-   all_port_sup = 1;
-
print_port(portid, cap_mask, node_name, node, p,