Matt, you're far too kind :) I put together a test program that uses the
block of code in question and... it works for me? I've attached the
reproducer here. A compile should be just a "gcc -libverbs ib_verbs_q.c".
I'm a little perplexed. I truthfully didn't expect it to work, given that
the same block fails when called from inside of openmpi on the same node(s)
where Matt saw the failure earlier.

-Aaron

On Wed, Jul 13, 2016 at 9:17 PM, Aaron Knister <aaron.s.knis...@nasa.gov>
wrote:

> On Wed, Jul 13, 2016 at 9:50 AM, Nathan Hjelm <hje...@me.com> wrote:
>
>> As of 2.0.0 we now support experimental verbs. It looks like one of the
>> calls is failing:
>>
>> #if HAVE_DECL_IBV_EXP_QUERY_DEVICE
>>     device->ib_exp_dev_attr.comp_mask = IBV_EXP_DEVICE_ATTR_RESERVED - 1;
>>     if(ibv_exp_query_device(device->ib_dev_context,
>> &device->ib_exp_dev_attr)){
>>         BTL_ERROR(("error obtaining device attributes for %s errno says
>> %s",
>>                     ibv_get_device_name(device->ib_dev),
>> strerror(errno)));
>>         goto error;
>>     }
>> #endif
>>
>> Do you know what OFED or MOFED version you are running?
>>
>
> Per one of our gurus, answers from your IB page:
>
> 1. Which OpenFabrics version are you running? Please specify where you got
> the software from (e.g., from the OpenFabrics community web site, from a
> vendor, or it was already included in your Linux distribution).
>    Mellanox OFED 3.1-1.0.3 (soon to be 3.3-1.0.0)
>
> 2. What distro and version of Linux are you running? What is your kernel
> version?
>    SLES11 SP3 (LTSS); 3.0.101-0.47.71-default (soon to be
> 3.0.101-0.47.79-default)
>
> 3. Which subnet manager are you running? (e.g., OpenSM, a vendor-specific
> subnet manager, etc.)
>    Mellanox UFM (OpenSM under the covers)
>
> --
> Matt Thompson
>
> Man Among Men
> Fulcrum of History
>
>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <infiniband/verbs.h>
/*
 * Cut-down stand-in for the device structure referenced by the Open MPI
 * snippet in main(); it carries only the three members that snippet touches.
 */
struct ompi_device {
    /* Device handle; main() takes it from the first entry of the device list. */
    struct ibv_device *ib_dev;
    /* Experimental attribute block passed to ibv_exp_query_device(). */
    struct ibv_exp_device_attr ib_exp_dev_attr;
    /* Context returned by ibv_open_device() for ib_dev. */
    struct ibv_context *ib_dev_context;
};

/*
 * Minimal replacements for the BTL_ERROR logging macro used by the snippet
 * in main(). BTL_ERROR takes a parenthesised printf-style argument list,
 * e.g. BTL_ERROR(("value %d", v)); and prints it to stderr followed by a
 * newline.
 *
 * The two fprintf calls are wrapped in do { ... } while (0) so the macro
 * expands to exactly one statement and stays correct inside an unbraced
 * if/else. The caller supplies the terminating semicolon.
 */
#define btl_error(...) \
    do { \
        fprintf(stderr, __VA_ARGS__); \
        fprintf(stderr, "\n"); \
    } while (0)
#define BTL_ERROR(args) btl_error args

/*
 * Standalone reproducer: opens the first InfiniBand device reported by
 * libibverbs, runs the experimental device query exactly as the quoted
 * Open MPI snippet does, and prints the firmware version and node GUID.
 *
 * Returns 0 on success and -1 on any failure. Every resource acquired
 * along the way (device state, device list, device context) is released
 * before returning.
 */
int main(void) {
    int rc = -1;
    struct ibv_device **device_list = NULL;
    struct ompi_device *device = NULL;
    unsigned long long guid;

    /*
     * calloc so the attribute block is zeroed before the query (the only
     * field set explicitly below is comp_mask) and so ib_dev_context starts
     * out NULL for the shared cleanup path.
     */
    device = calloc(1, sizeof *device);
    if (!device) {
        fprintf(stderr, "Error, failed to allocate device state\n");
        return -1;
    }

    device_list = ibv_get_device_list(NULL);
    if (!device_list) {
        fprintf(stderr, "Error, failed to get the device list: %s\n",
                strerror(errno));
        goto error;
    }

    /* The list is NULL-terminated; an empty list has NULL in slot 0. */
    if (!device_list[0]) {
        fprintf(stderr, "Error, no InfiniBand devices found\n");
        goto error;
    }

    device->ib_dev = device_list[0];

    device->ib_dev_context = ibv_open_device(device->ib_dev);
    if (!device->ib_dev_context) {
        fprintf(stderr, "Error, failed to open the device '%s'\n",
                ibv_get_device_name(device->ib_dev));
        goto error;
    }

/** Begin code snippet from OpenMPI **/

    device->ib_exp_dev_attr.comp_mask = IBV_EXP_DEVICE_ATTR_RESERVED - 1;
    if(ibv_exp_query_device(device->ib_dev_context, &device->ib_exp_dev_attr)){
        BTL_ERROR(("error obtaining device attributes for %s errno says %s",
                    ibv_get_device_name(device->ib_dev), strerror(errno)));
        goto error;
    }

/** End code snippet from OpenMPI **/

    printf("hca_id: %s\n", ibv_get_device_name(device->ib_dev));
    printf("\tfw ver: %s\n", device->ib_exp_dev_attr.fw_ver);

    /*
     * Print the GUID one byte at a time, lowest-order byte first. Each byte
     * is cast to unsigned int so the argument type matches %02x; the
     * unmasked expression is 64 bits wide.
     */
    guid = device->ib_exp_dev_attr.node_guid;
    printf("\tnode guid: %02x%02x:%02x%02x:%02x%02x:%02x%02x\n",
           (unsigned int)(guid & 0xFF),
           (unsigned int)((guid >> 8) & 0xFF),
           (unsigned int)((guid >> 16) & 0xFF),
           (unsigned int)((guid >> 24) & 0xFF),
           (unsigned int)((guid >> 32) & 0xFF),
           (unsigned int)((guid >> 40) & 0xFF),
           (unsigned int)((guid >> 48) & 0xFF),
           (unsigned int)((guid >> 56) & 0xFF));

    rc = 0;

    /* Shared exit path: success falls through here with rc == 0. */
error:
    if (device->ib_dev_context)
        ibv_close_device(device->ib_dev_context);
    if (device_list)
        ibv_free_device_list(device_list);
    free(device);
    return rc;
}

Reply via email to