Re: [pocl-devel] NVIDIA device backend for POCL

Peter Colberg Fri, 22 Mar 2013 14:13:14 -0700

On Fri, Mar 22, 2013 at 02:54:41PM -0400, Peter Colberg wrote:
> I was curious to see how the NVIDIA OpenCL driver handles __local and
> __constant kernel parameters. The result is surprising.


I wrote a small program to reproduce this issue with the CUDA driver.

gcc -O2 -std=c99 -Wall -o module module.c -lcuda

=== PTX with __global kernel arguments ===

  # cat cuda.ptx
  .version 3.0
  .target sm_20, texmode_independent
  .address_size 32
  .entry incr(.param .u32 .ptr .global .align 4 incr_param_0,
              .param .u32 .ptr .global .align 4 incr_param_1)
  {
  }

  # ./module cuda.ptx
  #

=== PTX with __global and __constant kernel arguments ===

  # cat cuda.ptx
  .version 3.0
  .target sm_20, texmode_independent
  .address_size 32
  .entry incr(.param .u32 .ptr .global .align 4 incr_param_0,
              .param .u32 .ptr .const .align 4 incr_param_1)
  {
  }

  # ./module cuda.ptx

  cuModuleLoadDataEx failed {200}
  #

=== PTX with __global and __local kernel arguments ===

  # cat cuda.ptx
  .version 3.0
  .target sm_20, texmode_independent
  .address_size 32
  .entry incr(.param .u32 .ptr .global .align 4 incr_param_0,
              .param .u32 .ptr .shared .align 4 incr_param_1)
  {
  }

  # ./module cuda.ptx

  cuModuleLoadDataEx failed {200}
  #

=== PTX with intentional syntax error ===

  # cat cuda.ptx
  .version 3.0
  .target sm_20, texmode_independent
  .address_size 32
  .entry incr(.param .u32 .ptr .global .align 4 incr_param_0,
              .param .u32 .ptr .shrred .align 4 incr_param_1)
  {
  }

  # ./module cuda.ptx
  ptxas application ptx input, line 5; fatal   : Parsing error near '.shrred': syntax error
  ptxas fatal   : Ptx assembly aborted due to errors
  cuModuleLoadDataEx failed {209}
  #


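So while the NVIDIA OpenCL driver does produce PTX code with .const
and .shared kernel parameters, and the NVIDIA GPU driver somehow
accepts such PTX code when it is used through OpenCL, the CUDA driver
functions cuModuleLoad*() reject these parameter types silently: they
fail with error 200 and leave the JIT info and error logs empty,
whereas a genuine syntax error yields a ptxas message and error 209.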

Maybe there is a magic switch in the NVIDIA driver to enable the
JIT compilation of this kind of PTX code? In any case, it is not
exposed by the CUDA driver API.

Peter

#include <cuda.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>

int main(int argc, char **argv)
{
  CUdevice dev;
  CUcontext ctx;
  CUmodule mod;
  int err, fd;
  struct stat st;
  void *src;
  const size_t len = 4096;
  char error_buf[len];
  char info_buf[len];
  CUjit_option options[5] = {
    CU_JIT_FALLBACK_STRATEGY,
    CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
    CU_JIT_INFO_LOG_BUFFER,
    CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
    CU_JIT_ERROR_LOG_BUFFER,
  };
  void *option_values[5] = {
    (void *) CU_PREFER_PTX,
    (void *) len,
    info_buf,
    (void *) len,
    error_buf,
  };
  if ((err = cuInit(0)) != CUDA_SUCCESS)
  {
    fprintf(stderr, "cuInit failed {%d}\n", err);
    return 1;
  }
  if ((err = cuDeviceGet(&dev, 0)) != CUDA_SUCCESS)
  {
    fprintf(stderr, "cuCtxCreate failed {%d}\n", err);
    return 1;
  }
  if ((err = cuCtxCreate(&ctx, 0, dev)) != CUDA_SUCCESS)
  {
    fprintf(stderr, "cuCtxCreate failed {%d}\n", err);
    return 1;
  }
  if ((fd = open(argv[1], O_RDONLY)) < 0) {
    perror("open failed\n");
    return 1;
  }
  if (fstat(fd, &st) < 0) {
    perror("fstat failed\n");
    return 1;
  }
  if ((src = mmap(0, st.st_size, PROT_READ, MAP_SHARED, fd, 0)) == (void *)-1) {
    perror("mmap failed\n");
    return 1;
  }
  if ((err = cuModuleLoadDataEx(&mod, src, 5, options, option_values)) != CUDA_SUCCESS) {
    fwrite(info_buf, (size_t) option_values[1], 1, stderr);
    fwrite(error_buf, (size_t) option_values[3], 1, stderr);
    fprintf(stderr, "\ncuModuleLoadDataEx failed {%d}\n", err);
    return 1;
  }
  return 0;
}

------------------------------------------------------------------------------
Everyone hates slow websites. So do we.
Make your web apps faster with AppDynamics
Download AppDynamics Lite for free today:
http://p.sf.net/sfu/appdyn_d2d_mar

_______________________________________________
pocl-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/pocl-devel

Re: [pocl-devel] NVIDIA device backend for POCL

Reply via email to