Changeset: b02d018c0d3b for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=b02d018c0d3b
Modified Files:
        monetdb5/extras/bwd/bwd.c
        monetdb5/extras/bwd/operations.c
        monetdb5/extras/bwd/utilities.c
Branch: bwd
Log Message:

* lots of bitmagic
        * using byte-instructions instead of integer instructions on the device,
          because NVIDIA GPUs don't support non-aligned int access
        * made bwdevices report the endianness of the device
        * printing debug info to stdout rather than gdkout


Differences (truncated from 371 to 300 lines):

diff --git a/monetdb5/extras/bwd/bwd.c b/monetdb5/extras/bwd/bwd.c
--- a/monetdb5/extras/bwd/bwd.c
+++ b/monetdb5/extras/bwd/bwd.c
@@ -63,8 +63,10 @@ str deviceInfo(bat * resPlatform, bat * 
                                {
                                        
clGetDeviceInfo(devices[j],CL_DEVICE_TYPE,sizeof(cl_ulong),&device_type,NULL);
                                        char deviceName[1024];
+                                       cl_bool endianLittle;
                                        if((err = 
clGetDeviceInfo(devices[j],CL_DEVICE_NAME,sizeof(deviceName),&deviceName,NULL)))
 die ("clGetDeviceInfo error: %s\n",clError(err));
-                                       snprintf (deviceNameBuffer, 1024, "%d: 
%s (%s)\n", j, deviceName, clDeviceString(device_type));
+                                       if((err = 
clGetDeviceInfo(devices[j],CL_DEVICE_ENDIAN_LITTLE,sizeof(cl_bool),&endianLittle,NULL)))
 die ("clGetDeviceInfo error: %s\n",clError(err));
+                                       snprintf (deviceNameBuffer, 1024, "%d: 
%s (%s), endianness: %s\n", j, deviceName, clDeviceString(device_type), 
(str[]){[CL_TRUE] = "little", [CL_FALSE] = "big"}[endianLittle]);
                                }
                                char* platformVersionBuffer;
                                {
diff --git a/monetdb5/extras/bwd/operations.c b/monetdb5/extras/bwd/operations.c
--- a/monetdb5/extras/bwd/operations.c
+++ b/monetdb5/extras/bwd/operations.c
@@ -18,10 +18,10 @@
 
 str BWDLeftJoinApproximate(bat * res, bat * l, bat * r){
 
-       ALGODEBUG THRprintf(GDKout, "#BWDfetchjoin: approximating;\n");
+       ALGODEBUG printf("#BWDfetchjoin: approximating;\n");
        BAT* right = BATdescriptor(*r);
        if(!batTailIsDecomposed(right)){
-               THRprintf(GDKout, "bwd.%s : bat is not decomposed: %d, no 
approximation can be provided", __func__, *r);
+               printf("bwd.%s : bat is not decomposed: %d, no approximation 
can be provided", __func__, *r);
                BBPreleaseref(right->batCacheid);
                return MAL_SUCCEED;
        }
@@ -54,17 +54,17 @@ str BWDLeftJoinApproximate(bat * res, ba
                BBPreleaseref(right->batCacheid);
                return MAL_SUCCEED;
        }       else
-               THRprintf(GDKout, "bwd.BWDLeftJoinApproximate, %s 
(BAThvoid(left): %d, BATtvoid(left): %d, BAThvoid(right): %d, right->tseqbase: 
%ld)", "this case isn't implemented yet",      BAThvoid(left), BATtvoid(left), 
BATtvoid(right), left->tseqbase);
+               printf("bwd.BWDLeftJoinApproximate, %s (BAThvoid(left): %d, 
BATtvoid(left): %d, BAThvoid(right): %d, right->tseqbase: %ld)", "this case 
isn't implemented yet", BAThvoid(left), BATtvoid(left), BATtvoid(right), 
left->tseqbase);
        return MAL_SUCCEED;
 };
 
 
 str BWDLeftJoinRefine(bat * res, bat * l, bat * r, bat * approx){
        if (!approx || !*approx) {
-               THRprintf(GDKout, "bwd.%s : no approximation provided, falling 
back to normal leftjoin", __func__);
+               printf("bwd.%s : no approximation provided, falling back to 
normal leftjoin", __func__);
     return ALGleftjoin(res, l, r);
        }
-       ALGODEBUG THRprintf(GDKout, "#BWDfetchjoin: approximating;\n");
+       ALGODEBUG printf("#BWDfetchjoin: approximating;\n");
        BAT* left = BATdescriptor(*l);
        BAT* right = BATdescriptor(*r);
 
@@ -93,7 +93,7 @@ static inline size_t clDeviceAddressByte
        static size_t CL_DEVICE_ADDRESS_BYTES = 0;
        if (!CL_DEVICE_ADDRESS_BYTES) {
                cl_int err = clGetDeviceInfo(getDeviceID(),     
CL_DEVICE_ADDRESS_BITS, sizeof(size_t), &CL_DEVICE_ADDRESS_BYTES,       NULL);
-               if(err) THRprintf(GDKout, "#%s, %s;\n", __func__, clError(err));
+               if(err) printf("#%s, %s;\n", __func__, clError(err));
                CL_DEVICE_ADDRESS_BYTES /= 8;
        }
        return CL_DEVICE_ADDRESS_BYTES;                         //TODO: check 
that CL_DEVICE_ADDRESS_BITS 
@@ -143,24 +143,33 @@ str BWDThetauselectApproximate(bat *res,
                        void* zeros = 
calloc((BATcount(data)+1)*(clDeviceAddressBytes() + 
slot->approximationBits/8)+sizeof(clBAT), 1); //TODO: migrate buffer 
initialization to GPU
                        clEnqueueWriteBuffer(getCommandQueue(), 
slot->approximation, CL_TRUE, 0, (BATcount(data)+1)*(clDeviceAddressBytes() + 
slot->approximationBits/8)+sizeof(clBAT), zeros, 0, NULL, NULL);
                        free(zeros);
-                       if(err) THRprintf(GDKout, "#%s, clCreateBuffer: %s;\n", 
__func__, clError(err));
+                       if(err) printf("#%s, clCreateBuffer: %s;\n", __func__, 
clError(err));
 
                }
 
                const char* sourceCodeTemplate = "__kernel void uselect ("
                        "__global struct{int count; int padding; char 
elements[];}* output,"
-                       "__global const void* approximation,"
+                       "__global const char* approximation,"
                        "const %1$s operand"
                        ") {"
                        /* "  printf(\"%%d:\", get_global_id(0));" */
-                       "  const %1$s value  = (*(int*)(approximation + 
get_global_id(0)*%3$d) << %4$d);"
+                       "  %1$s value  = 0;"
+                       "  const size_t inputOffset = get_global_id(0)*%3$d;"
+                       "  {"
+                       "    for(int i = 0; i < %3$d; i++)"
+                       "      value += approximation[inputOffset + i] << i*8;"
+                       "  }"
                        "  if(value %2$s operand)"
                        "  {"
-                       "    const int slotNumber = 
atomic_inc(&(output->count));"
-                       "    size_t* outputSlot = (size_t*)(output->elements + 
slotNumber*(%3$d + sizeof(size_t)));"
-                       "    *outputSlot |= get_global_id(0);"
-                       "    outputSlot[1] |= (value >> %4$d);"
-                       "    printf(\"value: %%d, value in slot(%%d) %%d, 
global_id: %%d\\n\", value, slotNumber, outputSlot[1], get_global_id(0));"
+                       "    const size_t offset = atomic_inc(&(output->count)) 
* (sizeof(size_t) + %3$d);"
+                       "    for(int i = 0; i < sizeof(size_t); i++)"
+                       "      output->elements[offset+i] = get_global_id(0) >> 
i*8;"
+                       "    for(int i = 0; i < sizeof(size_t); i++)"
+                       "      output->elements[offset+sizeof(size_t)+i] = 
approximation[inputOffset + i];"
+                       /* "    size_t* outputSlot = (size_t*)(output->elements 
+ slotNumber*(%3$d + sizeof(size_t)));" */
+                       /* "    *outputSlot |= get_global_id(0);" */
+                       /* "    outputSlot[1] |= (value >> %4$d);" */
+                       /* "    printf(\"value: %%d, value in slot(%%d) %%d, 
global_id: %%d\\n\", value, slotNumber, outputSlot[1], get_global_id(0));" */
                        "  }"
                        "}";
                char* sourceCode = malloc(16384);
@@ -217,10 +226,9 @@ str BWDThetauselectRefine(bat *res, bat 
        BATseqbase(result, 0);
 
        clBAT* resultClBAT = (clBAT*) malloc(approximationSize);
-               /* Tloc(result, BUNfirst(result)); */
-
        cl_int err = clEnqueueReadBuffer(getCommandQueue(), 
batTailApproximation(approximation), CL_TRUE, 0, approximationSize, 
resultClBAT, 0, NULL, NULL);    
        if(err) THRprintf(GDKout, "#%s, clEnqueueReadBuffer: %s;\n", __func__, 
clError(err));
+               /* Tloc(result, BUNfirst(result)); */
        int* resultRegion = (int*) Tloc(result, BUNfirst(result)); // type 
specific
        oid* positionRegion = (oid*) Hloc(result, BUNfirst(result)); // type 
specific
        size_t candidateCount = resultClBAT->count;
@@ -239,15 +247,25 @@ str BWDThetauselectRefine(bat *res, bat 
                if(deCompressedValue comparator *(int*)val) {                   
                                                                                
        \
                positionRegion[j] = index; \
     resultRegion[j++] = deCompressedValue; \
-               printf ("compressed value: %d, index: %d, value: %d\n", 
compressedValue, index, resultRegion[j-1]); \
                }\
 }
+               /* printf ("compressed value: %d, index: %d, value: %d\n", 
compressedValue, index, resultRegion[j-1]); \ */
 
        switch (*OP[0]){
-       case '<': 
-               refineLoop(<);
+       case '<':
+               switch((*OP)[1]){
+               case '\0':
+                       refineLoop(<);
+               case '=':
+                       refineLoop(<=);
+               }
        case '>': 
-               refineLoop(>);
+               switch((*OP)[1]){
+               case '\0':
+                       refineLoop(<=);
+               case '=':
+                       refineLoop(<=);
+               }
        case '=': 
                refineLoop(==);
        }
diff --git a/monetdb5/extras/bwd/utilities.c b/monetdb5/extras/bwd/utilities.c
--- a/monetdb5/extras/bwd/utilities.c
+++ b/monetdb5/extras/bwd/utilities.c
@@ -18,97 +18,104 @@
 #pragma mark OpenCL Utitily Functions
 
 const char* clError(int err){
-       static char* cl_Errors[64];
-  if (err == -1001)
-    return "CL_PLATFORM_NOT_FOUND_KHR";
-  if (!cl_Errors[0]){
-    cl_Errors[0] = "CL_SUCCESS";
-    cl_Errors[1] = "CL_DEVICE_NOT_FOUND";
-    cl_Errors[2] = "CL_DEVICE_NOT_AVAILABLE";
-    cl_Errors[3] = "CL_COMPILER_NOT_AVAILABLE";
-    cl_Errors[4] = "CL_MEM_OBJECT_ALLOCATION_FAILURE";
-    cl_Errors[5] = "CL_OUT_OF_RESOURCES";
-    cl_Errors[6] = "CL_OUT_OF_HOST_MEMORY";
-    cl_Errors[7] = "CL_PROFILING_INFO_NOT_AVAILABLE";
-    cl_Errors[8] = "CL_MEM_COPY_OVERLAP";
-    cl_Errors[9] = "CL_IMAGE_FORMAT_MISMATCH";
-    cl_Errors[10] = "CL_IMAGE_FORMAT_NOT_SUPPORTED";
-    cl_Errors[11] = "CL_BUILD_PROGRAM_FAILURE";
-    cl_Errors[12] = "CL_MAP_FAILURE";
-
-    cl_Errors[30] = "CL_INVALID_VALUE";
-    cl_Errors[31] = "CL_INVALID_DEVICE_TYPE";
-    cl_Errors[32] = "CL_INVALID_PLATFORM";
-    cl_Errors[33] = "CL_INVALID_DEVICE";
-    cl_Errors[34] = "CL_INVALID_CONTEXT";
-    cl_Errors[35] = "CL_INVALID_QUEUE_PROPERTIES";
-    cl_Errors[36] = "CL_INVALID_COMMAND_QUEUE";
-    cl_Errors[37] = "CL_INVALID_HOST_PTR";
-    cl_Errors[38] = "CL_INVALID_MEM_OBJECT";
-    cl_Errors[39] = "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
-    cl_Errors[40] = "CL_INVALID_IMAGE_SIZE";
-    cl_Errors[41] = "CL_INVALID_SAMPLER";
-    cl_Errors[42] = "CL_INVALID_BINARY";
-    cl_Errors[43] = "CL_INVALID_BUILD_OPTIONS";
-    cl_Errors[44] = "CL_INVALID_PROGRAM";
-    cl_Errors[45] = "CL_INVALID_PROGRAM_EXECUTABLE";
-    cl_Errors[46] = "CL_INVALID_KERNEL_NAME";
-    cl_Errors[47] = "CL_INVALID_KERNEL_DEFINITION";
-    cl_Errors[48] = "CL_INVALID_KERNEL";
-    cl_Errors[49] = "CL_INVALID_ARG_INDEX";
-    cl_Errors[50] = "CL_INVALID_ARG_VALUE";
-    cl_Errors[51] = "CL_INVALID_ARG_SIZE";
-    cl_Errors[52] = "CL_INVALID_KERNEL_ARGS";
-    cl_Errors[53] = "CL_INVALID_WORK_DIMENSION";
-    cl_Errors[54] = "CL_INVALID_WORK_GROUP_SIZE";
-    cl_Errors[55] = "CL_INVALID_WORK_ITEM_SIZE";
-    cl_Errors[56] = "CL_INVALID_GLOBAL_OFFSET";
-    cl_Errors[57] = "CL_INVALID_EVENT_WAIT_LIST";
-    cl_Errors[58] = "CL_INVALID_EVENT";
-    cl_Errors[59] = "CL_INVALID_OPERATION";
-    cl_Errors[60] = "CL_INVALID_GL_OBJECT";
-    cl_Errors[61] = "CL_INVALID_BUFFER_SIZE";
-    cl_Errors[62] = "CL_INVALID_MIP_LEVEL";
-    cl_Errors[63] = "CL_INVALID_GLOBAL_WORK_SIZE";
-
-  }
-  return cl_Errors[-err];
+       const str result = (str[]){
+               [-CL_SUCCESS] = "CL_SUCCESS",
+               [-CL_DEVICE_NOT_FOUND] = "CL_DEVICE_NOT_FOUND",
+               [-CL_DEVICE_NOT_AVAILABLE] = "CL_DEVICE_NOT_AVAILABLE",
+               [-CL_COMPILER_NOT_AVAILABLE] = "CL_COMPILER_NOT_AVAILABLE",
+               [-CL_MEM_OBJECT_ALLOCATION_FAILURE] = 
"CL_MEM_OBJECT_ALLOCATION_FAILURE",
+               [-CL_OUT_OF_RESOURCES] = "CL_OUT_OF_RESOURCES",
+               [-CL_OUT_OF_HOST_MEMORY] = "CL_OUT_OF_HOST_MEMORY",
+               [-CL_PROFILING_INFO_NOT_AVAILABLE] = 
"CL_PROFILING_INFO_NOT_AVAILABLE",
+               [-CL_MEM_COPY_OVERLAP] = "CL_MEM_COPY_OVERLAP",
+               [-CL_IMAGE_FORMAT_MISMATCH] = "CL_IMAGE_FORMAT_MISMATCH",
+               [-CL_IMAGE_FORMAT_NOT_SUPPORTED] = 
"CL_IMAGE_FORMAT_NOT_SUPPORTED",
+               [-CL_BUILD_PROGRAM_FAILURE] = "CL_BUILD_PROGRAM_FAILURE",
+               [-CL_MAP_FAILURE] = "CL_MAP_FAILURE",
+               [-CL_INVALID_VALUE] = "CL_INVALID_VALUE",
+               [-CL_INVALID_DEVICE_TYPE] = "CL_INVALID_DEVICE_TYPE",
+               [-CL_INVALID_PLATFORM] = "CL_INVALID_PLATFORM",
+               [-CL_INVALID_DEVICE] = "CL_INVALID_DEVICE",
+               [-CL_INVALID_CONTEXT] = "CL_INVALID_CONTEXT",
+               [-CL_INVALID_QUEUE_PROPERTIES] = "CL_INVALID_QUEUE_PROPERTIES",
+               [-CL_INVALID_COMMAND_QUEUE] = "CL_INVALID_COMMAND_QUEUE",
+               [-CL_INVALID_HOST_PTR] = "CL_INVALID_HOST_PTR",
+               [-CL_INVALID_MEM_OBJECT] = "CL_INVALID_MEM_OBJECT",
+               [-CL_INVALID_IMAGE_FORMAT_DESCRIPTOR] = 
"CL_INVALID_IMAGE_FORMAT_DESCRIPTOR",
+               [-CL_INVALID_IMAGE_SIZE] = "CL_INVALID_IMAGE_SIZE",
+               [-CL_INVALID_SAMPLER] = "CL_INVALID_SAMPLER",
+               [-CL_INVALID_BINARY] = "CL_INVALID_BINARY",
+               [-CL_INVALID_BUILD_OPTIONS] = "CL_INVALID_BUILD_OPTIONS",
+               [-CL_INVALID_PROGRAM] = "CL_INVALID_PROGRAM",
+               [-CL_INVALID_PROGRAM_EXECUTABLE] = 
"CL_INVALID_PROGRAM_EXECUTABLE",
+               [-CL_INVALID_KERNEL_NAME] = "CL_INVALID_KERNEL_NAME",
+               [-CL_INVALID_KERNEL_DEFINITION] = 
"CL_INVALID_KERNEL_DEFINITION",
+               [-CL_INVALID_KERNEL] = "CL_INVALID_KERNEL",
+               [-CL_INVALID_ARG_INDEX] = "CL_INVALID_ARG_INDEX",
+               [-CL_INVALID_ARG_VALUE] = "CL_INVALID_ARG_VALUE",
+               [-CL_INVALID_ARG_SIZE] = "CL_INVALID_ARG_SIZE",
+               [-CL_INVALID_KERNEL_ARGS] = "CL_INVALID_KERNEL_ARGS",
+               [-CL_INVALID_WORK_DIMENSION] = "CL_INVALID_WORK_DIMENSION",
+               [-CL_INVALID_WORK_GROUP_SIZE] = "CL_INVALID_WORK_GROUP_SIZE",
+               [-CL_INVALID_WORK_ITEM_SIZE] = "CL_INVALID_WORK_ITEM_SIZE",
+               [-CL_INVALID_GLOBAL_OFFSET] = "CL_INVALID_GLOBAL_OFFSET",
+               [-CL_INVALID_EVENT_WAIT_LIST] = "CL_INVALID_EVENT_WAIT_LIST",
+               [-CL_INVALID_EVENT] = "CL_INVALID_EVENT",
+               [-CL_INVALID_OPERATION] = "CL_INVALID_OPERATION",
+               [-CL_INVALID_GL_OBJECT] = "CL_INVALID_GL_OBJECT",
+               [-CL_INVALID_BUFFER_SIZE] = "CL_INVALID_BUFFER_SIZE",
+               [-CL_INVALID_MIP_LEVEL] = "CL_INVALID_MIP_LEVEL",
+               [-CL_INVALID_GLOBAL_WORK_SIZE] = "CL_INVALID_GLOBAL_WORK_SIZE"
+       }[-err];
+       if(result) return result;
+       return "unknown error";
 }
 
 
 cl_device_id getDeviceID(){
+       static int initialized = 0;
+       static cl_device_id result;
        cl_platform_id platforms[4];
        cl_uint foundPlatforms;
-       if(clGetPlatformIDs(4,platforms,&foundPlatforms) != CL_SUCCESS) 
THRprintf(GDKout, "problem when finding the platforms");
-       if (!foundPlatforms) THRprintf(GDKout, "didn't find any OpenCL 
Platforms");
+       if(clGetPlatformIDs(4,platforms,&foundPlatforms) != CL_SUCCESS) 
printf("problem when finding the platforms");
+       if (!foundPlatforms) printf("didn't find any OpenCL Platforms");
 
-       cl_device_id result;
-       if (clGetDeviceIDs(*platforms,CL_DEVICE_TYPE_ALL, 1, &result, NULL) == 
CL_SUCCESS) return result;
-       else THRprintf(GDKout, "error getting GPU device id\n");
-
-  return NULL;
+       if (clGetDeviceIDs(*platforms,CL_DEVICE_TYPE_ALL, 1, &result, NULL) != 
CL_SUCCESS)  printf("error getting GPU device id\n");
+       else initialized = 1;
+       return result;
 }
 
 
-static cl_context clContextSingleton = NULL;
 cl_context getCLContext(){
-       if(clContextSingleton) return clContextSingleton;
+       static int initialized = 0;
+       static cl_context clContextSingleton;
+       if(initialized) {
+               printf("#%s, returning cl_context: %p;\n", __func__, 
clContextSingleton);
+               return clContextSingleton;
+       }
        int err;
        clContextSingleton = clCreateContext(0,1, 
(cl_device_id[]){getDeviceID()}, NULL, NULL, &err);
-       if (err == CL_SUCCESS) return clContextSingleton;
-       else THRprintf(GDKout, "failure when creating the context\n");
-  return NULL;
+       if (err != CL_SUCCESS) printf("failure when creating the context\n");
+       else initialized= 1;
+       printf("#%s, returning cl_context: %p;\n", __func__, 
clContextSingleton);
+       return clContextSingleton;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to