Changeset: f6300313c8f7 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=f6300313c8f7
Modified Files:
        monetdb5/extras/bwd/operations.c
        monetdb5/extras/bwd/utilities.c
Branch: bwd
Log Message:

* fixed the select approximation kernel (bitshifting was wrong)


diffs (149 lines):

diff --git a/monetdb5/extras/bwd/operations.c b/monetdb5/extras/bwd/operations.c
--- a/monetdb5/extras/bwd/operations.c
+++ b/monetdb5/extras/bwd/operations.c
@@ -147,30 +147,23 @@ str BWDThetauselectApproximate(bat *res,
 
                }
 
-               const char* sourceCodeTemplate = "__kernel void uselect ("
-                       "__global struct{int count; int padding; char 
elements[];}* output,"
-                       "__global const char* approximation,"
-                       "const %1$s operand"
-                       ") {"
-                       /* "  printf(\"%%d:\", get_global_id(0));" */
-                       "  %1$s value  = 0;"
-                       "  const size_t inputOffset = get_global_id(0)*%3$d;"
-                       "  {"
-                       "    for(int i = 0; i < %3$d; i++)"
-                       "      value += approximation[inputOffset + i] << i*8;"
-                       "  }"
-                       "  if(value %2$s operand)"
-                       "  {"
-                       "    const size_t offset = atomic_inc(&(output->count)) 
* (sizeof(size_t) + %3$d);"
-                       "    for(int i = 0; i < sizeof(size_t); i++)"
-                       "      output->elements[offset+i] = get_global_id(0) >> 
i*8;"
-                       "    for(int i = 0; i < sizeof(size_t); i++)"
-                       "      output->elements[offset+sizeof(size_t)+i] = 
approximation[inputOffset + i];"
-                       /* "    size_t* outputSlot = (size_t*)(output->elements 
+ slotNumber*(%3$d + sizeof(size_t)));" */
-                       /* "    *outputSlot |= get_global_id(0);" */
-                       /* "    outputSlot[1] |= (value >> %4$d);" */
-                       /* "    printf(\"value: %%d, value in slot(%%d) %%d, 
global_id: %%d\\n\", value, slotNumber, outputSlot[1], get_global_id(0));" */
-                       "  }"
+               const char* sourceCodeTemplate = "__kernel void uselect (\n"
+                       "__global struct{int count; int padding; char 
elements[];}* output,\n"
+                       "__global const char* approximation,\n"
+                       "const %1$s operand\n"
+                       ") {\n"
+                       "  %1$s value  = 0;\n"
+                       "  const size_t inputOffset = get_global_id(0)*%3$d;\n"
+                       "  for(int i = 0; i < %3$d; i++)\n"
+                       "    value += (approximation[inputOffset + i] << 
((i+sizeof(%1$s) - %3$d)*8));\n"
+                       "\n"
+                       "  if(value %2$s operand){\n"
+                       "    const size_t offset = atomic_inc(&(output->count)) 
* (sizeof(size_t) + %3$d);\n"
+                       "    for(int i = 0; i < sizeof(size_t); i++)\n"
+                       "      output->elements[offset+i] = (get_global_id(0) 
>> (i*8));\n"
+                       "    for(int i = 0; i < %3$d; i++)\n"
+                       "      output->elements[offset+sizeof(size_t)+i] = 
approximation[inputOffset + i];\n"
+                       "  }\n"
                        "}";
                char* sourceCode = malloc(16384);
                snprintf(sourceCode, 16384, sourceCodeTemplate, 
typeNames[BATttype(data)], approximateOperation(*OP), 
batTailApproximationBits(data)/8, 32-batTailApproximationBits(data));
@@ -186,7 +179,6 @@ str BWDThetauselectApproximate(bat *res,
                        clGetProgramBuildInfo(program, getDeviceID(), 
CL_PROGRAM_BUILD_LOG, 0, NULL, &bufferSize);
                        clGetProgramBuildInfo(program, getDeviceID(), 
CL_PROGRAM_BUILD_LOG, bufferSize, (buffer = alloca(bufferSize)), NULL);
                        THRprintf(GDKout, "#%s, clBuildProgram log: %s;\n", 
__func__, buffer);
-                       /* return "build failure"; */
                }
 
                cl_kernel selectKernel = clCreateKernel(program, "uselect", 
&err);
@@ -195,13 +187,6 @@ str BWDThetauselectApproximate(bat *res,
                err = clSetKernelArg(selectKernel, 1, sizeof(cl_mem), 
(cl_mem[]){batTailApproximation(data)}); if (err) THRprintf(GDKout, "#%s, 
clSetKernelArg(%d): %s;\n", __func__, 1, clError(err));
                err = clSetKernelArg(selectKernel, 2, sizeof(int), 
(int[]){((*(int*)val)>>batTailResidualBits(data))<<batTailResidualBits(data)}); 
if (err) THRprintf(GDKout, "#%s, clSetKernelArg(%d): %s;\n", __func__, 2, 
clError(err)); // type specific
 
-               /* cl_mem outputCursor; */
-               /* { */
-               /*      outputCursor =  clCreateBuffer(getCLContext(), 
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(int), (int[]){0}, &err); */
-               /*      if(err) THRprintf(GDKout, "#%s, clCreateBuffer, cursor: 
%s;\n", __func__, clError(err)); */
-               /* } */
-
-               /* err = clSetKernelArg(selectKernel, 3, sizeof(cl_mem), 
&outputCursor); if (err) THRprintf(GDKout, "#%s, clSetKernelArg(%d): %s;\n", 
__func__, 3, clError(err)); */
                err = clEnqueueNDRangeKernel(getCommandQueue(), selectKernel, 
1, (const size_t[]){0}, (const size_t[]){BATcount(data)}, (const size_t[]){1}, 
0, NULL, NULL);
                if(err) THRprintf(GDKout, "#%s, clEnqueueNDRangeKernel: %s;\n", 
__func__, clError(err));
                free(sourceCode);
@@ -228,11 +213,9 @@ str BWDThetauselectRefine(bat *res, bat 
        clBAT* resultClBAT = (clBAT*) malloc(approximationSize);
        cl_int err = clEnqueueReadBuffer(getCommandQueue(), 
batTailApproximation(approximation), CL_TRUE, 0, approximationSize, 
resultClBAT, 0, NULL, NULL);    
        if(err) THRprintf(GDKout, "#%s, clEnqueueReadBuffer: %s;\n", __func__, 
clError(err));
-               /* Tloc(result, BUNfirst(result)); */
        int* resultRegion = (int*) Tloc(result, BUNfirst(result)); // type 
specific
        oid* positionRegion = (oid*) Hloc(result, BUNfirst(result)); // type 
specific
        size_t candidateCount = resultClBAT->count;
-       /* const register unsigned int residualBits = 32-approximationBits; */
        const unsigned int approximationMask = ~((1 << (32 - 
batTailApproximationBits(approximation)))-1);
        const unsigned char* residuals = batTailResiduals(data);
        const unsigned int residualMask = (1 << batTailResidualBits(data))-1;
@@ -249,8 +232,6 @@ str BWDThetauselectRefine(bat *res, bat 
     resultRegion[j++] = deCompressedValue; \
                }\
 }
-               /* printf ("compressed value: %d, index: %d, value: %d\n", 
compressedValue, index, resultRegion[j-1]); \ */
-
        switch (*OP[0]){
        case '<':
                switch((*OP)[1]){
@@ -262,9 +243,9 @@ str BWDThetauselectRefine(bat *res, bat 
        case '>': 
                switch((*OP)[1]){
                case '\0':
-                       refineLoop(<=);
+                       refineLoop(>);
                case '=':
-                       refineLoop(<=);
+                       refineLoop(>=);
                }
        case '=': 
                refineLoop(==);
diff --git a/monetdb5/extras/bwd/utilities.c b/monetdb5/extras/bwd/utilities.c
--- a/monetdb5/extras/bwd/utilities.c
+++ b/monetdb5/extras/bwd/utilities.c
@@ -85,19 +85,20 @@ cl_device_id getDeviceID(){
        return result;
 }
 
+static inline void superverboseprintf(const char * format, ... ){};
 
 cl_context getCLContext(){
        static int initialized = 0;
        static cl_context clContextSingleton;
        if(initialized) {
-               printf("#%s, returning cl_context: %p;\n", __func__, 
clContextSingleton);
+               superverboseprintf("#%s, returning cl_context: %p;\n", 
__func__, clContextSingleton);
                return clContextSingleton;
        }
        int err;
        clContextSingleton = clCreateContext(0,1, 
(cl_device_id[]){getDeviceID()}, NULL, NULL, &err);
        if (err != CL_SUCCESS) printf("failure when creating the context\n");
        else initialized= 1;
-       printf("#%s, returning cl_context: %p;\n", __func__, 
clContextSingleton);
+       superverboseprintf("#%s, returning cl_context: %p;\n", __func__, 
clContextSingleton);
        return clContextSingleton;
 }
 
@@ -106,14 +107,14 @@ cl_command_queue getCommandQueue(){
        static int initialized = 0;
        static cl_command_queue queue;
        if(initialized) {
-                       printf("#%s, returning queue pointer: %p;\n", __func__, 
queue);
+                       superverboseprintf("#%s, returning queue pointer: 
%p;\n", __func__, queue);
                        return queue;
        }
        int err;
        queue = clCreateCommandQueue(getCLContext(),getDeviceID(),0,&err);
        if (err != CL_SUCCESS) printf("failed to create queue");
        else initialized = 1;
-       printf("#%s, returning queue pointer: %p;\n", __func__, queue);
+       superverboseprintf("#%s, returning queue pointer: %p;\n", __func__, 
queue);
 
        return queue;
 }
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to