Changeset: 911928dfa2ff for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=911928dfa2ff
Modified Files:
        monetdb5/extras/bwd/cl_program_utilities.c
        monetdb5/extras/bwd/operations.c
        monetdb5/extras/bwd/utilities.c
        monetdb5/extras/bwd/utilities.h
Branch: bwd
Log Message:

* projective leftjoins actually access the decomposed data now instead of the 
normal bats


Unterschiede (226 Zeilen):

diff --git a/monetdb5/extras/bwd/cl_program_utilities.c 
b/monetdb5/extras/bwd/cl_program_utilities.c
--- a/monetdb5/extras/bwd/cl_program_utilities.c
+++ b/monetdb5/extras/bwd/cl_program_utilities.c
@@ -30,8 +30,10 @@ cl_program getProjectionLeftjoinProgram(
                        "__global const char* approximation\n"
                        ") {\n"
                        "  const int offset = 
inputTail->positions[get_global_id(0)]*%1$d;\n"
-                       "  for(int i = 0; i < %1$d; i++)\n"
-                       "    outputTail->values[get_global_id(0)*%1$d+i] = 
approximation[offset + i];\n"
+                       "  printf(\"projecting value %%d, position: %%d\\n\", 
get_global_id(0), inputTail->positions[get_global_id(0)]);"
+                       "  for(int i = 0; i < %1$d; i++){\n"
+                               "    
outputTail->values[get_global_id(0)*%1$d+i] = approximation[offset + i];\n"
+                               "}"
                        "}";
                char* sourceCode = malloc(16384);
                snprintf(sourceCode, 16384, sourceCodeTemplate, 
approximationBits/8);
diff --git a/monetdb5/extras/bwd/operations.c b/monetdb5/extras/bwd/operations.c
--- a/monetdb5/extras/bwd/operations.c
+++ b/monetdb5/extras/bwd/operations.c
@@ -15,7 +15,7 @@
 #include "utilities.h"
 #include "cl_program_utilities.h"
 
-static const int activateWorkInProgress = 0;
+static const int activateWorkInProgress = 1;
 
 #pragma mark Actual MAL Operations Implementation
 
@@ -33,6 +33,26 @@ typedef struct {
        int positions[];
 } clHead;
 
+/* Two-step read-back of a positions column from an OpenCL memory object.
+ * buffer == NULL: only queries the size of memoryObject (CL_MEM_SIZE) into
+ *                 *bufferSize and returns NULL.
+ * buffer != NULL: blocking read of *bufferSize bytes from memoryObject into
+ *                 buffer; returns buffer.
+ * A failing OpenCL call is reported on GDKout; the return value does not
+ * signal it. */
+clHead* getPositionsColumn(cl_mem memoryObject, clHead* buffer, size_t* bufferSize){
+       /* err is assigned on both paths so the check below never reads an
+        * uninitialised value */
+       cl_int err;
+       if(!buffer)
+               err = clGetMemObjectInfo(memoryObject, CL_MEM_SIZE, sizeof(size_t), bufferSize, NULL);
+       else
+               err = clEnqueueReadBuffer(getCommandQueue(), memoryObject, CL_TRUE, 0, *bufferSize, buffer, 0, NULL, NULL);
+       if(err) THRprintf(GDKout, "#%s, %s: %s;\n", __func__, buffer ? "clEnqueueReadBuffer" : "clGetMemObjectInfo", clError(err));
+       return buffer;
+}
+
+/* Two-step read-back of an approximate-values column from an OpenCL memory
+ * object.
+ * buffer == NULL: only queries the size of memoryObject (CL_MEM_SIZE) into
+ *                 *bufferSize and returns NULL.
+ * buffer != NULL: blocking read of *bufferSize bytes from memoryObject into
+ *                 buffer; returns buffer.
+ * A failing OpenCL call is reported on GDKout; the return value does not
+ * signal it. */
+clTail* getApproximateValuesColumn(cl_mem memoryObject, clTail* buffer, size_t* bufferSize){
+       /* err is assigned on both paths so the check below never reads an
+        * uninitialised value */
+       cl_int err;
+       if(!buffer)
+               err = clGetMemObjectInfo(memoryObject, CL_MEM_SIZE, sizeof(size_t), bufferSize, NULL);
+       else
+               err = clEnqueueReadBuffer(getCommandQueue(), memoryObject, CL_TRUE, 0, *bufferSize, buffer, 0, NULL, NULL);
+       if(err) THRprintf(GDKout, "#%s, %s: %s;\n", __func__, buffer ? "clEnqueueReadBuffer" : "clGetMemObjectInfo", clError(err));
+       return buffer;
+}
+
 str BWDLeftJoinApproximate(bat * res, bat * l, bat * r){
 
        ALGODEBUG printf("#BWDfetchjoin: approximating;\n");
@@ -70,7 +90,7 @@ str BWDLeftJoinApproximate(bat * res, ba
                BBPreleaseref(left->batCacheid);
                BBPreleaseref(right->batCacheid);
                return MAL_SUCCEED;
-       }       else    if(activateWorkInProgress && BAThvoid(left) && 
BAThvoid(right) && right->tseqbase != oid_nil){
+       }       else    if(BAThvoid(left) && BAThvoid(right) && right->tseqbase 
!= oid_nil){
                cl_mem leftColumn = batTailApproximation(left);
                if(!leftColumn) leftColumn = 
batHeadApproximation(BATmirror(left));
                cl_mem rightColumn = batTailApproximation(right);
@@ -99,6 +119,7 @@ str BWDLeftJoinApproximate(bat * res, ba
                        slot->residuals = NULL;
                        cl_int err;
                        slot->tailApproximation = 
clCreateBuffer(getCLContext(), CL_MEM_READ_WRITE, 
headCount*slot->approximationBits/8+sizeof(clTail), NULL, &err);
+                       slot->tailPositions = leftColumn;
 
                        clEnqueueWriteBuffer(getCommandQueue(), 
slot->tailApproximation, CL_TRUE, 0, sizeof(int), &headCount, 0, NULL, NULL); 
// I wonder what is faster transfering a single integer to the GPU or running a 
kernel that initializes a value
                        if(err) printf("#%s, clCreateBuffer: %s;\n", __func__, 
clError(err));
@@ -118,6 +139,10 @@ str BWDLeftJoinApproximate(bat * res, ba
                if((err = clEnqueueNDRangeKernel(getCommandQueue(), 
projectKernel, 1, (const size_t[]){0}, (const size_t[]){headCount}, (const 
size_t[]){1}, 0, NULL, NULL)))
                        THRprintf(GDKout, "#%s, clEnqueueNDRangeKernel: %s;\n", 
__func__, clError(err));
 
+               BBPkeepref((*res = result->batCacheid));
+               BBPreleaseref(left->batCacheid);
+               BBPreleaseref(right->batCacheid);
+               return MAL_SUCCEED;
 
                
        } else
@@ -125,6 +150,18 @@ str BWDLeftJoinApproximate(bat * res, ba
        return MAL_SUCCEED;
 };
 
+/* Rebuilds one int from its approximation and its residual.
+ *   i                 - element index into compressedTail->elements
+ *   approximationbits - width of the approximation in bits; the code divides
+ *                       it by 8 and shifts by 32-approximationbits, so
+ *                       0 < approximationbits <= 32 is required
+ *   compressedTail    - approximations, spaced approximationbits/8 bytes
+ *   residuals         - residuals, spaced (32-approximationbits)/8 bytes
+ *   residualI         - element index into residuals
+ * Returns (approximation << residualBits) + (residual & residualMask).
+ * NOTE(review): both loads fetch a whole int at a byte offset, i.e. more
+ * bytes than one element occupies and possibly unaligned. This looks like it
+ * relies on a little-endian host and on readable bytes after the last
+ * element - confirm. */
+static inline int decompressIntValue(int i, int approximationbits, clTail* compressedTail, char* residuals, int residualI){
+       const int residualBits = 32-approximationbits;
+       const unsigned int residualMask = (1u << residualBits)-1;
+       const unsigned int residualBytes = residualBits/8;
+
+       const int offset = (approximationbits/8)*i;
+       /* Shift in unsigned arithmetic: the loaded word may be negative or have
+        * high bits set, and shifting those out of a signed int is undefined. */
+       const unsigned int compressedValue = (unsigned int)*(int*)&(compressedTail->elements[offset]);
+       const unsigned int residualValue = (unsigned int)*(int*)&residuals[residualI*residualBytes];
+       return (int)((compressedValue << residualBits) + (residualValue & residualMask));
+}
 
 str BWDLeftJoinRefine(bat * res, bat * l, bat * r, bat * approx){
        if (!approx || !*approx) {
@@ -136,13 +173,14 @@ str BWDLeftJoinRefine(bat * res, bat * l
        BAT* right = BATdescriptor(*r);
 
        BAT* approximation = BATdescriptor(*approx);
-       BAT* refinement = BATnew(approximation->htype, approximation->ttype, 
approximation->batCount);
+       const register size_t approximationBits = 
batTailApproximationBits(right);
+       BAT* refinement = BATnew(ATOMtype(left->htype), ATOMtype(right->ttype), 
left->batCount);
+               /* BATnew(approximation->htype, approximation->ttype, 
approximation->batCount); */
        BATsetcount(refinement, approximation->batCount);
        if(BAThvoid(left) && BATtvoid(left) && BAThvoid(right) && 
right->tseqbase != oid_nil){
                if(!batTailIsDecomposed(right))
                        throw (MAL, "bwd.BWDLeftJoinApproximate", "bat is not 
decomposed: %d", *r);
                const unsigned char* residuals = batTailResiduals(right);
-               const register size_t approximationBits = 
batTailApproximationBits(right);
                const register unsigned int residualBytes = 
(32-approximationBits)/8;
                int i;
                register int* outputRegion = (int*) Tloc(refinement, 
BUNfirst(refinement));
@@ -150,7 +188,50 @@ str BWDLeftJoinRefine(bat * res, bat * l
                const register size_t offset = left->tseqbase;
                for (i = 0; i < approximation->batCount; ++i) 
                        outputRegion[i] = approximationRegion[i] + 
((*((unsigned int*) (residuals + (i+offset)*residualBytes))) >> 
approximationBits);
+       } else if(BAThvoid(left) && BAThvoid(right) && right->tseqbase != 
oid_nil){
+               // we essentially perform a merge-join of the precise positions 
(left tail) and the approximate positions to get the approximate values of the 
super result set
+               // this is okay because the approximate positions are a 
superset of the precise positions and the position lists have the same 
permutation
+               // we reconstruct the precise values in the same loop
+               /* cl_mem leftColumn = batTailApproximation(left); */
+               /* if(!leftColumn) leftColumn = 
batHeadApproximation(BATmirror(left)); */
+               size_t bufferSize;
+               getPositionsColumn(batTailPositions(approximation), NULL, 
&bufferSize);
+               clHead* supersetPositionsColumn = 
getPositionsColumn(batTailPositions(approximation), malloc(bufferSize), 
&bufferSize);
+
+               getApproximateValuesColumn(batTailApproximation(approximation), 
NULL, &bufferSize);
+
+               clTail* supersetApproximateValuesColumn = 
getApproximateValuesColumn(batTailApproximation(approximation), 
malloc(bufferSize), &bufferSize);
+
+
+               int* refinementRegion = (int*) Tloc(refinement, 
BUNfirst(refinement));
+
+               oid* positionRegion = (oid*) Tloc(left, BUNfirst(left));
+               size_t refinementCount = 0;
+               const unsigned char* residuals = batTailResiduals(right);
+
+               for (int i = 0; i < supersetPositionsColumn->count; ++i) {
+                       if(supersetPositionsColumn->positions[i] == 
*positionRegion){
+                               refinementRegion[refinementCount++] = 
decompressIntValue(i, approximationBits, supersetApproximateValuesColumn, 
residuals, *positionRegion);
+                               positionRegion++;
+                       }
+                       /* while() */
+               }
+               BATsetcount(refinement, refinementCount);
+               
+               
+               free(supersetPositionsColumn);
+               free(supersetApproximateValuesColumn);
+
+               
+
+               
+               /* cl_mem leftColumn = batTailApproximation(left); */
+
+               
+               
+
        }
+
        BBPkeepref(*res = refinement->batCacheid);
        BBPreleaseref(approximation->batCacheid);
        return MAL_SUCCEED;
diff --git a/monetdb5/extras/bwd/utilities.c b/monetdb5/extras/bwd/utilities.c
--- a/monetdb5/extras/bwd/utilities.c
+++ b/monetdb5/extras/bwd/utilities.c
@@ -138,6 +138,16 @@ const cl_mem batTailApproximation(const 
                return 
getDecomposedBATSlot(rightTailApproximationProperty->v.val.ival)->tailApproximation;
 }
 
+
+/* Returns the tailPositions buffer of the decomposed-BAT slot that subject
+ * refers to through its batRegistryIndex property. Prints a diagnostic and
+ * returns NULL when subject carries no such property. */
+const cl_mem batTailPositions(const BAT* subject){
+               PROPrec* rightTailApproximationProperty;
+               if(!(rightTailApproximationProperty = BATgetprop(subject, batRegistryIndex))){
+                       /* report under this function's own name */
+                       printf("#batTailPositions: bat hasn't been decomposed;\n");
+                       return NULL;
+               }
+               return getDecomposedBATSlot(rightTailApproximationProperty->v.val.ival)->tailPositions;
+}
+
 const cl_mem batHeadApproximation(const BAT* subject){
                PROPrec* rightTailApproximationProperty;
                if(!(rightTailApproximationProperty = BATgetprop(subject, 
batRegistryIndex))){
diff --git a/monetdb5/extras/bwd/utilities.h b/monetdb5/extras/bwd/utilities.h
--- a/monetdb5/extras/bwd/utilities.h
+++ b/monetdb5/extras/bwd/utilities.h
@@ -6,6 +6,7 @@
 typedef struct {
        cl_mem headApproximation;
        cl_mem tailApproximation;
+       cl_mem tailPositions; /* positions buffer; returned by batTailPositions() */
        size_t approximationBits;
        unsigned char* residuals;
 } DecomposedBATSlot;
@@ -15,16 +16,17 @@ extern const char* batRegistryIndex;
 DecomposedBATSlot* getDecomposedBATSlot(const unsigned int);
 const unsigned int getNextFreeDecomposedBATSlotIndex();
 
-const char batTailIsDecomposed(const BAT*);
-const cl_mem batHeadApproximation(const BAT*);
-const cl_mem batTailApproximation(const BAT*);
-const size_t batTailApproximationBits(const BAT*);
-const size_t batTailResidualBits(const BAT* subject);
-const unsigned char* batTailResiduals(const BAT*);
+const char batTailIsDecomposed(const BAT*) __attribute__((pure));
+const cl_mem batHeadApproximation(const BAT*) __attribute__((pure));
+const cl_mem batTailApproximation(const BAT*) __attribute__((pure));
+const cl_mem batTailPositions(const BAT*) __attribute__((pure));
+const size_t batTailApproximationBits(const BAT*) __attribute__((pure));
+const size_t batTailResidualBits(const BAT* subject) __attribute__((pure));
+const unsigned char* batTailResiduals(const BAT*) __attribute__((pure));
 
 const unsigned int decomposeIntArray(const int* subject, const size_t size, 
const size_t approximationBits);
 
-const char* clError(int);
+const char* clError(int) __attribute__((pure));
 cl_device_id getDeviceID();
 cl_context getCLContext();
 cl_command_queue getCommandQueue();
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to