Author: ghuck
Date: 2009-06-10 10:45:07 -0700 (Wed, 10 Jun 2009)
New Revision: 16907

Added:
   csplugins/trunk/soc/ghuck/gpuGraphDrawing/copyright.h
   csplugins/trunk/soc/ghuck/gpuGraphDrawing/licence.h
Modified:
   csplugins/trunk/soc/ghuck/gpuGraphDrawing/Makefile
   csplugins/trunk/soc/ghuck/gpuGraphDrawing/kdNode.cu
   csplugins/trunk/soc/ghuck/gpuGraphDrawing/simpleCUDPP.cu
Log:
Corrected Makefile
Added copyright.h
Added licence.h
Changed the use of CUDPP Library, now it "works" with the new interface 
provided in release 1.0a of CUDPP

This version compiles, but has runtime errors! 




Modified: csplugins/trunk/soc/ghuck/gpuGraphDrawing/Makefile
===================================================================
--- csplugins/trunk/soc/ghuck/gpuGraphDrawing/Makefile  2009-06-10 05:15:05 UTC 
(rev 16906)
+++ csplugins/trunk/soc/ghuck/gpuGraphDrawing/Makefile  2009-06-10 17:45:07 UTC 
(rev 16907)
@@ -1,16 +1,21 @@
-gpuGraphDrawing : simpleCUDPP.cu
-       nvcc -o gpuGraphDrawing $(INC) $(LIB) simpleCUDPP.cu  
+gpuGraphDrawing : simpleCUDPP.o
+       g++ -o gpuGraphDrawing $(INC) $(LIB) simpleCUDPP.o -lglut -lcudart 
-lcudpp64
 
+simpleCUDPP.o : simpleCUDPP.cu
+       nvcc -c $(INC) -o simpleCUDPP.o simpleCUDPP.cu
+
 clean :
-       rm simpleGraphDrawing simpleGraphDrawing.o
+       rm gpuGraphDrawing simpleCUDPP.o
 
-CUDA_SDK := /home/gerardo/NVIDIA_CUDA_SDK
+CUDA := /usr/local/cuda
 
-INC :=         -I$(CUDA_SDK)/common/inc \
-        -I$(CUDA_SDK)/common/inc/cudpp 
+CUDPP := /home/gerardo/cudpp_1.0a
 
-LIB := -L$(CUDA_SDK)/lib 
+INC :=         -I$(CUDPP)/common/inc \
+        -I$(CUDPP)/cudpp/include
 
+LIB := -L$(CUDA)/lib \
+       -L$(CUDPP)/lib
 
 
 

Added: csplugins/trunk/soc/ghuck/gpuGraphDrawing/copyright.h
===================================================================

Modified: csplugins/trunk/soc/ghuck/gpuGraphDrawing/kdNode.cu
===================================================================
--- csplugins/trunk/soc/ghuck/gpuGraphDrawing/kdNode.cu 2009-06-10 05:15:05 UTC 
(rev 16906)
+++ csplugins/trunk/soc/ghuck/gpuGraphDrawing/kdNode.cu 2009-06-10 17:45:07 UTC 
(rev 16907)
@@ -1,424 +1,426 @@
-#include "kdNode.h"
-#include "partition.cu"
-#include "cudpp.h"
-#include "complexDevice.h"
+#include "licence.h"
+#include "copyright.h"
+
+#include "kdNode.h"
+#include "partition.cu"
+#include "cudpp.h"
+#include "complexDevice.h"
 #include "common.h"
 
-//added by me!
-#include <stdio.h>
-//
 
-complexDevice * centerD;
-
-void kdNodeInitD(kdNodeInt * n,kdNodeFloat * nf,int i,unsigned int a,float 
d,float u,float l,float r){
-       int k;
-       (n+i-1)->i=i;
-       (n+i-1)->numElems=0;
-       (n+i-1)->axis=(a);
-       (nf+i-1)->up=(u);
-       (nf+i-1)->down=(d);
-       (nf+i-1)->left=(l);
-       (nf+i-1)->right=(r);
-       (nf+i-1)->center = Complex((l+r)/2,(u+d)/2); 
-       (n+i-1)->child=0;
-       (n+i-1)->next=0;
-       (nf+i-1)->radius = 
sqrt(((nf+i-1)->left-(nf+i-1)->center.r)*((nf+i-1)->left-(nf+i-1)->center.r) + 
((nf+i-1)->right-(nf+i-1)->center.i)*((nf+i-1)->right-(nf+i-1)->center.i));
-       
-       for(k = 0; k < 4; k++){
-               (nf+i-1)->Outer[k]=Complex(0,0);
-       }
-}
-
-void printD(kdNodeInt * n, kdNodeFloat * nf,int i){
-       if(!n)
-               return;
-       if(!nf)
-               return;
-       
-       if(((n+i-1)->child==0)){
-               printf("%d, %f, %f, %f %f 
%f\n",(n+i-1)->numElems,(nf+i-1)->down,(nf+i-1)->up,(nf+i-1)->left,(nf+i-1)->right,(nf+i-1)->radius);
-               printf("(");
-               for(int j = 0; j < (n+i-1)->numElems; j++)
-                       printf("%d ",(n+i-1)->graphNodes[j]);
-               printf(")\n");
-               if((n+i-1)->next)
-                       printD(n,nf,(n+i-1)->next);
-       }
-       else{
-               if((n+i-1)->child)
-                       printD(n,nf,(n+i-1)->child);
-       }
-}
-
-void insertListD(kdNodeInt * n,int x,float * NodePos,float pos, int index,int 
limit){
-       int i,j;
-
-       for( i = 0; (i < (n+x-1)->numElems) && 
((NodePos[2*(n+x-1)->graphNodes[i]+(n+x-1)->axis]) <= pos); i++);
-       for(j = (n+x-1)->numElems; j > i; j--){
-               (n+x-1)->graphNodes[j] = (n+x-1)->graphNodes[j-1];
-       }
-       (n+x-1)->graphNodes[i] = index; 
-}
-
-void updateElemsD(kdNodeInt * n, int i, int* start, int* end){
-       int j;
-       for(j=0;start <=end; start ++,j++){
-               (n+i-1)->graphNodes[j] = *start;
-       }
-       (n+i-1)->numElems = j;
-}
-
-
-void InsertElemD(kdNodeInt * n, kdNodeFloat * nf,int i,float * NodePos,float 
x,float y, int index, int limit, int init){
-       static int nIntAvail=2;
-       if(init)
-               nIntAvail=2;
-       int k;
-       float pos;
-
-       fcomplex zi = Complex((x),(y));
-       fcomplex z_v_minus_z_0_over_k = Csub(zi,((nf+i-1)->center));
-       for(k = 0; k < 4; k++){
-               fcomplex temp = Complex(-1/(float)(k+1),0);
-               (nf+i-1)->Outer[k] = Cadd((nf+i-1)->Outer[k], 
Cmul(z_v_minus_z_0_over_k,temp));
-               z_v_minus_z_0_over_k = 
Cmul(z_v_minus_z_0_over_k,Csub(zi,(nf+i-1)->center));
-               //Outer[k]+=  ((float(-1))*z_v_minus_z_0_over_k)/float(k+1);
-               //z_v_minus_z_0_over_k *= zi - (nf+i-1)->center;
-       }
-
-       if((n+i-1)->axis)
-               pos = y;
-       else
-               pos = x;
-
-       if((n+i-1)->child==0){
-               insertListD((n),i,NodePos,pos,index,limit);
-               if((n+i-1)->numElems >= limit){
-                       float u1,d1,l1,r1,u2,d2,l2,r2;
-                       int numElems = (n+i-1)->numElems+1;
-                       int splitting_node = 
(n+i-1)->graphNodes[((numElems)/2)-1];
-                       (nf+i-1)->split = 
NodePos[2*splitting_node+(n+i-1)->axis];
-                       if((n+i-1)->axis){
-                               l1 =(nf+i-1)->left;  l2=(nf+i-1)->left;
-                               r1 =(nf+i-1)->right; r2 = (nf+i-1)->right;
-                               u1 =(nf+i-1)->split; u2 = (nf+i-1)->up;
-                               d1 =(nf+i-1)->down; d2 = (nf+i-1)->split;
-                       }
-                       else{
-                               l1 =(nf+i-1)->left; l2=(nf+i-1)->split;
-                               r1 =(nf+i-1)->split; r2 = (nf+i-1)->right;
-                               u1 = (nf+i-1)->up; u2 = (nf+i-1)->up;
-                               d1 = (nf+i-1)->down; d2 = (nf+i-1)->down;
-                       }
-                       (n+i-1)->child = nIntAvail; 
kdNodeInitD(n,nf,nIntAvail,1-(n+i-1)->axis,d1,u1,l1,r1);
-                       nIntAvail++;
-                       (n+nIntAvail-2)->next = nIntAvail; 
kdNodeInitD(n,nf,nIntAvail,1-(n+i-1)->axis,d2,u2,l2,r2);
-                       
updateElemsD(n,nIntAvail-1,&((n+i-1)->graphNodes[0]),&((n+i-1)->graphNodes[(numElems/2)-1]));
-                       
updateElemsD(n,nIntAvail,&((n+i-1)->graphNodes[(numElems/2)]),&((n+i-1)->graphNodes[limit]));
-                       (n+nIntAvail-1)->next = (n+i-1)->next;
-                       nIntAvail++;
-               }
-       }
-       else {
-               int c = (n+i-1)->child;
-               if(pos <= (nf+i-1)->split)
-                       InsertElemD((n),nf,c,NodePos,x,y,index,limit,0);
-               else
-                       
InsertElemD((n),nf,(n+c-1)->next,NodePos,x,y,index,limit,0);
-       }
-       (n+i-1)->numElems++;
-}
-
-void swapD(float3 * greater,float3 * less){
-       float3 temp = *greater;
-       *greater = *less;
-       *less    = temp;
-}
-
-float3 * partition(float3 * a, float3 * b, float pivot,int axis){
-       
-       int i = 0;
-       int n = b-a+1;
-       if(axis){
-               while((i<n) && ((b-i)->y > pivot))
-                       i++;
-               float3 * greater = b-i;
-               i = 0;
-               while((i<n) && ((a+i)->y <= pivot))
-                       i++;
-               float3 * less = a+i; 
-               for(; less < greater; ){
-                               swapD(greater,less);
-                               greater--;
-                               less++;
-                               while(less->y <= pivot)
-                                       less++;
-                               while(greater->y > pivot)
-                                       greater--;
-               }
-               return greater;
-       }
-       else{
-               while((i<n) && ((b-i)->x > pivot))
-                       i++;
-               float3 * greater = b-i;
-               i = 0;
-               while((i<n) && ((a+i)->x <= pivot))
-                       i++;
-               float3 * less = a+i; 
-               for(; less < greater; ){
-                               swapD(greater,less);
-                               greater--;
-                               less++;
-                               while(less->x <= pivot)
-                                       less++;
-                               while(greater->x > pivot)
-                                       greater--;
-               }
-               return greater;
-       }
-}
-
-float3 * medianD(float3 * a, float3 * b, int axis,int k){
-       
-       if(!(a-b))
-               return a;
-       
-       float pivot;
-       if(axis){
-               pivot = (a)->y;
-       }
-       else{
-               pivot = (a)->x;
-       }
-
-       float3 * p = partition(a,b,pivot,axis);
-       int i = int(p-a)+1;
-       if(i==k)
-               return p;
-       else if(i < k)
-               return medianD(p+1,b,axis,k-i);
-       else
-               return medianD(a,p-1,axis,k);
-}
-
-float3 * radixPartitionD(float3 * a, float3 * b, unsigned int bit, int axis){
-       // y axis
-       unsigned int o = 1;
-       o = o<<(bit);
-       int n = (int)(b-a)+1;
-       if(axis){
-               int i=0;
-               
-               while((i<n) && (((unsigned int)(b-i)->y) & o) )
-                       i++;
-               float3 * greater = b-i;
-               if(i==n){
-                       return greater;
-               }
-               i = 0;
-               while((i<n) && !(((unsigned int)(a+i)->y)&o))
-                       i++;
-               float3 * less = a+i; 
-               if(i==n){
-                       return b;
-                       //less--;
-               }
-               for(; less < greater; ){
-                               swapD(greater,less);
-                               greater--;
-                               less++;
-                               while(!((unsigned int)(less->y)&o))
-                                       less++;
-                               while(((unsigned int)(greater->y)&o))
-                                       greater--;
-               }
-               return greater;
-       }
-       // x axis
-       else{
-               int i=0;
-               while((i<n) && (((unsigned int)(b-i)->x)&o) )
-                       i++;
-               float3 * greater = b-i;
-               if(i==n)
-                       return greater;
-               i = 0;
-               while((i<n) && !(((unsigned int)(a+i)->x)&o))
-                       i++;
-               float3 * less = a+i;
-               if(i==n)
-                       return b;
-               for(; less < greater; ){
-                               swapD(greater,less);
-                               greater--;
-                               less++;
-                               while(!((unsigned int)(less->x)&o))
-                                       less++;
-                               while(((unsigned int)(greater->x)&o))
-                                       greater--;
-               }
-               return greater;
-       }
-}
-
-float3 * radixMedianD(float3 * a, float3 * b, int axis,int k, unsigned int bit,
-               unsigned int * data_out, unsigned int * d_temp_addr_uint, 
float3 * d_out,CUDPPScanConfig * config,unsigned int * nD){
-
-       float3 * s;
-               
-       if(a==b)
-               return a;
-       else{
-               int n = b - a + 1; 
-               dim3  threads,blocks;
-               unsigned m_chunks = n / maxThreadsThisBlock;
-               unsigned m_leftovers = n % maxThreadsThisBlock;
-           
-               if ((m_chunks == 0) && (m_leftovers > 0))
-               {
-                       // can't even fill a block
-                       blocks = dim3(1, 1, 1); 
-                       threads = dim3((m_leftovers), 1, 1);
-               } 
-               else 
-               {
-                       // normal case
-                       if (m_leftovers > 0)
-                       {
-                               // not aligned, add an additional block for 
leftovers
-                               blocks = dim3(m_chunks + 1, 1, 1);
-                       }
-                       else
-                       {
-                               // aligned on block boundary
-                               blocks = dim3(m_chunks, 1, 1);
-                       }
-                       threads = dim3(maxThreadsThisBlock , 1, 1);
-               }
-               
-               radixGlobalSetup_kernel<<<blocks, threads>>>(data_out, a, 
bit,axis);
-               CUT_CHECK_ERROR("Kernel execution failed");
-               
-               cudppScan(d_temp_addr_uint, data_out, n, config);
-               CUT_CHECK_ERROR("Kernel execution failed");
-       
-    
-        radixGlobalSplit_kernel<<<blocks, threads>>>(d_out, 
d_temp_addr_uint,a, bit,n- 1,axis,nD);
-        
-               int ns;
-               cudaMemcpy(&ns,nD,sizeof(unsigned int),cudaMemcpyDeviceToHost);
-               s = a + ns - 1 ;
-               if(bit==0)
-               return s;       
-               if( ns > k )
-                       return 
radixMedianD(a,s,axis,k,bit>>1,data_out,d_temp_addr_uint,d_out,config,nD);
-               else
-                       return 
radixMedianD(s+1,b,axis,k-ns,bit>>1,data_out,d_temp_addr_uint,d_out,config,nD);
-       }
-}
-
-void constructD(float3 * NodePos1, float3 * NodePos2, kdNodeInt * 
n,kdNodeFloat * nf,
-                          int i,int a,float d,float u,float l,float r, int 
limit,
-                          unsigned int * data_out, unsigned int * 
d_temp_addr_uint, float3 * d_out,CUDPPScanConfig * config,unsigned int * 
nD,complexDevice * OuterD){
-       
-       static int avail = 2;
-       if(i==1)
-               avail = 2;
-       (n+i-1)->numElems = int(NodePos2-NodePos1)+1;
-       dim3 blocks, threads;
-       
-       unsigned m_chunkSize = maxThreadsThisBlock;
-    unsigned m_chunks = (n+i-1)->numElems / m_chunkSize;
-    unsigned m_leftovers = (n+i-1)->numElems % m_chunkSize;
-    
-    if ((m_chunks == 0) && (m_leftovers > 0))
-    {
-        // can't even fill a block
-        blocks = dim3(1, 1, 1); 
-        threads = dim3((m_chunkSize), 1, 1);
-    } 
-    else 
-    {
-        // normal case
-        if (m_leftovers > 0)
-        {
-            // not aligned, add an additional block for leftovers
-            blocks = dim3(m_chunks + 1, 1, 1);
-        }
-        else
-        {
-            // aligned on block boundary
-            blocks = dim3(m_chunks, 1, 1);
-        }
-        threads = dim3(m_chunkSize, 1, 1);
-    }
-       
-    fcomplex * Outer = (fcomplex *) malloc( 4*(m_chunks+1)*sizeof(fcomplex));
-    
-       // check if kernel execution generated and error
-    CUT_CHECK_ERROR("Kernel execution failed");
-    
-       complexDevice center; center.x = ((nf+i-1)->center).r; center.y = 
((nf+i-1)->center).i;
-       cudaMalloc((void**)&OuterD, 4*(m_chunks+1)*sizeof(fcomplex));
-       calcOuter<<<blocks, 
threads,4*sizeof(fcomplex)*threads.x>>>(NodePos1,OuterD,(n+i-1)->numElems,center);
-       CUT_CHECK_ERROR("Kernel execution failed");
-       
-       cudaMemcpy(Outer, OuterD, 4*(m_chunks+1)*sizeof(fcomplex), 
cudaMemcpyDeviceToHost);
-       for(int k = 0; k < m_chunks+1; k+=4)
-               for(int j = 0; j < 4; j++)
-                       (nf+i-1)->Outer[j]= Cadd(Outer[k+j],(nf+i-1)->Outer[j]);
-       
-       cudaFree(OuterD);
-       free(Outer);
-       
-       // check if kernel execution generated and error
-    CUT_CHECK_ERROR("Kernel execution failed");
-       
-       if((n+i-1)->numElems <= limit){
-               float3 *start = 
(float3*)malloc(((n+i-1)->numElems)*sizeof(float3));
-               cudaMemcpy(start, NodePos1, ((n+i-1)->numElems)*sizeof(float3), 
cudaMemcpyDeviceToHost);
-               for(int j = 0; j < (n+i-1)->numElems; j++,start++)
-               {
-                       (n+i-1)->graphNodes[j]= (int)start->z;
-               }       
-               return;
-       }
-       
-       //CUT_CHECK_ERROR("Kernel execution failed");
-       float l1,l2,r1,r2,u1,u2,d1,d2;
-               
-       unsigned int bit = 1<<(sizeof(float)*8-1);
-       float3 * mid = 
radixMedianD(NodePos1,NodePos2,a,(int)((NodePos2-NodePos1)/2)+1,bit,data_out,d_temp_addr_uint,d_out,config,nD);
-       
-       float3 midElem;
-       cudaError_t err = cudaMemcpy(&(midElem), mid, 
sizeof(float3),cudaMemcpyDeviceToHost);   
-       CUT_CHECK_ERROR("Kernel execution failed");     
-       if(a){
-               (nf+i-1)->split = midElem.y;
-               l1 =(nf+i-1)->left;  l2=(nf+i-1)->left;
-               r1 =(nf+i-1)->right; r2 = (nf+i-1)->right;
-               u1 =(nf+i-1)->split; u2 = (nf+i-1)->up;
-               d1 =(nf+i-1)->down; d2 = (nf+i-1)->split;
-       }
-       else{
-               (nf+i-1)->split = midElem.x;
-               l1 =(nf+i-1)->left; l2=(nf+i-1)->split;
-               r1 =(nf+i-1)->split; r2 = (nf+i-1)->right;
-               u1 = (nf+i-1)->up; u2 = (nf+i-1)->up;
-               d1 = (nf+i-1)->down; d2 = (nf+i-1)->down;
-       }
-       
-       int c = avail++; 
-       int c2 = avail++;
-
-       kdNodeInitD(n,nf,c,1-a,d1,u1,l1,r1);
-       kdNodeInitD(n,nf,c2,1-a,d2,u2,l2,r2);
-
-       (n+i-1)->child = c;
-       (n+c-1)->next = c2;
-       (n+c2-1)->next = (n+i-1)->next;
-
-       constructD(NodePos1,mid-1,n,nf,c,1-a,d1,u1,l1,r1,limit,data_out, 
d_temp_addr_uint, d_out,config,nD,OuterD);
-       constructD(mid,NodePos2 ,n,nf,c2,1-a,d2,u2,l2,r2,limit,data_out, 
d_temp_addr_uint, d_out,config,nD,OuterD);      
+#include <stdio.h>
+
+
+complexDevice * centerD;
+
+void kdNodeInitD(kdNodeInt * n,kdNodeFloat * nf,int i,unsigned int a,float 
d,float u,float l,float r){
+  int k;
+  (n+i-1)->i=i;
+  (n+i-1)->numElems=0;
+  (n+i-1)->axis=(a);
+  (nf+i-1)->up=(u);
+  (nf+i-1)->down=(d);
+  (nf+i-1)->left=(l);
+  (nf+i-1)->right=(r);
+  (nf+i-1)->center = Complex((l+r)/2,(u+d)/2); 
+  (n+i-1)->child=0;
+  (n+i-1)->next=0;
+  (nf+i-1)->radius = 
sqrt(((nf+i-1)->left-(nf+i-1)->center.r)*((nf+i-1)->left-(nf+i-1)->center.r) + 
((nf+i-1)->right-(nf+i-1)->center.i)*((nf+i-1)->right-(nf+i-1)->center.i));
+       
+  for(k = 0; k < 4; k++){
+    (nf+i-1)->Outer[k]=Complex(0,0);
+  }
 }
+
+void printD(kdNodeInt * n, kdNodeFloat * nf,int i){
+  if(!n)
+    return;
+  if(!nf)
+    return;
+       
+  if(((n+i-1)->child==0)){
+    printf("%d, %f, %f, %f %f 
%f\n",(n+i-1)->numElems,(nf+i-1)->down,(nf+i-1)->up,(nf+i-1)->left,(nf+i-1)->right,(nf+i-1)->radius);
+    printf("(");
+    for(int j = 0; j < (n+i-1)->numElems; j++)
+      printf("%d ",(n+i-1)->graphNodes[j]);
+    printf(")\n");
+    if((n+i-1)->next)
+      printD(n,nf,(n+i-1)->next);
+  }
+  else{
+    if((n+i-1)->child)
+      printD(n,nf,(n+i-1)->child);
+  }
+}
+
+void insertListD(kdNodeInt * n,int x,float * NodePos,float pos, int index,int 
limit){
+  int i,j;
+  
+  for( i = 0; (i < (n+x-1)->numElems) && 
((NodePos[2*(n+x-1)->graphNodes[i]+(n+x-1)->axis]) <= pos); i++)
+    ;
+  for(j = (n+x-1)->numElems; j > i; j--){
+    (n+x-1)->graphNodes[j] = (n+x-1)->graphNodes[j-1];
+  }
+  (n+x-1)->graphNodes[i] = index; 
+}
+
+void updateElemsD(kdNodeInt * n, int i, int* start, int* end){
+  int j;
+  for(j=0;start <=end; start ++,j++){
+    (n+i-1)->graphNodes[j] = *start;
+  }
+  (n+i-1)->numElems = j;
+}
+
+
+void InsertElemD(kdNodeInt * n, kdNodeFloat * nf,int i,float * NodePos,float 
x,float y, int index, int limit, int init){
+  static int nIntAvail=2;
+  if(init)
+    nIntAvail=2;
+  int k;
+  float pos;
+  
+  fcomplex zi = Complex((x),(y));
+  fcomplex z_v_minus_z_0_over_k = Csub(zi,((nf+i-1)->center));
+  for(k = 0; k < 4; k++){
+    fcomplex temp = Complex(-1/(float)(k+1),0);
+    (nf+i-1)->Outer[k] = Cadd((nf+i-1)->Outer[k], 
Cmul(z_v_minus_z_0_over_k,temp));
+    z_v_minus_z_0_over_k = 
Cmul(z_v_minus_z_0_over_k,Csub(zi,(nf+i-1)->center));
+    //Outer[k]+=  ((float(-1))*z_v_minus_z_0_over_k)/float(k+1);
+    //z_v_minus_z_0_over_k *= zi - (nf+i-1)->center;
+  }
+  
+  if((n+i-1)->axis)
+    pos = y;
+  else
+    pos = x;
+  
+  if((n+i-1)->child==0){
+    insertListD((n),i,NodePos,pos,index,limit);
+    if((n+i-1)->numElems >= limit){
+      float u1,d1,l1,r1,u2,d2,l2,r2;
+      int numElems = (n+i-1)->numElems+1;
+      int splitting_node = (n+i-1)->graphNodes[((numElems)/2)-1];
+      (nf+i-1)->split = NodePos[2*splitting_node+(n+i-1)->axis];
+      if((n+i-1)->axis){
+       l1 =(nf+i-1)->left;  l2=(nf+i-1)->left;
+       r1 =(nf+i-1)->right; r2 = (nf+i-1)->right;
+       u1 =(nf+i-1)->split; u2 = (nf+i-1)->up;
+       d1 =(nf+i-1)->down; d2 = (nf+i-1)->split;
+      }
+      else{
+       l1 =(nf+i-1)->left; l2=(nf+i-1)->split;
+       r1 =(nf+i-1)->split; r2 = (nf+i-1)->right;
+       u1 = (nf+i-1)->up; u2 = (nf+i-1)->up;
+       d1 = (nf+i-1)->down; d2 = (nf+i-1)->down;
+      }
+      (n+i-1)->child = nIntAvail; 
kdNodeInitD(n,nf,nIntAvail,1-(n+i-1)->axis,d1,u1,l1,r1);
+      nIntAvail++;
+      (n+nIntAvail-2)->next = nIntAvail; 
kdNodeInitD(n,nf,nIntAvail,1-(n+i-1)->axis,d2,u2,l2,r2);
+      
updateElemsD(n,nIntAvail-1,&((n+i-1)->graphNodes[0]),&((n+i-1)->graphNodes[(numElems/2)-1]));
+      
updateElemsD(n,nIntAvail,&((n+i-1)->graphNodes[(numElems/2)]),&((n+i-1)->graphNodes[limit]));
+      (n+nIntAvail-1)->next = (n+i-1)->next;
+      nIntAvail++;
+    }
+  }
+  else {
+    int c = (n+i-1)->child;
+    if(pos <= (nf+i-1)->split)
+      InsertElemD((n),nf,c,NodePos,x,y,index,limit,0);
+    else
+      InsertElemD((n),nf,(n+c-1)->next,NodePos,x,y,index,limit,0);
+  }
+  (n+i-1)->numElems++;
+}
+
+void swapD(float3 * greater,float3 * less){
+  float3 temp = *greater;
+  *greater = *less;
+  *less    = temp;
+}
+
+float3 * partition(float3 * a, float3 * b, float pivot,int axis){
+  
+  int i = 0;
+  int n = b-a+1;
+  if(axis){
+    while((i<n) && ((b-i)->y > pivot))
+      i++;
+    float3 * greater = b-i;
+    i = 0;
+    while((i<n) && ((a+i)->y <= pivot))
+      i++;
+    float3 * less = a+i; 
+    for(; less < greater; ){
+      swapD(greater,less);
+      greater--;
+      less++;
+      while(less->y <= pivot)
+       less++;
+      while(greater->y > pivot)
+       greater--;
+    }
+    return greater;
+  }
+  else{
+    while((i<n) && ((b-i)->x > pivot))
+      i++;
+    float3 * greater = b-i;
+    i = 0;
+    while((i<n) && ((a+i)->x <= pivot))
+      i++;
+    float3 * less = a+i; 
+    for(; less < greater; ){
+      swapD(greater,less);
+      greater--;
+      less++;
+      while(less->x <= pivot)
+       less++;
+      while(greater->x > pivot)
+       greater--;
+    }
+    return greater;
+  }
+}
+
+float3 * medianD(float3 * a, float3 * b, int axis,int k){
+  
+  if(!(a-b))
+    return a;
+  
+  float pivot;
+  if(axis){
+    pivot = (a)->y;
+  }
+  else{
+    pivot = (a)->x;
+  }
+  
+  float3 * p = partition(a,b,pivot,axis);
+  int i = int(p-a)+1;
+  if(i==k)
+    return p;
+  else if(i < k)
+    return medianD(p+1,b,axis,k-i);
+  else
+    return medianD(a,p-1,axis,k);
+}
+
+float3 * radixPartitionD(float3 * a, float3 * b, unsigned int bit, int axis){
+  // y axis
+  unsigned int o = 1;
+  o = o<<(bit);
+  int n = (int)(b-a)+1;
+  if(axis){
+    int i=0;
+    
+    while((i<n) && (((unsigned int)(b-i)->y) & o) )
+      i++;
+    float3 * greater = b-i;
+    if(i==n){
+      return greater;
+    }
+    i = 0;
+    while((i<n) && !(((unsigned int)(a+i)->y)&o))
+      i++;
+    float3 * less = a+i; 
+    if(i==n){
+      return b;
+      //less--;
+    }
+    for(; less < greater; ){
+      swapD(greater,less);
+      greater--;
+      less++;
+      while(!((unsigned int)(less->y)&o))
+       less++;
+      while(((unsigned int)(greater->y)&o))
+       greater--;
+    }
+    return greater;
+  }
+  // x axis
+  else{
+    int i=0;
+    while((i<n) && (((unsigned int)(b-i)->x)&o) )
+      i++;
+    float3 * greater = b-i;
+    if(i==n)
+      return greater;
+    i = 0;
+    while((i<n) && !(((unsigned int)(a+i)->x)&o))
+      i++;
+    float3 * less = a+i;
+    if(i==n)
+      return b;
+    for( ; less < greater ; ){
+      swapD(greater,less);
+      greater--;
+      less++;
+      while(!((unsigned int)(less->x)&o))
+       less++;
+      while(((unsigned int)(greater->x)&o))
+       greater--;
+    }
+    return greater;
+  }
+}
+
+float3 * radixMedianD(float3* a, float3* b, int axis,int k, unsigned int 
bit,unsigned int* data_out, unsigned int* d_temp_addr_uint, 
+                     float3* d_out,CUDPPHandle planHandle, unsigned int* nD)
+{
+  float3 * s;
+  
+  if(a==b)
+    return a;
+  else{
+    int n = b - a + 1; 
+    dim3  threads,blocks;
+    unsigned m_chunks = n / maxThreadsThisBlock;
+    unsigned m_leftovers = n % maxThreadsThisBlock;
+    
+    if ((m_chunks == 0) && (m_leftovers > 0)){
+      // can't even fill a block
+      blocks = dim3(1, 1, 1); 
+      threads = dim3((m_leftovers), 1, 1);
+    } 
+    else {
+      // normal case
+      if (m_leftovers > 0){
+       // not aligned, add an additional block for leftovers
+       blocks = dim3(m_chunks + 1, 1, 1);
+      }
+      else{
+       // aligned on block boundary
+       blocks = dim3(m_chunks, 1, 1);
+      }
+      threads = dim3(maxThreadsThisBlock , 1, 1);
+    }
+    
+    radixGlobalSetup_kernel<<<blocks, threads>>>(data_out, a, bit,axis);
+    CUT_CHECK_ERROR("Kernel execution failed");
+
+    // Call to CUDPP Scan function
+    //cudppScan(d_temp_addr_uint, data_out, n, config);                        
        Deprecated
+    CUDPPResult cudpp_result = cudppScan (planHandle, (void*)d_temp_addr_uint, 
(void*)data_out, n);
+
+    CUT_CHECK_ERROR("Kernel execution failed");
+    
+    if (cudpp_result != CUDPP_SUCCESS)
+      printf ("Error while performing CUDPP Scan\n");
+
+    radixGlobalSplit_kernel<<<blocks, threads>>>(d_out, d_temp_addr_uint,a, 
bit,n- 1,axis,nD);
+    
+    int ns;
+    cudaMemcpy(&ns,nD,sizeof(unsigned int),cudaMemcpyDeviceToHost);
+    s = a + ns - 1 ;
+    if(bit==0)
+      return s;        
+    if( ns > k )
+      return 
radixMedianD(a,s,axis,k,bit>>1,data_out,d_temp_addr_uint,d_out,planHandle,nD);
+    else
+      return 
radixMedianD(s+1,b,axis,k-ns,bit>>1,data_out,d_temp_addr_uint,d_out,planHandle,nD);
+  }
+}
+
+void constructD(float3 * NodePos1, float3 * NodePos2, kdNodeInt * 
n,kdNodeFloat * nf,
+               int i,int a,float d,float u,float l,float r, int limit,
+               unsigned int * data_out, unsigned int * d_temp_addr_uint, 
float3 * d_out,CUDPPHandle planHandle, unsigned int * nD,
+               complexDevice * OuterD)
+{
+  static int avail = 2;
+  if(i==1)
+    avail = 2;
+  (n+i-1)->numElems = int(NodePos2-NodePos1)+1;
+  dim3 blocks, threads;
+  
+  unsigned m_chunkSize = maxThreadsThisBlock;
+  unsigned m_chunks = (n+i-1)->numElems / m_chunkSize;
+  unsigned m_leftovers = (n+i-1)->numElems % m_chunkSize;
+  
+  if ((m_chunks == 0) && (m_leftovers > 0)){
+    // can't even fill a block
+    blocks = dim3(1, 1, 1); 
+    threads = dim3((m_chunkSize), 1, 1);
+  } 
+  else{
+    // normal case
+    if (m_leftovers > 0){
+      // not aligned, add an additional block for leftovers
+      blocks = dim3(m_chunks + 1, 1, 1);
+    }
+    else{
+      // aligned on block boundary
+      blocks = dim3(m_chunks, 1, 1);
+    }
+    threads = dim3(m_chunkSize, 1, 1);
+  }
+  
+  fcomplex * Outer = (fcomplex *) malloc( 4*(m_chunks+1)*sizeof(fcomplex));
+  
+  // check if kernel execution generated and error
+  CUT_CHECK_ERROR("Kernel execution failed");
+  
+  complexDevice center; center.x = ((nf+i-1)->center).r; center.y = 
((nf+i-1)->center).i;
+  cudaMalloc((void**)&OuterD, 4*(m_chunks+1)*sizeof(fcomplex));
+  calcOuter<<<blocks, 
threads,4*sizeof(fcomplex)*threads.x>>>(NodePos1,OuterD,(n+i-1)->numElems,center);
+  CUT_CHECK_ERROR("Kernel execution failed");
+       
+  cudaMemcpy(Outer, OuterD, 4*(m_chunks+1)*sizeof(fcomplex), 
cudaMemcpyDeviceToHost);
+  for(int k = 0; k < m_chunks+1; k+=4)
+    for(int j = 0; j < 4; j++)
+      (nf+i-1)->Outer[j]= Cadd(Outer[k+j],(nf+i-1)->Outer[j]);
+  
+  cudaFree(OuterD);
+  free(Outer);
+  
+  // check if kernel execution generated and error
+  CUT_CHECK_ERROR("Kernel execution failed");
+       
+  if((n+i-1)->numElems <= limit){
+    float3 *start = (float3*)malloc(((n+i-1)->numElems)*sizeof(float3));
+    cudaMemcpy(start, NodePos1, ((n+i-1)->numElems)*sizeof(float3), 
cudaMemcpyDeviceToHost);
+    for(int j = 0; j < (n+i-1)->numElems; j++,start++){
+      (n+i-1)->graphNodes[j]= (int)start->z;
+    }  
+    return;
+  }
+       
+  //CUT_CHECK_ERROR("Kernel execution failed");
+  float l1,l2,r1,r2,u1,u2,d1,d2;
+               
+  unsigned int bit = 1<<(sizeof(float)*8-1);
+
+  float3 * mid = 
radixMedianD(NodePos1,NodePos2,a,(int)((NodePos2-NodePos1)/2)+1,bit,data_out,d_temp_addr_uint,d_out,planHandle,nD);
+  
+  float3 midElem;
+  cudaError_t err = cudaMemcpy(&(midElem), mid, 
sizeof(float3),cudaMemcpyDeviceToHost);        
+  CUT_CHECK_ERROR("Kernel execution failed");  
+  if(a){
+    (nf+i-1)->split = midElem.y;
+    l1 =(nf+i-1)->left;  l2=(nf+i-1)->left;
+    r1 =(nf+i-1)->right; r2 = (nf+i-1)->right;
+    u1 =(nf+i-1)->split; u2 = (nf+i-1)->up;
+    d1 =(nf+i-1)->down; d2 = (nf+i-1)->split;
+  }
+  else{
+    (nf+i-1)->split = midElem.x;
+    l1 =(nf+i-1)->left; l2=(nf+i-1)->split;
+    r1 =(nf+i-1)->split; r2 = (nf+i-1)->right;
+    u1 = (nf+i-1)->up; u2 = (nf+i-1)->up;
+    d1 = (nf+i-1)->down; d2 = (nf+i-1)->down;
+  }
+  
+  int c = avail++; 
+  int c2 = avail++;
+  
+  kdNodeInitD(n,nf,c,1-a,d1,u1,l1,r1);
+  kdNodeInitD(n,nf,c2,1-a,d2,u2,l2,r2);
+  
+  (n+i-1)->child = c;
+  (n+c-1)->next = c2;
+  (n+c2-1)->next = (n+i-1)->next;
+  
+  constructD(NodePos1,mid-1,n,nf,c,1-a,d1,u1,l1,r1,limit,data_out, 
d_temp_addr_uint, d_out,planHandle,nD,OuterD);
+  constructD(mid,NodePos2 ,n,nf,c2,1-a,d2,u2,l2,r2,limit,data_out, 
d_temp_addr_uint, d_out,planHandle,nD,OuterD);       
+}

Added: csplugins/trunk/soc/ghuck/gpuGraphDrawing/licence.h
===================================================================

Modified: csplugins/trunk/soc/ghuck/gpuGraphDrawing/simpleCUDPP.cu
===================================================================
--- csplugins/trunk/soc/ghuck/gpuGraphDrawing/simpleCUDPP.cu    2009-06-10 
05:15:05 UTC (rev 16906)
+++ csplugins/trunk/soc/ghuck/gpuGraphDrawing/simpleCUDPP.cu    2009-06-10 
17:45:07 UTC (rev 16907)
@@ -1,3 +1,6 @@
+#include "licence.h"
+#include "copyright.h"
+
 // includes, system
 #include <stdlib.h>
 #include <stdio.h>
@@ -43,6 +46,7 @@
 int            levelConvergence;
 float3 *       a;
 //CUDPPScanConfig config;                          Deprecated
+CUDPPConfiguration config; // This struct replaces the old CUDPPScanConfig in 
the release 1.0a of CUDPP
 unsigned int * data_out;
 unsigned int * d_temp_addr_uint; 
 float3 *       d_out;
@@ -68,8 +72,11 @@
   
   //config.maxNumElements = g->numVertices;        Deprecated
   //cudppInitializeScan(&config);                  Deprecated
+      
+  // Configure CUDPP Scan Plan
+  CUDPPHandle planHandle;
+  cudppPlan (&planHandle, config, g->numVertices, 1, 0); // rows = 1, rowPitch 
= 0
   
-  
   int sizeInt = g->numVertices*sizeof(kdNodeInt);
   int sizeFloat = g->numVertices*sizeof(kdNodeFloat);
   
@@ -81,9 +88,9 @@
       kdNodeInit(rootInt,rootFloat,1,0,0,SCREEN_W,0,SCREEN_H);
       construct(NodeTemp, NodeTemp+g->numVertices-1, rootInt,rootFloat, 
1,0,0,SCREEN_W,0,SCREEN_H,3);
     }
-    else{ //GPU
+    else{ //GPU   
       kdNodeInitD(rootInt,rootFloat,1,0,0,SCREEN_W,0,SCREEN_H);
-      constructD(a, a+g->numVertices-1, rootInt,rootFloat, 
1,0,0,SCREEN_W,0,SCREEN_H,3,data_out,d_temp_addr_uint, d_out,&config,nD,OuterD 
);
+      constructD(a, a+g->numVertices-1, rootInt,rootFloat, 
1,0,0,SCREEN_W,0,SCREEN_H,3,data_out,d_temp_addr_uint, 
d_out,planHandle,nD,OuterD );
     }
   }
        
@@ -121,6 +128,9 @@
 
   // Decrease the temperature of graph g
   cool(g); 
+
+  // Destroy CUDPP Scan Plan
+  cudppDestroyPlan(planHandle);
 }
 
 
@@ -357,12 +367,21 @@
   cudaMalloc((void**)&AdjMatValsD, (g.numEdges)*sizeof(int));
   cudaMalloc((void**)&edgeLenD, (g.numEdges)*sizeof(int));
   
+  // Initialize parameters for config (see CUDPP in cudpp.h)
+
+  config.algorithm = CUDPP_SCAN;
+  config.op        = CUDPP_ADD;
+  config.datatype  = CUDPP_INT;
+  config.options   = CUDPP_OPTION_FORWARD | CUDPP_OPTION_EXCLUSIVE; 
+
   //config.direction      = CUDPP_SCAN_FORWARD;                  Deprecated
   //config.exclusivity    = CUDPP_SCAN_EXCLUSIVE;                Deprecated
-  //config.op          = CUDPP_ADD;                             Deprecated
+  //config.op            = CUDPP_ADD;                           Deprecated
   //config.datatype       = CUDPP_INT;                           Deprecated
-  //config.maxNumRows  = 1;                                     Deprecated
+  //config.maxNumRows    = 1;                                   Deprecated
   //config.rowPitch       = 0;                                   Deprecated
+
+
   
   cudaMalloc((void**)&data_out,sizeof(unsigned int)* g.numVertices);
   cudaMalloc((void**)&d_temp_addr_uint,sizeof(unsigned int)* g.numVertices);


--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups 
"cytoscape-cvs" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to 
[email protected]
For more options, visit this group at 
http://groups.google.com/group/cytoscape-cvs?hl=en
-~----------~----~----~----~------~----~------~--~---

Reply via email to