#include <stdio.h>
#include <cuda.h>

/*
 * Select the CUDA device used by the calling process.
 *
 * The device index is procId modulo the number of devices reported by
 * cudaGetDeviceCount, so consecutive process ids are spread across the
 * available devices.
 *
 * procId: identifier of the calling process (presumably an MPI-style rank --
 *         TODO confirm with callers). Negative values are wrapped into
 *         [0, deviceCount) instead of producing a negative device index.
 *
 * If the device count cannot be queried, no device is present, or
 * cudaSetDevice fails, a diagnostic is written to stderr and the function
 * returns without printing the "selected" message.
 */
extern "C" void setDeviceCUDA(int procId)
{
	int devCount = 0;
	cudaError_t status = cudaGetDeviceCount(&devCount);
	if (status != cudaSuccess) {
		fprintf(stderr, "setDeviceCUDA: cudaGetDeviceCount failed: %s\n",
			cudaGetErrorString(status));
		return;
	}

	/* Guard the modulo below: devCount == 0 would be a division by zero. */
	if (devCount <= 0) {
		fprintf(stderr, "setDeviceCUDA: no CUDA device available for process %d\n",
			procId);
		return;
	}

	int devSelected = procId % devCount;
	if (devSelected < 0)
		devSelected += devCount;	/* C '%' keeps the sign of procId */

	status = cudaSetDevice(devSelected);
	if (status != cudaSuccess) {
		fprintf(stderr, "setDeviceCUDA: cudaSetDevice(%d) failed for process %d: %s\n",
			devSelected, procId, cudaGetErrorString(status));
		return;
	}

	printf("Dev: %d selected by process %d\n",devSelected,procId);
}

/*
 * Allocate `size` bytes with cudaMalloc and store the resulting pointer
 * in *buf.
 *
 * buf:  output location for the allocated pointer; must not be NULL.
 * size: number of bytes requested; must not be negative.
 *
 * On any failure (NULL buf, negative size, cudaMalloc error) a diagnostic is
 * written to stderr and, when buf is valid, *buf is set to NULL so the caller
 * can detect the failure instead of using an indeterminate pointer.
 */
extern "C" void allocRemoteBuffer(void ** buf, int size){

	if (buf == NULL) {
		fprintf(stderr, "allocRemoteBuffer: output pointer is NULL\n");
		return;
	}

	/* A negative int would silently convert to a huge size_t. */
	if (size < 0) {
		fprintf(stderr, "allocRemoteBuffer: invalid size %d\n", size);
		*buf = NULL;
		return;
	}

	cudaError_t status = cudaMalloc(buf, (size_t)size);
	if (status != cudaSuccess) {
		fprintf(stderr, "allocRemoteBuffer: cudaMalloc of %d bytes failed: %s\n",
			size, cudaGetErrorString(status));
		*buf = NULL;
	}

}

/*
 * Release a buffer with cudaFree.
 *
 * buf: pointer to release (presumably one obtained from allocRemoteBuffer --
 *      confirm with callers).
 *
 * A failing cudaFree is reported on stderr instead of being silently ignored.
 */
extern "C" void freeRemoteBuffer(void * buf){

	cudaError_t status = cudaFree(buf);
	if (status != cudaSuccess)
		fprintf(stderr, "freeRemoteBuffer: cudaFree failed: %s\n",
			cudaGetErrorString(status));

}

/*
 * Allocate `size` bytes of host memory with cudaHostAlloc, using the
 * cudaHostAllocMapped flag, and store the host pointer in *buf.
 *
 * buf:  output location for the host pointer; must not be NULL.
 * size: number of bytes requested; must not be negative.
 *
 * On any failure (NULL buf, negative size, cudaHostAlloc error) a diagnostic
 * is written to stderr and, when buf is valid, *buf is set to NULL so the
 * caller can detect the failure.
 */
extern "C" void allocPinnedMemory(void ** buf,long size){
	if (buf == NULL) {
		fprintf(stderr, "allocPinnedMemory: output pointer is NULL\n");
		return;
	}

	/* A negative long would silently convert to a huge size_t. */
	if (size < 0) {
		fprintf(stderr, "allocPinnedMemory: invalid size %ld\n", size);
		*buf = NULL;
		return;
	}

	cudaError_t status = cudaHostAlloc(buf, (size_t)size, cudaHostAllocMapped);
	if (status != cudaSuccess) {
		fprintf(stderr, "allocPinnedMemory: cudaHostAlloc of %ld bytes failed: %s\n",
			size, cudaGetErrorString(status));
		*buf = NULL;
	}
}

/*
 * Query, via cudaHostGetDevicePointer, the device-side pointer that
 * corresponds to the host buffer `buf`, and store it in *pointer.
 *
 * buf:     host buffer (presumably one allocated by allocPinnedMemory --
 *          confirm with callers).
 * pointer: output location for the device pointer; must not be NULL.
 *
 * On failure a diagnostic is written to stderr and *pointer is set to NULL
 * so the caller never proceeds with an indeterminate device pointer.
 */
extern "C" void getBufferPointer(void * buf, void ** pointer){

	if (pointer == NULL) {
		fprintf(stderr, "getBufferPointer: output pointer is NULL\n");
		return;
	}

	cudaError_t status = cudaHostGetDevicePointer(pointer, buf, 0);
	if (status != cudaSuccess) {
		fprintf(stderr, "getBufferPointer: cudaHostGetDevicePointer failed: %s\n",
			cudaGetErrorString(status));
		*pointer = NULL;
	}

}

/*
 * Release a host buffer with cudaFreeHost.
 *
 * buf: pointer to release (presumably one obtained from allocPinnedMemory --
 *      confirm with callers).
 *
 * A failing cudaFreeHost is reported on stderr instead of being silently
 * ignored.
 */
extern "C" void freeRemotePinnedBuffer(void * buf){
	cudaError_t status = cudaFreeHost(buf);
	if (status != cudaSuccess)
		fprintf(stderr, "freeRemotePinnedBuffer: cudaFreeHost failed: %s\n",
			cudaGetErrorString(status));
}

/*
 * Copy `size` bytes from buf_host to buf_dev using cudaMemcpy with
 * cudaMemcpyHostToDevice.
 *
 * buf_dev:  destination pointer (device side of the copy).
 * buf_host: source pointer (host side of the copy).
 * size:     number of bytes to copy; must not be negative.
 *
 * A negative size or a failing cudaMemcpy is reported on stderr; in the
 * negative-size case no copy is attempted.
 */
extern "C" void writeRemoteBuffer(void * buf_dev, void * buf_host, long size){

	/* A negative long would silently convert to a huge size_t byte count. */
	if (size < 0) {
		fprintf(stderr, "writeRemoteBuffer: invalid size %ld\n", size);
		return;
	}

	cudaError_t status = cudaMemcpy(buf_dev, buf_host, (size_t)size,
					cudaMemcpyHostToDevice);
	if (status != cudaSuccess)
		fprintf(stderr, "writeRemoteBuffer: cudaMemcpy of %ld bytes failed: %s\n",
			size, cudaGetErrorString(status));

}

/*
 * Copy `size` bytes from buf_dev to buf_host using cudaMemcpy with
 * cudaMemcpyDeviceToHost.
 *
 * buf_host: destination pointer (host side of the copy).
 * buf_dev:  source pointer (device side of the copy).
 * size:     number of bytes to copy; must not be negative.
 *
 * A negative size or a failing cudaMemcpy is reported on stderr; in either
 * case the contents of buf_host must not be relied upon by the caller.
 */
extern "C" void readRemoteBuffer(void * buf_host, void * buf_dev, long size){

	/* A negative long would silently convert to a huge size_t byte count. */
	if (size < 0) {
		fprintf(stderr, "readRemoteBuffer: invalid size %ld\n", size);
		return;
	}

	cudaError_t status = cudaMemcpy(buf_host, buf_dev, (size_t)size,
					cudaMemcpyDeviceToHost);
	if (status != cudaSuccess)
		fprintf(stderr, "readRemoteBuffer: cudaMemcpy of %ld bytes failed: %s\n",
			size, cudaGetErrorString(status));

}

/*
 * Debug kernel: prints the first `dim` doubles of `buf`, one per line,
 * using device-side printf.
 *
 * The loop does not depend on threadIdx/blockIdx, so every thread that runs
 * this kernel prints the whole range.
 */
__global__ void printDBuf(double *buf, int dim)
{
  int idx = 0;
  while (idx < dim) {
    const double value = buf[idx];
    printf("printfDBuf: %lf\n", value);
    ++idx;
  }
}

/*
 * Print the first `dim` doubles of the buffer `buf` by launching the
 * printDBuf kernel with a single block of a single thread.
 *
 * buf: pointer handed to the kernel as a double array; it is dereferenced on
 *      the device, so it must be device-accessible.
 * dim: number of elements to print; nothing is done when dim <= 0.
 *
 * The launch is checked with cudaGetLastError and followed by
 * cudaDeviceSynchronize: device-side printf output is only flushed at
 * synchronization points, so without it the text may never appear, and
 * kernel execution errors would go unnoticed. Failures are reported on
 * stderr.
 */
extern "C" void printDoubleBuffer(void *buf, int dim)
{
  /* Nothing to print; also avoids dereferencing a NULL pointer on the device. */
  if (buf == NULL || dim <= 0)
    return;

  printDBuf<<<1,1>>>((double *)buf,dim);

  /* Launch-configuration errors are only visible through cudaGetLastError. */
  cudaError_t status = cudaGetLastError();
  if (status != cudaSuccess) {
    fprintf(stderr, "printDoubleBuffer: kernel launch failed: %s\n",
            cudaGetErrorString(status));
    return;
  }

  /* Wait for the kernel so its printf buffer is flushed to stdout. */
  status = cudaDeviceSynchronize();
  if (status != cudaSuccess)
    fprintf(stderr, "printDoubleBuffer: kernel execution failed: %s\n",
            cudaGetErrorString(status));
}
