James Shirley edited CUDA_Implementation.tex  about 11 years ago

Commit id: e34317ce03e4fc65f208762b8283c139b1ff8158

deletions | additions      

       

\begin{verbatim}
cudaMalloc((void**)&devCurrentDataLocus,numObs*sizeof(float));
cudaMalloc((void**)&devRInverseY,numObs*sizeof(float));
cudaMemcpy(devCurrentDataLocus,currentDataLocus.data(),numObs*sizeof(float),cudaMemcpyHostToDevice);
cudaMemcpy(devRInverseY,RInverseY.data(),numObs*sizeof(float),cudaMemcpyHostToDevice);
float rhsModeli;
cublasSdot(cublasHandle,numObs,devCurrentDataLocus,1,devRInverseY,1,&rhsModeli);
rhsModeli += (diagLhs(i) * oldSamplei);
\end{verbatim}
You must also set up the handle that cuBLAS uses to access the GPU; this can be done like so: