diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..e69de29 diff --git a/.gitignore b/.gitignore index a75452f..f5d15ff 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .DS_Store .*~ __MACOSX +#* diff --git a/bsglmm/covarGPU.cu b/bsglmm/covarGPU.cu index 8c46122..9bc4a44 100644 --- a/bsglmm/covarGPU.cu +++ b/bsglmm/covarGPU.cu @@ -213,7 +213,7 @@ __global__ void Spat_Coef_for_Spat_Covar(float *covar,float *alpha,float *Z,floa float var[20*20]; float tmp=0; float tmp2[20]; -// float alph[20]; + float alph[20]; int voxel=0,IDX=0,IDXSC=0; int current_covar=0; @@ -302,8 +302,8 @@ __global__ void Spat_Coef_Probit(float *covar,float *covF,float *fix,float *alph float var[20*20]; float tmp=0; float tmp2[20]; -// float alph[20]; -// float fixed[20]; + float alph[20]; + float fixed[20]; int voxel=0,IDX=0,IDXSC=0; float bwhtmat=0; @@ -407,7 +407,7 @@ __global__ void Spat_Coef(float *covar,float *covF,float *fix,float *alpha,float float var[20*20]; float tmp=0; float tmp2[20]; -// float alph[20]; + float alph[20]; float fixed[20]; int voxel=0,IDX=0,IDXSC=0; @@ -1018,7 +1018,7 @@ void updateSpatCoefPrecGPU_Laplace(float *SpatCoefPrec,unsigned long *seed) double *var; float *dbeta; double betaSqr = 2.0;//0.2; -// double tau; + double tau; var = (double *)calloc(NCOVAR*NCOVAR,sizeof(double)); hbeta = (float*)calloc(BPG,sizeof(float)); @@ -1372,7 +1372,7 @@ __global__ void Z_GPU1(float *dZ,unsigned char *data,float *covar,float *covarF, int localsize = blockDim.x; int localid = threadIdx.x; - float mean[300]; + float mean[2000]; float tmp=0; float SC[20]; int voxel=0,IDX=0,IDXSC=0; @@ -1421,7 +1421,7 @@ __global__ void Z_GPU1(float *dZ,unsigned char *data,float *covar,float *covarF, int itmp = (int)data[isub*TOTVOX+IDX]; dZ[isub*TOTVOX+IDX] = truncNormGPU(mean[isub],1.0f,lim[itmp].x,lim[itmp].y,&localState); // dZ[isub*TOTVOX+IDX] = ((float)data[isub*TOTVOX+IDX]-1.0f)*truncNormGPU(fabsf(mean[isub]),1.0f,0.0f,500.0f,&localState); - dR[isub*TOTVOX+IDX] += (fabsf(dZ[isub*TOTVOX+IDX] - mean[isub]) > 4.0f); +// dR[isub*TOTVOX+IDX] += (fabsf(dZ[isub*TOTVOX+IDX] - mean[isub]) > 4.0f); } state[idx] = localState; } @@ -1435,7 +1435,7 @@ __global__ void Z_GPU2(float *dZ,unsigned char *data,float *covar,float *covarF, int localid = threadIdx.x; float a = alp; - float mean[300]; + float mean[2000]; float tmp=0; float SC[20]; int voxel=0,IDX=0,IDXSC=0; @@ -1489,7 +1489,7 @@ __global__ void Z_GPU2(float *dZ,unsigned char *data,float *covar,float *covarF, int itmp = (int)data[isub*TOTVOX+IDX]; Z = truncNormGPU(M,P,lim[itmp].x,lim[itmp].y,&localState); // Z = ((float)data[isub*TOTVOX+IDX]-1.0f)*truncNormGPU(fabsf(M),rsqrtf(P),0.0f,500.0f,&localState); - dR[isub*TOTVOX+IDX] += (fabsf(Z - M) > 4.0f); +// dR[isub*TOTVOX+IDX] += (fabsf(Z - M) > 4.0f); b = Z - M; b = (df + b*b)/2.0f; @@ -1511,7 +1511,7 @@ __global__ void Phi_GPU(float *dZ,unsigned char *data,float *covar,float *covarF int localid = threadIdx.x; const float a = alp; - float beta2[300]; + float beta2[2000]; float tmp=0; float SC[20]; int voxel=0,IDX=0,IDXSC=0; diff --git a/bsglmm/main.cu b/bsglmm/main.cu index 1d65db8..285a5bf 100644 --- a/bsglmm/main.cu +++ b/bsglmm/main.cu @@ -27,8 +27,8 @@ float *covar; int NSUBTYPES; int NCOVAR; int NCOV_FIX = 0; -int MAXITER = 1000000; -int BURNIN = 500000; +int MAXITER = 100000; +int BURNIN = 50000; int RESTART; float *deviceCovar; float *hostCovar; @@ -69,12 +69,31 @@ int main (int argc, char * const argv[]) { void write_empir_prb(unsigned char *,float *,unsigned char *,int *); unsigned char *get_WM_mask(float *,unsigned char *); - if (argc !=5) - exit(0); + if (argc !=5 && argc !=7) { + printf("%s: Usage\n",argv[0]); + printf("%s NTypes NCov GPU Design [MaxIter BurnIn] \n",argv[0]); + printf(" NTypes - Number of groups\n",argv[0]); + printf(" NCov - Number of covariates (count must include groups)\n",argv[0]); + printf(" GPU - 1 use GPU; 0 use CPU\n",argv[0]); + printf(" Design - Text file, tab or space separated data file\n",argv[0]); + printf(" MaxIter - Number of iterations (defaults to 1,000,000)\n",argv[0]); + printf(" BurnIn - Number of burn-in iterations (defaults to 500,000)\n",argv[0]); + printf("For documentation see: http://warwick.ac.uk/tenichols/BSGLMM\n",argv[0]); + exit(1); + } NSUBTYPES = atoi(argv[1]); NCOVAR = atoi(argv[2]); GPU = atoi(argv[3]); + if (argc >5) { + MAXITER = atoi(argv[5]); + BURNIN = atoi(argv[6]); + if (MAXITER<1000) + printf("WARNING: Silly-small number of iterations used; recommend abort and use more\n"); + if (BURNIN>MAXITER) + printf("WARNING: Burn-in exceeds MAXITER, no results will be saved\n"); + } + RESTART = 0; int deviceCount = 0; @@ -113,6 +132,18 @@ int main (int argc, char * const argv[]) { seed = (unsigned long *)calloc(3,sizeof(unsigned long)); fseed = fopen("seed.dat","r"); + if (fseed == NULL){ + printf("No seed.dat found; using [ 1 2 3 ]\n"); + seed[0]=1L;seed[1]=2L;seed[2]=2L; + } else { + rtn = fscanf(fseed,"%lu %lu %lu\n",&(seed[0]),&(seed[1]),&(seed[2])); + if (rtn==0) + exit(1); + rtn = fclose(fseed); + if (rtn != 0) + exit(1); + } + logit_factor = 1.0f; t_df = 1000.0f; @@ -124,13 +155,6 @@ int main (int argc, char * const argv[]) { t_df = 3.0f; - rtn = fscanf(fseed,"%lu %lu %lu\n",&(seed[0]),&(seed[1]),&(seed[2])); - if (rtn==0) - exit(0); - rtn = fclose(fseed); - if (rtn != 0) - exit(0); - // msk = read_nifti1_mask("./images/avg152T1_highres_brain.hdr", // "./images/avg152T1_highres_brain.img"); @@ -250,6 +274,9 @@ int main (int argc, char * const argv[]) { } else { + fprintf(stderr,"WARNING!!!\n"); + fprintf(stderr,"WARNING!!! CPU code not tested! Results might not be right!\n"); + fprintf(stderr,"WARNING!!!\n"); for (int i=0;i<2;i++) { INDX[i].hostVox = (int *)calloc(INDX[i].hostN,sizeof(int)); } diff --git a/bsglmm/mcmc.cpp b/bsglmm/mcmc.cpp index 553d2f0..bf8e614 100644 --- a/bsglmm/mcmc.cpp +++ b/bsglmm/mcmc.cpp @@ -87,10 +87,10 @@ void mcmc(float *covar,float *covar_fix,unsigned char *data,float *WM,unsigned c float Prec = 0.000001f; SUB *subj; char *S; - + // float *hostWM; FILE *out,*out2,*fWM,*fcoef,*fcoefsd,*fBF,*fresid; - + void itoa(int,char *); // unsigned char *get_neighbors(); void initializeAlpha(unsigned char *,float *,float *,float *,unsigned long *); @@ -99,7 +99,7 @@ void mcmc(float *covar,float *covar_fix,unsigned char *data,float *WM,unsigned c void updateZGPU(unsigned char *,float,unsigned long *); void updateZ(float *,float *,unsigned char *,unsigned char *,float, float *,float *,float *,unsigned long *); void updatePhiGPU(float,float *); - void updatePhi(unsigned char *data,float *Z,float *Phi,unsigned char *msk,float beta,float *WM,float *spatCoef,float *covar,unsigned long *seed); + void updatePhi(unsigned char *data,float *Z,float *Phi,unsigned char *msk,float beta,float *WM,float *spatCoef,float *covar,unsigned long *seed); void updateAlpha(float *,float ,float ,float *,unsigned char *,float,float *,float *,float *,int,unsigned long *); void updateAlphaGPU(float *,float *,float *,float *,unsigned char *,float,float *,float *,float *,unsigned long *); void updateAlphaPrecGPU(float *,float *,float,float,float,unsigned long *); @@ -124,9 +124,8 @@ void mcmc(float *covar,float *covar_fix,unsigned char *data,float *WM,unsigned c double compute_prb_DIC(double *,double *,float *,unsigned char *,unsigned char *,int); // void tmp(float *,float *); + fresid = fopen("resid.dat","w"); -// fresid = fopen("resid.dat","w"); - NSIZE = (NROW+2)*(NCOL+2)*(NDEPTH+2); int RSIZE = NROW*NCOL*NDEPTH; // printf("%d %d \n",TOTVOX,NSIZE); @@ -139,11 +138,11 @@ void mcmc(float *covar,float *covar_fix,unsigned char *data,float *WM,unsigned c // if (data[i] == 1) // data2[i] = 2; // } - // CUDA_CALL( cudaMemcpy(deviceData,data2,NSUBS*TOTVOX*sizeof(unsigned char),cudaMemcpyHostToDevice) ); - CUDA_CALL( cudaMemcpy(deviceData,data,NSUBS*TOTVOX*sizeof(unsigned char),cudaMemcpyHostToDevice) ); + // CUDA_CALL( cudaMemcpy(deviceData,data2,NSUBS*TOTVOX*sizeof(unsigned char),cudaMemcpyHostToDevice) ); + CUDA_CALL( cudaMemcpy(deviceData,data,NSUBS*TOTVOX*sizeof(unsigned char),cudaMemcpyHostToDevice) ); // free(data2); } - + SpatCoefMean = (float *)calloc(NCOVAR,sizeof(float)); SpatCoefPrec = (float *)calloc(NCOVAR*NCOVAR,sizeof(float)); for (int i=0;i (double)(spatCoef[4*TOTVOXp+hostIdxSC[IDX]]/logit_factor)) ? max:(double)(spatCoef[4*TOTVOXp+hostIdxSC[IDX]]/logit_factor); } } } - printf("max = %lf\n",max); */ - + printf("max = %lf\n",max); */ + double *V = (double *)calloc(NCOVAR*NCOVAR,sizeof(double)); for (int ii=0;ii 1) { if (GPU) { CUDA_CALL( cudaMemcpy(Z,deviceZ,NSUBS*TOTVOX*sizeof(float),cudaMemcpyDeviceToHost) ); @@ -658,23 +651,22 @@ void mcmc(float *covar,float *covar_fix,unsigned char *data,float *WM,unsigned c // CUDA_CALL( cudaEventElapsedTime(&time, start, stop) ); // printf ("Time for the updateZGPU kernel: %f sec.\n", time/1000.0); -// fclose(fBF); - - unsigned int *Resid; - double *ResidMap; - Resid = (unsigned int *)calloc(NSUBS*TOTVOX,sizeof(unsigned int)); - ResidMap = (double *)calloc(RSIZE,sizeof(double)); + fclose(fBF); + +// unsigned int *Resid; +// double *ResidMap; +// Resid = (unsigned int *)calloc(NSUBS*TOTVOX,sizeof(unsigned int)); +// ResidMap = (double *)calloc(RSIZE,sizeof(double)); if (GPU) { CUDA_CALL( cudaMemcpy(Z,deviceZ,NSUBS*TOTVOX*sizeof(float),cudaMemcpyDeviceToHost) ); - CUDA_CALL( cudaMemcpy(Phi,devicePhi,NSUBS*TOTVOX*sizeof(float),cudaMemcpyDeviceToHost) ); +// CUDA_CALL( cudaMemcpy(Phi,devicePhi,NSUBS*TOTVOX*sizeof(float),cudaMemcpyDeviceToHost) ); // cudaMemcpy(alpha,deviceAlpha,NSUBTYPES*TOTVOX*sizeof(float),cudaMemcpyDeviceToHost); CUDA_CALL( cudaMemcpy(spatCoef,deviceSpatCoef,NCOVAR*TOTVOXp*sizeof(float),cudaMemcpyDeviceToHost) ); - - CUDA_CALL( cudaMemcpy(Resid,deviceResid,NSUBS*TOTVOX*sizeof(unsigned int),cudaMemcpyDeviceToHost) ); + +// CUDA_CALL( cudaMemcpy(Resid,deviceResid,NSUBS*TOTVOX*sizeof(unsigned int),cudaMemcpyDeviceToHost) ); } // save_iter(beta,fixMean,alphaMean,alphaPrec,SpatCoefMean,SpatCoefPrec,spatCoef,alpha,Z,Phi); - - for (int isub=0;isub 0.05) { -// fprintf(fresid,"%d %d %d %d %lf\n",isub,i,j,k,tmp); + fprintf(fresid,"%d %d %d %d %lf\n",isub,i,j,k,tmp); ResidMap[IDX2]++; } } @@ -695,23 +687,23 @@ void mcmc(float *covar,float *covar_fix,unsigned char *data,float *WM,unsigned c } } free(Resid); -// fclose(out); -// fclose(out2); -// fclose(fresid); + fclose(out); + fclose(out2); + fclose(fresid);*/ char *RR = (char *)calloc(500,sizeof(char)); S = (char *)calloc(100,sizeof(char)); // S = strcpy(S,"ResidMap.nii"); // int rtn = write_nifti_file(NROW,NCOL,NDEPTH,1,S,S,ResidMap); - free(ResidMap); - +// free(ResidMap); + int rtn; // RR = strcpy(RR,"gzip -f "); // RR = strcat(RR,(const char *)S); // rtn = system(RR); - -// printf("TOTSAV = %d\n",TOTSAV); - out = fopen("Qhat.dat","w"); + printf("TOTSAV = %d\n",TOTSAV); + + out = fopen("Qhat.dat","w"); for (int isub=0;isub