diff --git a/cuda.mk b/cuda.mk index 59c91c4f..5f29ec46 100644 --- a/cuda.mk +++ b/cuda.mk @@ -5,7 +5,7 @@ CUDA_ROOT = /usr/local/cuda CUDA_LIB ?= $(CUDA_ROOT)/lib64 CUDA_INCLUDE ?= $(CUDA_ROOT)/include CURTFLAGS = -L$(CUDA_LIB) -lcudart_static -lrt -NVCCFLAGS ?= -std=c++11 -I. -I$(CUDA_INCLUDE) -O3 -use_fast_math --default-stream per-thread -restrict +NVCCFLAGS ?= -g -lineinfo -Xcompiler -Wall -std=c++11 -I. -I$(CUDA_INCLUDE) -O3 -use_fast_math --default-stream per-thread -restrict CPPFLAGS += -I$(CUDA_INCLUDE) CPPFLAGS += -DHAVE_CUDA=1 diff --git a/src/cuda_kernels/gpu_aligner.cu b/src/cuda_kernels/gpu_aligner.cu index f3317697..1db32777 100644 --- a/src/cuda_kernels/gpu_aligner.cu +++ b/src/cuda_kernels/gpu_aligner.cu @@ -4,6 +4,8 @@ #include #include "nanopolish_profile_hmm_r9.h" +int gpu_aligner_debug = 0; + #define MAX_STATES 256 #define EXPAND_TO_STRING(X) #X @@ -686,8 +688,16 @@ std::vector GpuAligner::variantScoresThresholded(std::vector scoreSets; scoreSets.resize(numScoreSets); + if(gpu_aligner_debug){ + fprintf(stderr,"Generating variants:\n"); + } + for(int scoreSetIdx=0; scoreSetIdx GpuAligner::variantScoresThresholded(std::vector sequences; - HMMInputSequence base_sequence = generate_methylated_alternatives(base_haplotype.get_sequence(), - methylation_types)[0]; - + std::vector base_sequence_vector = generate_methylated_alternatives(base_haplotype.get_sequence(),methylation_types); + +#ifdef MULTI_MODEL + std::vector num_models_vector; + std::vector score_offsets_vector; + size_t offset = 0; + size_t num_models = base_sequence_vector.size(); + num_models_vector.push_back(num_models); + score_offsets_vector.push_back(offset); + if(gpu_aligner_debug){ + fprintf(stderr,"num_models_base=%ld,offset_base=%ld\t",num_models,offset); + } + offset += num_models; + for (auto base_sequence: base_sequence_vector){ + sequences.push_back(base_sequence); + } +#else + HMMInputSequence base_sequence = base_sequence_vector[0]; sequences.push_back(base_sequence); +#endif for (auto v: variant_haplotypes){ - auto variant_sequence = generate_methylated_alternatives(v.get_sequence(), methylation_types)[0]; + auto variant_sequence_vector = generate_methylated_alternatives(v.get_sequence(), methylation_types); +#ifdef MULTI_MODEL + size_t num_models = variant_sequence_vector.size(); + num_models_vector.push_back(num_models); + score_offsets_vector.push_back(offset); + if(gpu_aligner_debug){ + fprintf(stderr,"num_models_var=%ld,offset_var=%ld\t",num_models,offset); + } + offset += num_models; + for (auto variant_sequence: variant_sequence_vector){ + sequences.push_back(variant_sequence); + } +#else + auto variant_sequence = variant_sequence_vector[0]; sequences.push_back(variant_sequence); +#endif } ScoreSet s = { sequences, event_sequences +#ifdef MULTI_MODEL + , + num_models_vector, + score_offsets_vector +#endif }; scoreSets[scoreSetIdx] = s; + if(gpu_aligner_debug){ + fprintf(stderr,"\n"); + } + } + if(gpu_aligner_debug){ + fprintf(stderr,"\n"); } std::vector v; if (!event_sequences_vector.empty()) { + if(gpu_aligner_debug){ + fprintf(stderr,"Calling scoreKernelMod\n"); + } auto scoresMod = scoreKernelMod(scoreSets, alignment_flags); + if(gpu_aligner_debug){ + fprintf(stderr,"Unpacking scores\n"); + } // results are now ready, need to unpack them for (int scoreSetIdx=0; scoreSetIdx> scores = scoresMod[scoreSetIdx]; // scores for this candidate, including all variants and base(zeroth) - int numVariants = scores.size() - 1; // subtract one for the base + #ifdef MULTI_MODEL + ScoreSet s = scoreSets[scoreSetIdx]; + int numVariants = s.num_models_vector.size() -1; // subtract one for the base sequence + #else + int numVariants = scores.size() - 1; // subtract one for the base sequence + #endif int numScores = scores[0].size(); - for (int variantIndex = 0; variantIndex < numVariants; variantIndex++) { // index 0 is the base scores double totalScore = 0.0; for (int k = 0; k < numScores; k++) { if (fabs(totalScore) < screen_score_threshold) { + #ifdef MULTI_MODEL + + //compute the base score based on the base sequences + size_t num_models = s.num_models_vector[0]; + double num_model_penalty = log(num_models); + double score = scores[0][k] - num_model_penalty; + for(size_t seq_idx = 1; seq_idx < num_models; ++seq_idx) { + double alt_score = scores[seq_idx][k] - num_model_penalty; + score = add_logs(score, alt_score); + } + double baseScore = score; + if (k==0 && variantIndex==0 && gpu_aligner_debug){ + fprintf(stderr,"num_models_base=%ld,offset_base=%ld\t",num_models,0); + } + + if(variantIndex+1 >= s.num_models_vector.size()){ //a sanity check + fprintf(stderr,"\nAn invalid memory access occured\nscoreSetIdx=%d, variantIndex=%d, k=%d, \n",scoreSetIdx,variantIndex,k); + assert(0); + } + + //compute the variant score based on the variant sequences + num_models = s.num_models_vector[variantIndex+1]; + size_t score_offset = s.score_offsets_vector[variantIndex+1]; + num_model_penalty = log(num_models); + score = scores[score_offset][k] - num_model_penalty; + for(size_t seq_idx = 1; seq_idx < num_models; ++seq_idx) { + double alt_score = scores[score_offset + seq_idx][k] - num_model_penalty; + score = add_logs(score, alt_score); + } + double variantScore = score; + if (k==0 && gpu_aligner_debug) { + fprintf(stderr,"num_models_var=%ld,offset_var=%ld\t",num_models,score_offset); + } + + #else double baseScore = scores[0][k]; - totalScore += (scores[variantIndex + 1][k] - baseScore); + double variantScore = scores[variantIndex + 1][k]; + #endif + totalScore += (variantScore - baseScore); } } // get the old variant: @@ -751,6 +852,12 @@ std::vector GpuAligner::variantScoresThresholded(std::vector stateSequences; std::vector rawData; +#ifdef MULTI_MODEL + std::vector num_models_vector; //store the number of models for base sequence and then variant sequences + std::vector score_offsets_vector; //store the offsets based on number of models +#endif } ScoreSet; class GpuAligner