Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Gpu dev #743

Open
wants to merge 2 commits into
base: gpu-dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cuda.mk
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ CUDA_ROOT = /usr/local/cuda
CUDA_LIB ?= $(CUDA_ROOT)/lib64
CUDA_INCLUDE ?= $(CUDA_ROOT)/include
CURTFLAGS = -L$(CUDA_LIB) -lcudart_static -lrt
NVCCFLAGS ?= -std=c++11 -I. -I$(CUDA_INCLUDE) -O3 -use_fast_math --default-stream per-thread -restrict
NVCCFLAGS ?= -g -lineinfo -Xcompiler -Wall -std=c++11 -I. -I$(CUDA_INCLUDE) -O3 -use_fast_math --default-stream per-thread -restrict

CPPFLAGS += -I$(CUDA_INCLUDE)
CPPFLAGS += -DHAVE_CUDA=1
Expand Down
121 changes: 114 additions & 7 deletions src/cuda_kernels/gpu_aligner.cu
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#include <vector>
#include "nanopolish_profile_hmm_r9.h"

int gpu_aligner_debug = 0;

#define MAX_STATES 256

#define EXPAND_TO_STRING(X) #X
Expand Down Expand Up @@ -686,8 +688,16 @@ std::vector<Variant> GpuAligner::variantScoresThresholded(std::vector<std::vecto
std::vector<ScoreSet> scoreSets;
scoreSets.resize(numScoreSets);

if(gpu_aligner_debug){
fprintf(stderr,"Generating variants:\n");
}

for(int scoreSetIdx=0; scoreSetIdx<numScoreSets;scoreSetIdx++){

if(gpu_aligner_debug){
fprintf(stderr,"scoreSetIdx=%d\t",scoreSetIdx);
}

auto input_variants = input_variants_vector[scoreSetIdx];
auto base_haplotype = base_haplotypes[scoreSetIdx];
auto event_sequences = event_sequences_vector[scoreSetIdx];
Expand All @@ -708,41 +718,132 @@ std::vector<Variant> GpuAligner::variantScoresThresholded(std::vector<std::vecto

// Make methylated versions of each input sequence. Once for the base haplotype and once each for each variant
std::vector<HMMInputSequence> sequences;
HMMInputSequence base_sequence = generate_methylated_alternatives(base_haplotype.get_sequence(),
methylation_types)[0];

std::vector<HMMInputSequence> base_sequence_vector = generate_methylated_alternatives(base_haplotype.get_sequence(),methylation_types);

#ifdef MULTI_MODEL
std::vector<size_t> num_models_vector;
std::vector<size_t> score_offsets_vector;
size_t offset = 0;
size_t num_models = base_sequence_vector.size();
num_models_vector.push_back(num_models);
score_offsets_vector.push_back(offset);
if(gpu_aligner_debug){
fprintf(stderr,"num_models_base=%ld,offset_base=%ld\t",num_models,offset);
}
offset += num_models;
for (auto base_sequence: base_sequence_vector){
sequences.push_back(base_sequence);
}
#else
HMMInputSequence base_sequence = base_sequence_vector[0];
sequences.push_back(base_sequence);
#endif

for (auto v: variant_haplotypes){
auto variant_sequence = generate_methylated_alternatives(v.get_sequence(), methylation_types)[0];
auto variant_sequence_vector = generate_methylated_alternatives(v.get_sequence(), methylation_types);
#ifdef MULTI_MODEL
size_t num_models = variant_sequence_vector.size();
num_models_vector.push_back(num_models);
score_offsets_vector.push_back(offset);
if(gpu_aligner_debug){
fprintf(stderr,"num_models_var=%ld,offset_var=%ld\t",num_models,offset);
}
offset += num_models;
for (auto variant_sequence: variant_sequence_vector){
sequences.push_back(variant_sequence);
}
#else
auto variant_sequence = variant_sequence_vector[0];
sequences.push_back(variant_sequence);
#endif
}

ScoreSet s = {
sequences,
event_sequences
#ifdef MULTI_MODEL
,
num_models_vector,
score_offsets_vector
#endif
};

scoreSets[scoreSetIdx] = s;
if(gpu_aligner_debug){
fprintf(stderr,"\n");
}
}
if(gpu_aligner_debug){
fprintf(stderr,"\n");
}

std::vector<Variant> v;
if (!event_sequences_vector.empty()) {

if(gpu_aligner_debug){
fprintf(stderr,"Calling scoreKernelMod\n");
}
auto scoresMod = scoreKernelMod(scoreSets, alignment_flags);

if(gpu_aligner_debug){
fprintf(stderr,"Unpacking scores\n");
}
// results are now ready, need to unpack them
for (int scoreSetIdx=0; scoreSetIdx<numScoreSets; scoreSetIdx++){
if(gpu_aligner_debug) {
fprintf(stderr,"scoreSetIdx=%d\t",scoreSetIdx);\
}
std::vector<std::vector<double>> scores = scoresMod[scoreSetIdx]; // scores for this candidate, including all variants and base(zeroth)
int numVariants = scores.size() - 1; // subtract one for the base
#ifdef MULTI_MODEL
ScoreSet s = scoreSets[scoreSetIdx];
int numVariants = s.num_models_vector.size() -1; // subtract one for the base sequence
#else
int numVariants = scores.size() - 1; // subtract one for the base sequence
#endif
int numScores = scores[0].size();

for (int variantIndex = 0; variantIndex < numVariants; variantIndex++) { // index 0 is the base scores
double totalScore = 0.0;
for (int k = 0; k < numScores; k++) {
if (fabs(totalScore) < screen_score_threshold) {
#ifdef MULTI_MODEL

//compute the base score based on the base sequences
size_t num_models = s.num_models_vector[0];
double num_model_penalty = log(num_models);
double score = scores[0][k] - num_model_penalty;
for(size_t seq_idx = 1; seq_idx < num_models; ++seq_idx) {
double alt_score = scores[seq_idx][k] - num_model_penalty;
score = add_logs(score, alt_score);
}
double baseScore = score;
if (k==0 && variantIndex==0 && gpu_aligner_debug){
fprintf(stderr,"num_models_base=%ld,offset_base=%ld\t",num_models,0);
}

if(variantIndex+1 >= s.num_models_vector.size()){ //a sanity check
fprintf(stderr,"\nAn invalid memory access occured\nscoreSetIdx=%d, variantIndex=%d, k=%d, \n",scoreSetIdx,variantIndex,k);
assert(0);
}

//compute the variant score based on the variant sequences
num_models = s.num_models_vector[variantIndex+1];
size_t score_offset = s.score_offsets_vector[variantIndex+1];
num_model_penalty = log(num_models);
score = scores[score_offset][k] - num_model_penalty;
for(size_t seq_idx = 1; seq_idx < num_models; ++seq_idx) {
double alt_score = scores[score_offset + seq_idx][k] - num_model_penalty;
score = add_logs(score, alt_score);
}
double variantScore = score;
if (k==0 && gpu_aligner_debug) {
fprintf(stderr,"num_models_var=%ld,offset_var=%ld\t",num_models,score_offset);
}

#else
double baseScore = scores[0][k];
totalScore += (scores[variantIndex + 1][k] - baseScore);
double variantScore = scores[variantIndex + 1][k];
#endif
totalScore += (variantScore - baseScore);
}
}
// get the old variant:
Expand All @@ -751,6 +852,12 @@ std::vector<Variant> GpuAligner::variantScoresThresholded(std::vector<std::vecto
unScoredVariant.info = "";
v.push_back(unScoredVariant);
}
if(gpu_aligner_debug){
fprintf(stderr,"\n");
}
}
if(gpu_aligner_debug){
fprintf(stderr,"\n");
}
}
return v;
Expand Down
6 changes: 6 additions & 0 deletions src/cuda_kernels/gpu_aligner.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,16 @@
#define MAX_NUM_VARIANTS_PER_LOCUS 10
#define MAX_NUM_WORKERS 16

#define MULTI_MODEL 1

//Data to be scored
typedef struct {
std::vector<HMMInputSequence> stateSequences;
std::vector<HMMInputData> rawData;
#ifdef MULTI_MODEL
std::vector<size_t> num_models_vector; //store the number of models for base sequence and then variant sequences
std::vector<size_t> score_offsets_vector; //store the offsets based on number of models
#endif
} ScoreSet;

class GpuAligner
Expand Down