diff --git a/README.md b/README.md
index 4b5855a..7d1cc1b 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,9 @@
# BBMap/BBTools
-(Not Offical) BBMap short read aligner for DNA/RNAseq, and other bioinformatic tools.
+
+(Not Official) BBMap short read aligner for DNA/RNAseq, and other bioinformatics tools.
+BBTools bioinformatics tools, including BBMap.
+
+The dozens of shell scripts have been moved from the repository root to `./sh/` to keep the root directory tidy.
* [SEQanswers Page](http://seqanswers.com/forums/showthread.php?t=41057)
* [SourceForge Page](https://sourceforge.net/projects/bbmap/)
diff --git a/build.xml b/build.xml
new file mode 100755
index 0000000..48c29db
--- /dev/null
+++ b/build.xml
@@ -0,0 +1,43 @@
+
+
+ Brian Bushnell's tools!
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/current/align2/AbstractIndex.java b/current/align2/AbstractIndex.java
new file mode 100755
index 0000000..e63f81d
--- /dev/null
+++ b/current/align2/AbstractIndex.java
@@ -0,0 +1,226 @@
+package align2;
+
+import java.util.ArrayList;
+
+import stream.SiteScore;
+
+/**
+ * @author Brian Bushnell
+ * @date Oct 15, 2013
+ *
+ */
+public abstract class AbstractIndex {
+
+ /**
+  * Stores the key geometry, scoring constants, aligner, and chromosome range for this index.
+  * KEYSPACE is set to 1&lt;&lt;(2*keylen) and BASE_KEY_HIT_SCORE to pointsMatch*keylen.
+  * The chromosome range is asserted to equal the static MINCHROM/MAXCHROM limits.
+  * @param keylen Key length, stored in KEYLEN
+  * @param kfilter Stored in KFILTER
+  * @param pointsMatch Per-position match score used to derive BASE_KEY_HIT_SCORE
+  * @param minChrom_ Lowest chromosome number; must equal MINCHROM
+  * @param maxChrom_ Highest chromosome number; must equal MAXCHROM
+  * @param msa_ Aligner instance stored in msa
+  */
+ AbstractIndex(int keylen, int kfilter, int pointsMatch, int minChrom_, int maxChrom_, MSA msa_){
+     //Chromosome range
+     this.minChrom=minChrom_;
+     this.maxChrom=maxChrom_;
+
+     //Key geometry and filter
+     this.KEYLEN=keylen;
+     this.KEYSPACE=1<<(2*keylen);
+     this.KFILTER=kfilter;
+
+     //Scoring
+     this.BASE_KEY_HIT_SCORE=pointsMatch*keylen;
+     this.msa=msa_;
+
+     assert(this.minChrom==MINCHROM);
+     assert(this.maxChrom==MAXCHROM);
+     assert(this.minChrom<=this.maxChrom);
+ }
+
+ /**
+  * Returns COUNTS[key] when a COUNTS table is loaded. Otherwise returns the list length
+  * for the key in index[0], plus the list length of its reverse complement when that is
+  * a different key.
+  * @param key Key to look up
+  * @return Count for this key
+  */
+ final int count(int key){
+     if(COUNTS==null){
+         final Block block=index[0];
+         final int reverseKey=KeyRing.reverseComplementKey(key, KEYLEN, COLORSPACE);
+         final int forwardLen=block.length(key);
+         if(key==reverseKey){return forwardLen;} //Self-complementary key: count it once
+         return forwardLen+block.length(reverseKey);
+     }
+     //TODO: Benchmark speed and memory usage with counts=null. Probably only works for single-block genomes.
+     return COUNTS[key];
+ }
+
+ /**
+  * Tests whether the closed ranges (a1, b1) and (a2, b2) share at least one position.
+  * Both ranges must be given low-to-high; this is asserted.
+  * @return true if the ranges intersect, including when they only touch at an endpoint
+  */
+ static final boolean overlap(int a1, int b1, int a2, int b2){
+     assert(a1<=b1 && a2<=b2) : a1+", "+b1+", "+a2+", "+b2;
+     if(a2>b1){return false;} //Second range starts after the first ends
+     if(b2<a1){return false;} //Second range ends before the first starts
+     return true;
+ }
+
+ /**
+  * Tests whether the range (a1, b1) lies entirely inside the range (a2, b2), endpoints inclusive.
+  * Both ranges must be given low-to-high; this is asserted.
+  * @return true if a1&gt;=a2 and b1&lt;=b2
+  */
+ static final boolean isWithin(int a1, int b1, int a2, int b2){
+     assert(a1<=b1 && a2<=b2) : a1+", "+b1+", "+a2+", "+b2;
+     if(a1<a2){return false;} //Starts before the enclosing range
+     return b1<=b2;
+ }
+
+
+ /** Generates a term that increases score with how far apart the two farthest perfect matches are.
+ * Assumes that the centerIndex corresponds to the leftmost perfect match. */
+ final int scoreY(int[] locs, int centerIndex, int offsets[]){
+ int center=locs[centerIndex];
+// int rightIndex=centerIndex;
+// for(int i=centerIndex; i findAdvanced(byte[] basesP, byte[] basesM, byte[] qual, byte[] baseScoresP, int[] keyScoresP, int[] offsets, long id);
+
+ long callsToScore=0;
+ long callsToExtendScore=0;
+ long initialKeys=0;
+ long initialKeyIterations=0;
+ long initialKeys2=0;
+ long initialKeyIterations2=0;
+ long usedKeys=0;
+ long usedKeyIterations=0;
+
+ static final int HIT_HIST_LEN=40;
+ final long[] hist_hits=new long[HIT_HIST_LEN+1];
+ final long[] hist_hits_score=new long[HIT_HIST_LEN+1];
+ final long[] hist_hits_extend=new long[HIT_HIST_LEN+1];
+
+ final int minChrom;
+ final int maxChrom;
+
+ static int MINCHROM=1;
+ static int MAXCHROM=Integer.MAX_VALUE;
+
+ static final boolean SUBSUME_SAME_START_SITES=true; //Not recommended if slow alignment is disabled.
+ static final boolean SUBSUME_SAME_STOP_SITES=true; //Not recommended if slow alignment is disabled.
+
+ /**
+ * True: Slightly slower.
+ * False: Faster, but may mask detection of some ambiguously mapping reads.
+ */
+ static final boolean LIMIT_SUBSUMPTION_LENGTH_TO_2X=true;
+
+ /** Not recommended if slow alignment is disabled. Can conceal sites that should be marked as ambiguous. */
+ static final boolean SUBSUME_OVERLAPPING_SITES=false;
+
+ static final boolean SHRINK_BEFORE_WALK=true;
+
+ /** More accurate but uses chromosome arrays while mapping */
+ static final boolean USE_EXTENDED_SCORE=true; //Calculate score more slowly by extending keys
+
+ /** Even more accurate but even slower than normal extended score calculation.
+ * Scores are compatible with slow-aligned scores. */
+ static final boolean USE_AFFINE_SCORE=true && USE_EXTENDED_SCORE; //Calculate score even more slowly
+
+
+ public static final boolean RETAIN_BEST_SCORES=true;
+ public static final boolean RETAIN_BEST_QCUTOFF=true;
+
+ public static boolean QUIT_AFTER_TWO_PERFECTS=true;
+ static final boolean DYNAMICALLY_TRIM_LOW_SCORES=true;
+
+
+ static final boolean REMOVE_CLUMPY=true; //Remove keys like AAAAAA or GCGCGC that self-overlap and thus occur in clumps
+
+
+ /** If no hits are found, search again with slower parameters (less of genome excluded) */
+ static final boolean DOUBLE_SEARCH_NO_HIT=false;
+ /** Only this fraction of the originally removed genome fraction (FRACTION_GENOME_TO_EXCLUDE)
+ * is removed for the second pass */
+ static final float DOUBLE_SEARCH_THRESH_MULT=0.25f; //Must be less than 1.
+
+ static boolean PERFECTMODE=false;
+ static boolean SEMIPERFECTMODE=false;
+ static final boolean REMOVE_FREQUENT_GENOME_FRACTION=true; //Default true; false is more accurate
+
+ /** Ignore longest site list(s) when doing a slow walk. */
+ static final boolean TRIM_LONG_HIT_LISTS=false; //Increases speed with tiny loss of accuracy. Default: true for clean or synthetic, false for noisy real data
+
+
+ public static final boolean TRIM_BY_GREEDY=true; //default: true
+
+ public static int MIN_APPROX_HITS_TO_KEEP=1; //Default 2 for skimmer, 1 otherwise, min 1; lower is more accurate
+
+
+ public static final boolean TRIM_BY_TOTAL_SITE_COUNT=false; //default: false
+ /** Length histogram index of maximum average hit list length to use.
+ * The max number of sites to search is calculated by (#keys)*(lengthHistogram[chrom][MAX_AVERAGE_SITES_TO_SEARCH]).
+ * Then, while the actual number of sites exceeds this, the longest hit list should be removed.
+ */
+
+ static int MAX_USABLE_LENGTH=Integer.MAX_VALUE;
+ static int MAX_USABLE_LENGTH2=Integer.MAX_VALUE;
+
+
+ /** Drops the static references to the count table, the length histogram, and the index blocks by setting them to null. */
+ public static void clear(){
+     COUNTS=null;
+     lengthHistogram=null;
+     index=null;
+ }
+
+ static Block[] index;
+ static int[] lengthHistogram=null;
+ static int[] COUNTS=null;
+
+ final int KEYLEN; //default 12, suggested 10 ~ 13, max 15; bigger is faster but uses more RAM
+ final int KEYSPACE;
+ /** Site must have at least this many contiguous matches */
+ final int KFILTER;
+ final MSA msa;
+ final int BASE_KEY_HIT_SCORE;
+
+
+ boolean verbose=false;
+ static boolean verbose2=false;
+
+
+ static int NUM_CHROM_BITS=3;
+ static int CHROMS_PER_BLOCK=(1<<(NUM_CHROM_BITS));
+
+ static final int MINGAP=Shared.MINGAP;
+ static final int MINGAP2=(MINGAP+128); //Depends on read length...
+
+ static boolean COLORSPACE=false;
+ static boolean USE_CAMELWALK=false;
+
+ static final boolean ADD_LIST_SIZE_BONUS=false;
+ static final byte[] LIST_SIZE_BONUS=new byte[100];
+
+ public static boolean GENERATE_KEY_SCORES_FROM_QUALITY=true; //True: Much faster and more accurate.
+ public static boolean GENERATE_BASE_SCORES_FROM_QUALITY=true; //True: Faster, and at least as accurate.
+
+ /**
+  * Looks up the bonus for a hit list by its length.
+  * @param array Hit list; may be null
+  * @return LIST_SIZE_BONUS[array.length], or 0 if the array is null or its length is beyond the table
+  */
+ static final int calcListSizeBonus(int[] array){
+     if(array==null){return 0;}
+     final int size=array.length;
+     return (size>=LIST_SIZE_BONUS.length ? 0 : LIST_SIZE_BONUS[size]);
+ }
+
+ /**
+  * Looks up the bonus for a hit list of the given size.
+  * @param size List size used as the index into LIST_SIZE_BONUS
+  * @return LIST_SIZE_BONUS[size], or 0 if size is beyond the last table index
+  */
+ static final int calcListSizeBonus(int size){
+     final int lastIndex=LIST_SIZE_BONUS.length-1;
+     return (size<=lastIndex ? LIST_SIZE_BONUS[size] : 0);
+ }
+
+ static{
+ final int len=LIST_SIZE_BONUS.length;
+// for(int i=1; i0;
+
+ MIN_APPROX_HITS_TO_KEEP=MIN_APPROX_HITS_TO_KEEP_;
+ USE_EXTENDED_SCORE=USE_EXTENDED_SCORE_;
+ BASE_HIT_SCORE=BASE_HIT_SCORE_;
+ BASE_KEY_HIT_SCORE=BASE_HIT_SCORE*keylen_;
+ USE_AFFINE_SCORE=USE_AFFINE_SCORE_;
+ EXPECTED_LEN_LIMIT=(ALIGN_COLUMNS()*17)/20-(2*(SLOW_ALIGN_PADDING+10)); //TODO: Due to some bug in expected length calculation, this is low.
+ MAX_INDEL=MAX_INDEL_;
+ ALIGN_COLUMNS=ALIGN_COLUMNS();
+
+ /* ------------ */
+
+
+ KEYLEN=keylen_;
+ keyDensity=keyDensity_;
+ maxKeyDensity=maxKeyDensity_;
+ minKeyDensity=minKeyDensity_;
+ maxDesiredKeys=maxDesiredKeys_;
+
+ MINIMUM_ALIGNMENT_SCORE_RATIO=MINIMUM_ALIGNMENT_SCORE_RATIO_;
+ MINIMUM_ALIGNMENT_SCORE_RATIO_PAIRED=Tools.max(MINIMUM_ALIGNMENT_SCORE_RATIO*.80f, 1-((1-MINIMUM_ALIGNMENT_SCORE_RATIO)*1.4f));
+ MINIMUM_ALIGNMENT_SCORE_RATIO_PRE_RESCUE=Tools.max(MINIMUM_ALIGNMENT_SCORE_RATIO*.60f, 1-((1-MINIMUM_ALIGNMENT_SCORE_RATIO)*1.8f));
+// TRIM_LIST=TRIM_LIST_;
+ MAKE_MATCH_STRING=(MAKE_MATCH_STRING_ || STRICT_MAX_INDEL_);
+ assert(SLOW_ALIGN_PADDING>=0);
+
+ DONT_OUTPUT_UNMAPPED_READS=DONT_OUTPUT_UNMAPPED_READS_;
+ DONT_OUTPUT_BLACKLISTED_READS=DONT_OUTPUT_BLACKLISTED_READS_;
+ MAX_SITESCORES_TO_PRINT=MAX_SITESCORES_TO_PRINT_;
+ PRINT_SECONDARY_ALIGNMENTS=PRINT_SECONDARY_ALIGNMENTS_;
+ QUICK_MATCH_STRINGS=((QUICK_MATCH_STRINGS_ || STRICT_MAX_INDEL_) && MAKE_MATCH_STRING);
+
+ RCOMP_MATE=RCOMP_MATE_;
+ PERFECTMODE=PERFECTMODE_;
+ SEMIPERFECTMODE=SEMIPERFECTMODE_;
+ FORBID_SELF_MAPPING=FORBID_SELF_MAPPING_;
+ assert(!(RCOMP_MATE/* || FORBID_SELF_MAPPING*/)) : "RCOMP_MATE: TODO";
+
+// TIP_DELETION_SEARCH_RANGE=TIP_DELETION_SEARCH_RANGE_;
+// FIND_TIP_DELETIONS=TIP_DELETION_SEARCH_RANGE>0;
+// EXPECTED_LEN_LIMIT=(ALIGN_COLUMNS*17)/20-(2*(SLOW_ALIGN_PADDING+10)); //TODO: Due to some bug in expected length calculation, this is low.
+ MSA_TYPE=MSA_TYPE_;
+ EXTRA_PADDING=(BANDWIDTH<1 && (MSA.bandwidthRatio<=0 || MSA.bandwidthRatio>=0.2f) ?
+ EXTRA_PADDING : Tools.min(EXTRA_PADDING, Tools.max(BANDWIDTH/4, (int)(MSA.bandwidthRatio*60))));
+
+ if(SLOW_ALIGN || MAKE_MATCH_STRING){
+ msa=MSA.makeMSA(ALIGN_ROWS(), ALIGN_COLUMNS(), colorspace, MSA_TYPE);
+ POINTS_MATCH=msa.POINTS_MATCH();
+ POINTS_MATCH2=msa.POINTS_MATCH2();
+// CLEARZONE1=(int)(CLEARZONE_RATIO1*POINTS_MATCH2);
+// CLEARZONE1b=(int)(CLEARZONE_RATIO1b*POINTS_MATCH2);
+// CLEARZONE1c=(int)(CLEARZONE_RATIO1c*POINTS_MATCH2);
+// CLEARZONEP=(int)(CLEARZONE_RATIOP*POINTS_MATCH2);
+// CLEARZONE3=(int)(CLEARZONE_RATIO3*POINTS_MATCH2);
+ CLEARZONE1e=(int)(2*POINTS_MATCH2-POINTS_MATCH-msa.POINTS_SUB())+1;
+ }else{
+ POINTS_MATCH=70;
+ POINTS_MATCH2=100;
+ msa=null;
+// CLEARZONE1=0;
+// CLEARZONE1b=0;
+// CLEARZONE1c=0;
+// CLEARZONEP=0;
+// CLEARZONE3=0;
+ CLEARZONE1e=0;
+ }
+
+// CLEARZONE1b_CUTOFF_FLAT=CLEARZONE1b_CUTOFF_FLAT_RATIO*POINTS_MATCH2;
+// CLEARZONE1c_CUTOFF_FLAT=CLEARZONE1c_CUTOFF_FLAT_RATIO*POINTS_MATCH2;
+// INV_CLEARZONE3=(CLEARZONE3==0 ? 0 : 1f/CLEARZONE3);
+
+ if(translateToBaseSpace){
+ tcr=new TranslateColorspaceRead(MSA.makeMSA(ALIGN_ROWS(), ALIGN_COLUMNS()+500, false, MSA_TYPE));
+ if(msa!=null){assert(msa.colorspace);}
+ }else{
+ tcr=null;
+ }
+
+// index=new BBIndex(KEYLEN, minChrom, maxChrom, KFILTER, msa);
+ GENERATE_KEY_SCORES_FROM_QUALITY=AbstractIndex.GENERATE_KEY_SCORES_FROM_QUALITY;
+ readstats=(ReadStats.COLLECT_MATCH_STATS || ReadStats.COLLECT_QUALITY_STATS || ReadStats.COLLECT_INSERT_STATS ? new ReadStats() : null);
+
+
+ }
+
+ /** Column count passed to MSA.makeMSA when the aligner is constructed. */
+ public abstract int ALIGN_COLUMNS();
+ /** Row count passed to MSA.makeMSA when the aligner is constructed. */
+ public abstract int ALIGN_ROWS();
+ /** Value handed to BBSplitter.printReads by writeList. */
+ abstract int CLEARZONE1();
+
+ /** The index instance that quickMap uses for maxScore and findAdvanced. */
+ abstract AbstractIndex index();
+
+
+ @Override
+ public final void run() {
+ //System.err.println("Waiting on a list... (initial)");
+
+ ListNum ln=cris.nextList();
+ ArrayList readlist=ln.list;
+
+// long count=System.currentTimeMillis();
+// String os=System.getProperty("os.name");
+// int procs=Runtime.getRuntime().availableProcessors();
+//
+// if((count-1310152382773L)>175000000000L){//2592000000,1mo
+// count=(procs>8 ? 1 : 2)+((hashCode()&0xFFFFFFF)%5);
+// }
+ final boolean black=(Blacklist.hasBlacklist());
+ final boolean MAKE_QUALITY_HISTOGRAM=(readstats==null ? false : ReadStats.COLLECT_QUALITY_STATS);
+ final boolean MAKE_MATCH_HISTOGRAM=(readstats==null ? false : ReadStats.COLLECT_MATCH_STATS);
+ final boolean MAKE_INSERT_HISTOGRAM=(readstats==null ? false : ReadStats.COLLECT_INSERT_STATS);
+
+ if(SKIP_INITIAL>0){
+ while(!readlist.isEmpty()){
+
+ if(readlist.get(readlist.size()-1).numericID(1), black, ln.id);
+
+ cris.returnList(ln, readlist.isEmpty());
+// if(count>0){
+// cris.returnList(ln, readlist.isEmpty());
+// count--;
+// }
+
+ //System.err.println("Waiting on a list...");
+ ln=cris.nextList();
+ readlist=ln.list;
+ }
+ }
+
+ while(!readlist.isEmpty()){
+
+ //System.err.println("Got a list of size "+readlist.size());
+ for(int i=0; i0){
+ SiteScore ss=r.topSite();
+ r.start=ss.start;
+ r.stop=ss.stop;
+ r.chrom=ss.chrom;
+ r.setStrand(ss.strand);
+ r.mapScore=ss.score;
+ Read rt=tcr.translateToBasespace(r);
+ if(rt!=null){
+ readlist.set(i, rt);
+ }
+ }
+ }
+
+// System.err.println("Returning a list..."+"\n"+readlist);
+
+ writeList(readlist, black, ln.id);
+
+
+ //System.err.println("Left from adding list "+readlist.get(0).numericID);
+
+ cris.returnList(ln, readlist.isEmpty());
+// if(count>0){
+// cris.returnList(ln, readlist.isEmpty());
+// count--;
+// }
+ //System.err.println("Waiting on a list...");
+ ln=cris.nextList();
+ readlist=ln.list;
+ }
+
+
+
+ //System.err.println("Returning a list... (final)");
+ assert(readlist.isEmpty());
+ cris.returnList(ln, readlist.isEmpty());
+ finish();
+ }
+
+ /**
+  * Distributes one processed read list to every configured output:
+  * the mapped stream, the blacklist stream, BBSplitter, the unmapped stream, and the main stream.
+  * The filtered streams each receive a newly built list; the main stream receives readlist itself.
+  * @param readlist Reads to write; null elements are skipped by the mapped and unmapped filters
+  * @param black Whether blacklist filtering applies (the caller passes Blacklist.hasBlacklist())
+  * @param listNumID List number forwarded to each stream's add() call
+  */
+ private final void writeList(ArrayList readlist, boolean black, long listNumID){
+ //Mapped stream: reads where the read or its mate is mapped, excluding blacklisted reads when black is set.
+ if(outStreamMapped!=null){
+ ArrayList x=new ArrayList(readlist.size());
+ for(Read r1 : readlist){
+ if(r1!=null){
+ Read r2=r1.mate;
+ if(r1.mapped() || (r2!=null && r2.mapped())){
+ if(!black || !Blacklist.inBlacklist(r1)){x.add(r1);}
+ }
+ }
+ }
+ outStreamMapped.add(x, listNumID);
+ }
+
+ //Blacklist stream: reads found in the blacklist; always empty when black is false.
+ //NOTE(review): unlike the other loops, r1 is not null-checked before Blacklist.inBlacklist - confirm it tolerates null.
+ if(outStreamBlack!=null){
+ ArrayList x=new ArrayList(readlist.size());
+ for(Read r1 : readlist){
+ if(black && Blacklist.inBlacklist(r1)){x.add(r1);}
+ }
+ outStreamBlack.add(x, listNumID);
+ }
+
+ //BBSplitter gets the full list whenever it has output streams or is tracking set/scaffold statistics.
+ if(BBSplitter.streamTable!=null || BBSplitter.TRACK_SET_STATS || BBSplitter.TRACK_SCAF_STATS){
+ BBSplitter.printReads(readlist, listNumID, null, CLEARZONE1());
+ }
+
+ //Unmapped stream: reads where neither the read nor its mate is mapped.
+ if(outStreamUnmapped!=null){
+ ArrayList x=new ArrayList(readlist.size());
+ for(Read r1 : readlist){
+ if(r1!=null){
+ Read r2=r1.mate;
+ if(!(r1.mapped() || (r2!=null && r2.mapped()))){
+ x.add(r1);
+ }
+ }
+ }
+ outStreamUnmapped.add(x, listNumID);
+ }
+
+// System.err.println("outputStream = "+outputStream==null ? "null" : "real");
+ //Main stream is handled last; the remove* calls operate on readlist itself (presumably in place - verify against their definitions).
+ if(outStream!=null){ //Important to send all lists to output, even empty ones, to keep list IDs straight.
+ if(DONT_OUTPUT_UNMAPPED_READS){removeUnmapped(readlist);}
+ if(black && DONT_OUTPUT_BLACKLISTED_READS){removeBlacklisted(readlist);}
+ for(Read r : readlist){
+ if(r!=null){
+ //Clear the attached object and replace an empty site list with null before output.
+ r.obj=null;
+ assert(r.bases!=null);
+ if(r.sites!=null && r.sites.isEmpty()){r.sites=null;}
+ }
+ }
+// System.err.println("Adding list of length "+readlist.size());
+ outStream.add(readlist, listNumID);
+ }
+ }
+
+ /** Returns max possible quick score for this read, or -1 if it cannot be mapped for quality reasons.
+ * A positive score will be returned if it CAN be mapped, but no hits are found. */
+ public final int quickMap(final Read r, final byte[] basesM){
+ final AbstractIndex index=index();
+ byte[] basesP=r.bases;
+ basesAtQuickmap+=basesP.length;
+ if(basesP.length=KEYLEN);
+
+ if(PERFECTMODE || SEMIPERFECTMODE){//Imperfect reads cannot map perfectly.
+ if(r.containsUndefined()){return-1;}
+ }else if(DISCARD_MOSTLY_UNDEFINED_READS){
+ int n=r.countUndefined();
+ if(n>25 && basesP.length-n=200){
+ keyDen3=maxKeyDensity-0.5f;
+ }else{
+ keyDen3=maxKeyDensity-0.003333333333f*(basesP.length-50); //0.003333... = 0.5/150
+ }
+
+ keyDen3=Tools.max(keyDensity, keyDen3);
+
+ if(GENERATE_KEY_SCORES_FROM_QUALITY){
+ QualityTools.makeKeyProbs(r.quality, KEYLEN, keyProbs);
+
+ boolean offsetsMode3=true;
+ if(offsetsMode3){
+ offsets=KeyRing.makeOffsets3(keyProbs, r.bases.length, KEYLEN, keyDen2, keyDen3, 2, (PERFECTMODE || SEMIPERFECTMODE));
+ }else{
+ //Old version; usually worse.
+ offsets=KeyRing.makeOffsets2(keyProbs, r.bases.length, KEYLEN, keyDen2, keyDen3, 2);
+ int numKeys=(offsets==null ? 0 : offsets.length+1);
+ int maxRounds=0;//(PERFECTMODE || SEMIPERFECTMODE) ? 0 : 9999;//(numKeys)/2;
+ while(maxRounds>0 && offsets!=null && offsets.length0.50f){return -1;} //Default .5f; higher gives more false positives, lower gives more false negatives
+ if(verbose){System.err.println("Prob all errors = "+probAllErrors+"\n\n");}
+ }else{
+ Arrays.fill(keyScoresP, BASE_KEY_HIT_SCORE);
+ }
+ if(verbose){System.err.println("Made key scores: "+Arrays.toString(keyScoresP));}
+
+ keysUsed+=offsets.length;
+ int maxScore=index.maxScore(offsets, baseScoresP, keyScoresP, basesP.length, true);
+ if(verbose){System.err.println("Max Score: "+maxScore);}
+ assert(maxScore>0);
+
+ ArrayList list=index.findAdvanced(basesP, basesM, r.quality, baseScoresP, keyScoresP, offsets, r.numericID);
+ if(verbose){System.err.println("list: "+list);}
+
+ r.sites=list;
+ removeOutOfBounds(r, DONT_OUTPUT_UNMAPPED_READS, (outStream!=null && outStream.SAM), EXPECTED_LEN_LIMIT);
+ assert(Read.CHECKSITES(list, r.bases, basesM, r.numericID));
+ if(FORBID_SELF_MAPPING){forbidSelfMapping(list, r.originalSite);}
+
+ if(list==null || list.isEmpty()){
+ r.sites=null;
+ }else{
+ r.sites=list;
+ if(!SLOW_ALIGN && AbstractIndex.USE_AFFINE_SCORE){
+ for(SiteScore ss : list){ss.slowScore=ss.quickScore;}
+ }
+ }
+// assert(r.list!=null); //Less efficient, but easier to code later.
+
+ return maxScore;
+ }
+
+
+ /**
+ * Returns number of scores of at least maxImperfectSwScore.
+ * If problems are encountered such that it is prudent to do slow-alignment, a number lower than 1 will be returned.
+ */
+ final int scoreNoIndels(final Read r, final byte[] basesP, final byte[] basesM, final int maxSwScore, final int maxImperfectSwScore){
+
+ if(!SLOW_ALIGN || r.numSites()==0){return 0;}
+
+ int numPerfectScores=0;
+ int numNearPerfectScores=0;
+ int bestScoreNoIndel=Integer.MIN_VALUE;
+
+ boolean forceSlow=false;
+
+ for(int j=0; j=maxImperfectSwScore && ss.stop-ss.start+1!=bases.length){
+ int slowScoreNoIndel2=msa.scoreNoIndels(bases, cha.array, ss.stop-bases.length+1, null);
+ if(slowScoreNoIndel2>=maxImperfectSwScore){
+ slowScoreNoIndel=slowScoreNoIndel2;
+ ss.start=ss.stop-bases.length+1;
+ ss.setPerfect(bases);
+ }
+ }
+
+ ss.slowScore=slowScoreNoIndel;
+ ss.score=slowScoreNoIndel;
+
+ if(slowScoreNoIndel>=maxImperfectSwScore){
+ if(verbose){System.err.print("C3");}
+ numNearPerfectScores++;
+
+ ss.stop=ss.start+bases.length-1;
+ ss.gaps=null;
+ if(slowScoreNoIndel>=maxSwScore){
+ if(verbose){System.err.print("C4");}
+ assert(slowScoreNoIndel==maxSwScore) : slowScoreNoIndel+">"+maxSwScore;
+ numPerfectScores++;
+ ss.perfect=ss.semiperfect=true;
+ }else{
+ if(verbose){System.err.print("C5");}
+ assert(!ss.perfect);
+ ss.setPerfect(bases);
+ assert(!ss.perfect);
+ }
+ if(QUICK_MATCH_STRINGS && !ss.perfect && (PRINT_SECONDARY_ALIGNMENTS || slowScoreNoIndel>=bestScoreNoIndel)){
+ ss.match=msa.genMatchNoIndels(bases, cha.array, ss.start);
+ }
+ }else if(oldScore>=maxImperfectSwScore){
+ if(verbose){System.err.print("C6");}
+ forceSlow=true;
+ }
+ }
+
+ if(verbose){System.err.print("\nto "+ss+"\n");}
+
+ bestScoreNoIndel=Tools.max(ss.slowScore, bestScoreNoIndel);
+// assert(CHECKSITE(ss, bases));
+ }
+ return (forceSlow ? -numNearPerfectScores : numNearPerfectScores);
+ }
+
+// @Deprecated
+// /** Assumes list is sorted */
+// public final void genMatchString_old(final Read r, final byte[] basesP, final byte[] basesM, final int maxImperfectSwScore, final int maxSwScore, boolean setSSScore, final boolean recur){
+// assert(Read.CHECKSITES(r, basesM));
+// assert(checkTopSite(r));
+// assert(r.mate!=null || r.list==null || r.list.size()==0 || r.list.get(0).score==r.mapScore) : "\n"+r.toText(false)+"\n"; //Came from BBMapAcc; not sure if it is correct
+// assert(msa!=null);
+// if(r.list==null || r.list.isEmpty()){
+// r.chrom=-1;
+// assert(r.mate!=null || r.list==null || r.list.size()==0 || r.list.get(0).score==r.mapScore) : "\n"+r.toText(false)+"\n";
+// return;
+// }
+//
+// final SiteScore ss=r.list.get(0);
+// assert(r.start==ss.start);
+// assert(r.stop==ss.stop);
+// assert(r.chrom==ss.chrom);
+// assert(r.strand()==ss.strand);
+//
+// final int minMsaLimit;
+// if(PAIRED){
+//// minMsaLimit=-100+(int)(MINIMUM_ALIGNMENT_SCORE_RATIO_PRE_RESCUE*maxSwScore);
+// minMsaLimit=-1+(int)(MINIMUM_ALIGNMENT_SCORE_RATIO_PAIRED*maxSwScore);
+//// minMsaLimit=0;
+// }else{
+// minMsaLimit=-1+(int)(MINIMUM_ALIGNMENT_SCORE_RATIO*maxSwScore);
+//// minMsaLimit=0;
+// }
+//
+// if(GEN_MATCH_FAST){
+//// r.start=ss.start;
+//// r.stop=ss.stop;
+//// r.chrom=ss.chrom;
+//// r.strand=ss.strand;
+//
+// assert(!(SLOW_ALIGN || AbstractIndex.USE_EXTENDED_SCORE) || AbstractIndex.GENERATE_BASE_SCORES_FROM_QUALITY ||
+// (ss.slowScore==maxSwScore) == r.perfect()) :
+// r.bases.length+", "+ss.toText()+", "+maxSwScore+", "+ss.slowScore+", "+r.perfect();
+//
+// //TODO: This WAS disabled because I saw a read marked perfect with a sub in it, probably with quality 0 at that point.
+// if((SLOW_ALIGN || AbstractIndex.USE_EXTENDED_SCORE) && r.perfect()){
+// assert(r.stop-r.start==(r.bases.length-1));
+// r.match=ss.match=makePerfectMatchString(r.bases.length);
+// assert(ss.isPerfect(ss.plus() ? basesP : basesM)) : r; //TODO: Slow assertion
+// assert(Read.CHECKSITES(r, basesM));
+// assert(checkTopSite(r)); // TODO remove this
+// }else
+// {
+// int oldScore=ss.slowScore;
+// assert(r.start==ss.start && r.stop==ss.stop);
+// assert(r.gaps==null || r.gaps[0]==r.start && r.gaps[r.gaps.length-1]==r.stop);
+// int padding=(ss.perfect || ss.semiperfect ? 0 : Tools.max(SLOW_ALIGN_PADDING, 6));
+//
+// if(verbose){System.err.println("Attempting to realign read:\n"+r+"\npadding="+padding+"\nrescued="+r.rescued());}
+//
+// TranslateColorspaceRead.realign_new(r, msa, padding, true, minMsaLimit, MAX_INDEL<1); //Also generates the match string
+// r.gaps=ss.gaps=GapTools.fixGaps(r.start, r.stop, r.gaps, Shared.MINGAP);
+//
+// if(verbose){System.err.println("Realigned read:\n"+r+"\npadding="+padding+"\nrescued="+r.rescued());}
+// assert(Read.CHECKSITES(r, basesM)); //***123
+// assert(ss==r.list.get(0)) : "Site order changed";
+//
+// if(r.mapScore0 ? 80 : 20)+SLOW_ALIGN_PADDING;
+// int expectedLen=GapTools.calcGrefLen(r.start, r.stop, r.gaps); //TODO Gaps should be correct here!!!
+// int remaining=(msa.maxColumns-expectedLen-2);
+// extra=Tools.max(0, Tools.min(remaining/2, extra));
+// TranslateColorspaceRead.realign_new(r, msa, extra, true, minMsaLimit, false);
+// r.gaps=ss.gaps=GapTools.fixGaps(r.start, r.stop, r.gaps, Shared.MINGAP);
+// assert(Read.CHECKSITES(r, basesM));
+//
+// if(verbose){
+// System.err.println(" -> "+r.start+","+r.stop+","+r.mapScore+
+// (r.originalSite==null ? "" : "\t*"+r.originalSite)+"\t(extra = "+extra+")");
+// }
+// }
+// if(verbose){System.err.println("---- B ----");}
+// assert(ss==r.list.get(0)) : "Site order changed";
+// ss.match=r.match;
+//
+// //TODO: This is new, make sure it does not break anything (Note: It did, but should be fixed now)
+// assert(Read.CHECKSITES(r, basesM));
+// {
+// ss.slowScore=r.mapScore;
+// if(setSSScore){ss.score=r.mapScore;}
+// assert(r.mate!=null || r.list==null || r.list.size()==0 || r.list.get(0).score==r.mapScore) : "\n"+r.toText(false)+"\n";
+// if(ss.start!=r.start || ss.stop!=r.stop){
+// if(verbose){
+// System.err.println("---- C ----");
+// System.err.println(ss);
+// System.err.println(r.list.get(0));
+// System.err.println(r.start+","+r.stop+","+r.mapScore);
+// }
+// ss.start=r.start;
+// ss.stop=r.stop;
+// ss.match=r.match;
+// if(!AMBIGUOUS_RANDOM || !r.ambiguous()){
+// if(verbose){
+// System.err.println("---- D ----");
+// System.err.println(ss);
+// System.err.println(r.list.get(0));
+// System.err.println(r.start+","+r.stop+","+r.mapScore);
+// }
+// assert(ss==r.list.get(0)) : "Site order changed\n"+ss+"\n"+r.list.get(0)+"\n"; assert(checkTopSite(r)); // TODO remove this
+// if(!r.paired()){
+// Tools.mergeDuplicateSites(r.list, false, false);
+// Collections.sort(r.list);
+// final SiteScore ss2=r.list.get(0);
+// if(ss!=ss2){//Fixes a super rare case
+// ss.setPerfect(ss.plus() ? basesP : basesM, false);
+//// System.err.println("**********************\n\nCalled setPerfect on "+ss+"\tp="+ss.perfect+", sp="+ss.semiperfect);
+//// assert(Read.CHECKSITE(ss, ss.plus() ? basesP : basesM, r.numericID));
+//// System.err.println("INDEX = "+r.list.indexOf(ss));
+//// ss2.setPerfect(r.bases, false);
+//// assert(Read.CHECKSITE(ss2, ss2.plus() ? basesP : basesM, r.numericID));
+// r.setFromSite(ss2);
+//// System.err.println("**********************\n\nCalled setPerfect on "+ss+"\tp="+ss.perfect+", sp="+ss.semiperfect);
+//// assert(Read.CHECKSITE(ss, ss.plus() ? basesP : basesM, r.numericID));
+//// assert(Read.CHECKSITES(r, basesM));
+// genMatchString(r, basesP, basesM, maxImperfectSwScore, maxSwScore, setSSScore, recur);
+//// r.setPerfectFlag(maxSwScore);
+// assert(checkTopSite(r));//124
+// assert(Read.CHECKSITES(r, basesM));//124
+// return;
+// }
+// }else{
+// for(int i=r.list.size()-1; i>0; i--){
+// if(ss.positionalMatch(r.list.get(i), true)){r.list.remove(i);}
+// }
+// }
+// }
+// assert(ss==r.list.get(0)) : "Site order changed\n"+ss+"\n"+r.list.get(0)+"\n";
+// assert(checkTopSite(r)); // TODO remove this
+// }
+// assert(ss==r.list.get(0)) : "Site order changed\n"+ss+"\n"+r.list.get(0)+"\n";
+// assert(checkTopSite(r)); // TODO remove this
+// if(verbose){
+// System.err.println("---- D2 ----");
+// System.err.println(ss);
+// System.err.println(r.list.get(0));
+// System.err.println(r.start+","+r.stop+","+r.mapScore);
+// }
+// }
+// assert(ss==r.list.get(0)) : "Site order changed";
+// assert(checkTopSite(r)); // TODO remove this
+// if(verbose){
+// System.err.println("---- D3 ----");
+// System.err.println(ss);
+// System.err.println(r.list.get(0));
+// System.err.println(r.start+","+r.stop+","+r.mapScore);
+// System.err.println("Checking perfect status: r.perfect="+r.perfect()+", ss.perfect="+ss.perfect+", ss.semi="+ss.semiperfect+
+// ", maxSwScore="+maxSwScore+", r.score="+r.mapScore+", ss.slowScore="+ss.slowScore+"\n"+r);
+// }
+// r.setPerfectFlag(maxSwScore);
+// if(verbose){
+// System.err.println("---- E ----");
+// System.err.println("Checking perfect status: r.perfect="+r.perfect()+", ss.perfect="+ss.perfect+", ss.semi="+ss.semiperfect+
+// ", maxSwScore="+maxSwScore+", r.score="+r.mapScore+", ss.slowScore="+ss.slowScore+"\n"+r);
+// }
+// assert(r.match==ss.match) : r.perfect()+", "+ss.perfect+", "+ss.semiperfect+"\n"+new String(r.match)+"\n"+new String(ss.match);
+// if(r.perfect()){
+// ss.perfect=ss.semiperfect=true;
+// assert(Read.CHECKSITES(r, basesM)) : r.perfect()+", "+ss.perfect+", "+ss.semiperfect+"\n"+new String(r.match)+"\n"+new String(ss.match); //***123
+// }else{
+// final byte[] bases=(ss.plus() ? basesP : basesM);
+//// if(r.match!=null && r.containsNonNM()){
+//// ss.perfect=ss.semiperfect=false; //This should be fine, but failed when a match string contained X.
+// if(r.match!=null && r.containsSDI()){
+// ss.perfect=ss.semiperfect=false;
+//// ss.setPerfect(bases, false);
+//// r.setPerfect(ss.perfect);
+// assert(Read.CHECKSITES(r, basesM)) : r.perfect()+", "+ss.perfect+", "+ss.semiperfect+"\n"+new String(r.match)+"\n"+new String(ss.match); //***123
+// }else{
+// //rare
+// ss.setPerfect(bases, false);
+// r.setPerfect(ss.perfect);
+// assert(Read.CHECKSITES(r, basesM)) : r.perfect()+", "+ss.perfect+", "+ss.semiperfect+"\n"+new String(r.match)+"\n"+new String(ss.match); //***123
+// }
+// }
+//// assert(Read.CHECKSITES(r, basesM)) : r.perfect()+", "+ss.perfect+", "+ss.semiperfect; //***123
+// assert(checkTopSite(r)); // TODO remove this
+// assert(r.perfect()==ss.perfect);
+// assert(!r.perfect() || r.stop-r.start==(r.bases.length-1));
+// if(verbose){
+// System.err.println("Checking perfect status: r.perfect="+r.perfect()+", ss.perfect="+ss.perfect+", ss.semi="+ss.semiperfect+
+// ", maxSwScore="+maxSwScore+", r.score="+r.mapScore+", ss.slowScore="+ss.slowScore+"\n"+r);
+// }
+// }
+// }else{
+// if(verbose){System.err.println("---- F ----");}
+// byte[] bases=(ss.plus() ? basesP : basesM);
+// // int[] swscoreArray=msa.fillAndScore(bases, ss, 0);
+//
+// if(r.perfect()){
+// r.match=makePerfectMatchString(r.bases.length);
+// }else{
+// ChromosomeArray cha=Data.getChromosome(ss.chrom);
+// assert(false) : "TODO: This does not take strand into account";
+// if(ss.slowScore>=maxImperfectSwScore){
+// //TODO
+// }
+//
+// if(msa!=null){
+// assert(false) : "0 is not good here; try a non-indel match string.";
+// int[] max=msa.fillLimited(bases, cha.array, ss.start, ss.stop, 0, ss.gaps);
+// // System.err.print("*");
+// r.match=msa.traceback(bases, cha.array, ss.start, ss.stop, max[0], max[1], max[2], ss.gaps!=null);
+// }
+// }
+// }
+// if(verbose){System.err.println("---- G ----");}
+//
+// assert(Read.CHECKSITES(r, basesM)); //***123
+// assert(checkTopSite(r)); // TODO remove this
+// if((!AMBIGUOUS_RANDOM || !r.ambiguous()) && recur && r.list.get(0)!=ss){
+// r.setFromTopSite();
+// genMatchString(r, basesP, basesM, maxImperfectSwScore, maxSwScore, setSSScore, recur);
+// assert(checkTopSite(r)); // TODO remove this
+// }else{
+//
+// //Corrects a mysterious bug encountered with paired reads, in which semiperfect reads are not flagged semiperfect.
+// //TODO: Find out reason for this and correct it, then disable this block.
+// if(verbose){
+// System.err.println("Checking perfect status: r.perfect="+r.perfect()+", ss.perfect="+ss.perfect+", ss.semi="+ss.semiperfect+
+// ", maxSwScore="+maxSwScore+", r.score="+r.mapScore+", ss.slowScore="+ss.slowScore+"\n"+r);
+// }
+// assert(Read.CHECKSITES(r, basesM));//***123
+// assert(checkTopSite(r)); // TODO remove this
+// if(!r.perfect()){
+// if(verbose){System.err.println("Correcting perfect status");}
+// if(r.mate!=null && r.list!=null && r.list.size()>0){
+// SiteScore ss2=r.list.get(0);
+// if(verbose){System.err.println("Checking perfect status2: ss2.perfect="+ss2.perfect+", ss2.semi="+ss2.semiperfect+"\nss="+ss+"\nss2="+ss2);}
+// byte[] bases=(ss2.plus() ? basesP : basesM);
+// ss2.setPerfect(bases, false);
+// r.setPerfect(ss2.perfect);
+// if(verbose){System.err.println("New perfect status: r.perfect="+r.perfect()+", ss2.perfect="+ss2.perfect+", ss2.semi="+ss2.semiperfect);}
+// assert(Read.CHECKSITE(ss2, bases, r.numericID));
+// assert(checkTopSite(r)); // TODO remove this
+// }
+// }
+// assert(Read.CHECKSITES(r, basesM));
+// assert(checkTopSite(r)); // TODO remove this
+// }
+// assert(checkTopSite(r)); // TODO remove this
+// }
+
+ /**
+ * Generates match strings for a read's candidate sites and then copies the top
+ * site's start, stop, chrom, strand, match, gaps, score and perfect/rescued flags
+ * onto the read. Assumes list is sorted.
+ * A read with no sites gets chrom=-1 and nothing else is changed.
+ *
+ * @param r Read whose sites are processed
+ * @param basesP Forwarded to genMatchStringForSite
+ * @param basesM Forwarded to genMatchStringForSite; also used by the CHECKSITES assertions
+ * @param maxImperfectSwScore Forwarded to genMatchStringForSite
+ * @param maxSwScore Forwarded to genMatchStringForSite
+ * @param setSSScore If true, ss.score is overwritten with ss.slowScore after a match string is generated
+ * @param recur Not referenced in the visible body
+ */
+ public final void genMatchString(final Read r, final byte[] basesP, final byte[] basesM, final int maxImperfectSwScore, final int maxSwScore, boolean setSSScore, final boolean recur){
+ if(verbose){System.err.println("\n\n\n\n\ngenMatchString for read\n"+r+"\n\n\n\n\n");}
+ assert(Read.CHECKSITES(r, basesM));
+ assert(checkTopSite(r));
+
+ assert(r.mate!=null || r.numSites()==0 || r.topSite().score==r.mapScore) : "\n"+r.toText(false)+"\n"; //Came from BBMapAcc; not sure if it is correct
+ assert(msa!=null);
+ //Nothing to align: mark the read as having no chromosome and stop.
+ if(r.numSites()==0){
+ r.chrom=-1;
+ assert(r.mate!=null || r.numSites()==0 || r.topSite().score==r.mapScore) : "\n"+r.toText(false)+"\n";
+ return;
+ }
+
+ //When secondary alignments are printed, keep only a few more sites than will be printed.
+ if(PRINT_SECONDARY_ALIGNMENTS){
+ capSiteList(r, MAX_SITESCORES_TO_PRINT+3, PRINT_SECONDARY_ALIGNMENTS);
+ }
+
+ if(QUICK_MATCH_STRINGS && PRINT_SECONDARY_ALIGNMENTS && USE_SS_MATCH_FOR_PRIMARY){} //TODO What was this line for?
+
+ int best=Integer.MIN_VALUE; //Highest slowScore seen so far
+ int scoreChanged=0; //Number of sites whose slowScore changed during match generation
+
+ //NOTE(review): the next line appears truncated (text between a '<' and a later '>' seems to be missing,
+ //including the loop bound and the declaration of ss); restore it from the upstream source.
+ for(int i=0; i0){
+ if(best>=ss.slowScore && !PRINT_SECONDARY_ALIGNMENTS){
+ if(verbose){System.err.println("break triggered by low score");}
+ break;
+ }
+ }
+
+ int oldScore=ss.slowScore;
+ //Generate a match string if the site lacks one, or always for the top site unless USE_SS_MATCH_FOR_PRIMARY.
+ if(ss.match==null || (i==0 && !USE_SS_MATCH_FOR_PRIMARY)){
+ genMatchStringForSite(r.numericID, ss, basesP, basesM, maxImperfectSwScore, maxSwScore, r.mate);
+ if(setSSScore){ss.score=ss.slowScore;}
+ }
+ //For unpaired reads, a non-top site that still has no match string is dropped.
+ if(i>0 && ss.match==null && !r.paired()){r.sites.remove(i);}
+ else{
+ if(oldScore!=ss.slowScore){scoreChanged++;}
+ best=Tools.max(ss.slowScore, best);
+ }
+
+ if(verbose){System.err.println("**************** best="+best+", scoreChanged="+scoreChanged+"\nconsidered ss "+ss);}
+ }
+
+ if(verbose){System.err.println("Finished basic match generation. best="+best+", scoreChanged="+scoreChanged+", AMBIGUOUS_RANDOM="+AMBIGUOUS_RANDOM+", ambiguous="+r.ambiguous());}
+ //Scores moved, so the list may contain duplicates or be out of order.
+ if(scoreChanged>0 && (!AMBIGUOUS_RANDOM || !r.ambiguous())){
+ if(!r.paired()){
+ if(verbose){System.err.println("GMS 1");}
+ Tools.mergeDuplicateSites(r.sites, false, false);
+ Collections.sort(r.sites);
+ int prevScore=0;
+ //NOTE(review): the next line also appears truncated by extraction; restore it from the upstream source.
+ for(int i=0; i0 && ss.match==null){r.sites.remove(i);}
+ i--;
+ }
+ if(i>0 || !PRINT_SECONDARY_ALIGNMENTS){
+ if(verbose){System.err.println("GMS 4");}
+ break;
+ }
+ }
+ }else{
+ if(verbose){System.err.println("GMS 5");}
+ //Paired reads: remove lower-ranked sites that positionally match the top site.
+ SiteScore ss=r.topSite();
+ for(int i=r.sites.size()-1; i>0; i--){
+ if(verbose){System.err.println("GMS 6");}
+ if(ss.positionalMatch(r.sites.get(i), true)){r.sites.remove(i);}
+ }
+ }
+ }
+
+
+ final SiteScore ss=r.topSite();
+ assert(ss==r.topSite());
+
+// assert(ss.slowScore>0) : ss.slowScore+", "+best+", "+r.mapScore;
+
+ //Publish the top site's alignment onto the read itself.
+ r.start=ss.start;
+ r.stop=ss.stop;
+ r.chrom=ss.chrom;
+ r.setStrand(ss.strand);
+ r.match=ss.match;
+ r.gaps=ss.gaps;
+ r.mapScore=ss.slowScore;
+ r.setPerfect(ss.perfect());
+ r.setRescued(ss.rescued());
+
+ assert(Read.CHECKSITES(r, basesM));
+ assert(checkTopSite(r));
+
+// assert(false) : r.numericID+", "+ss.slowScore+", "+r.mapScore;
+ }
+
+
+ /**
+ * Generates the match string for a single site, realigning it when it is not perfect.
+ * Uses basesP for plus-strand sites and basesM otherwise.
+ *
+ * @param id Numeric read ID, used in assertions, verbose output and the realign calls
+ * @param ss Site to process; its match, gaps and perfect flags may be modified, and the
+ * realign calls presumably update its coordinates and slowScore as well (TODO confirm)
+ * @param basesP Bases used when ss.plus() is true
+ * @param basesM Bases used when ss.plus() is false
+ * @param maxImperfectSwScore Only read in the non-GEN_MATCH_FAST branch
+ * @param maxSwScore Used to derive the minimum alignment score limit and to set the perfect flag
+ * @param mate Not referenced in the visible body
+ * @return ss.slowScore after processing
+ */
+ protected final int genMatchStringForSite(final long id, final SiteScore ss, final byte[] basesP, final byte[] basesM, final int maxImperfectSwScore, final int maxSwScore, final Read mate){
+ final byte[] bases=ss.plus() ? basesP : basesM;
+ assert(Read.CHECKSITE(ss, bases, id));
+ assert(msa!=null);
+
+
+ //Lowest score passed to the realigner; derived from the paired or unpaired minimum score ratio.
+ final int minMsaLimit;
+ if(PAIRED){
+// minMsaLimit=-100+(int)(MINIMUM_ALIGNMENT_SCORE_RATIO_PRE_RESCUE*maxSwScore);
+ minMsaLimit=-1+(int)(MINIMUM_ALIGNMENT_SCORE_RATIO_PAIRED*maxSwScore);
+// minMsaLimit=0;
+ }else{
+ minMsaLimit=-1+(int)(MINIMUM_ALIGNMENT_SCORE_RATIO*maxSwScore);
+// minMsaLimit=0;
+ }
+
+ if(GEN_MATCH_FAST){
+
+ assert(!(SLOW_ALIGN || AbstractIndex.USE_EXTENDED_SCORE) || AbstractIndex.GENERATE_BASE_SCORES_FROM_QUALITY ||
+ (ss.slowScore==maxSwScore) == ss.perfect()) :
+ bases.length+", "+ss.toText()+", "+maxSwScore+", "+ss.slowScore+", "+ss.perfect()+", "+ss.semiperfect();
+
+ //TODO: This WAS disabled because I saw a read marked perfect with a sub in it, probably with quality 0 at that point.
+ if((SLOW_ALIGN || AbstractIndex.USE_EXTENDED_SCORE) && ss.perfect()){
+ //Perfect site: the match string is built directly from the read length, with no realignment.
+ assert(ss.stop-ss.start==(bases.length-1));
+ ss.match=makePerfectMatchString(bases.length);
+ assert(ss.isPerfect(bases)) : id+", "+ss; //TODO: Slow assertion
+ }else{
+ int oldScore=ss.slowScore;
+ assert(ss.gaps==null || ss.gaps[0]==ss.start && ss.gaps[ss.gaps.length-1]==ss.stop);
+ //No padding for perfect/semiperfect sites; otherwise at least 6.
+ int padding=(ss.perfect || ss.semiperfect ? 0 : Tools.max(SLOW_ALIGN_PADDING, 6));
+
+ if(verbose){System.err.println("Attempting to realign read:\n"+id+", "+ss+"\npadding="+padding+"\nrescued="+ss.rescued());}
+
+ TranslateColorspaceRead.realign_new(ss, bases, msa, padding, true, minMsaLimit, MAX_INDEL<1, id); //Also generates the match string
+ ss.gaps=GapTools.fixGaps(ss.start, ss.stop, ss.gaps, Shared.MINGAP);
+
+ if(verbose){System.err.println("Realigned read:\n"+id+", "+ss+"\npadding="+padding+"\nrescued="+ss.rescued());}
+ assert(Read.CHECKSITE(ss, bases, id));
+
+ //NOTE(review): the next line appears truncated by extraction (text between a '<' and a later '>' is missing);
+ //restore it from the upstream source. The block it opens performs a second realignment with more padding.
+ if(ss.slowScore"+ss.slowScore);
+ }
+
+ //Extra padding for the retry, limited to half of the columns left in the alignment matrix.
+ int extra=(MAX_INDEL>0 ? 80 : 20)+SLOW_ALIGN_PADDING;
+ int expectedLen=GapTools.calcGrefLen(ss.start, ss.stop, ss.gaps); //TODO Gaps should be correct here!!!
+ int remaining=(msa.maxColumns-expectedLen-2);
+ extra=Tools.max(0, Tools.min(remaining/2, extra));
+ TranslateColorspaceRead.realign_new(ss, bases, msa, extra, true, minMsaLimit, false, id);
+ ss.gaps=GapTools.fixGaps(ss.start, ss.stop, ss.gaps, Shared.MINGAP);
+ assert(Read.CHECKSITE(ss, bases, id));
+
+ if(verbose){
+ System.err.println("\n-> "+ss.start+","+ss.stop+","+ss.slowScore+
+ /*(r.originalSite==null ? "" : "\t*"+r.originalSite)+*/"\t(extra = "+extra+")");
+ }
+ }
+ if(verbose){System.err.println("---- B ----");}
+ assert(Read.CHECKSITE(ss, bases, id));
+
+ if(verbose){
+ System.err.println("---- D3 ----");
+ System.err.println(ss);
+ System.err.println("Checking perfect status: ss.perfect="+ss.perfect()+", ss.semi="+ss.semiperfect()+
+ ", maxSwScore="+maxSwScore+", ss.slowScore="+ss.slowScore);
+ }
+ //Re-evaluate the perfect flag after realignment.
+ ss.setPerfectFlag(maxSwScore, bases);
+ if(verbose){
+ System.err.println("---- E ----");
+ System.err.println("Checking perfect status: ss.perfect="+ss.perfect()+", ss.semi="+ss.semiperfect()+
+ ", maxSwScore="+maxSwScore+", ss.slowScore="+ss.slowScore);
+ }
+
+ assert(Read.CHECKSITE(ss, bases, id));
+ }
+ }else{
+ //Non-fast path: only perfect sites are supported; the imperfect branch trips assertions.
+ if(verbose){System.err.println("---- F ----");}
+ ChromosomeArray cha=Data.getChromosome(ss.chrom);
+
+ if(ss.perfect()){
+ ss.match=makePerfectMatchString(bases.length);
+ }else{
+ assert(false) : "TODO: This does not take strand into account";
+ if(ss.slowScore>=maxImperfectSwScore){
+ //TODO
+ }
+
+ if(msa!=null){
+ assert(false) : "0 is not good here; try a non-indel match string.";
+ int[] max=msa.fillLimited(bases, cha.array, ss.start, ss.stop, 0, ss.gaps);
+ // System.err.print("*");
+ ss.match=msa.traceback(bases, cha.array, ss.start, ss.stop, max[0], max[1], max[2], ss.gaps!=null);
+ }
+ }
+ }
+ if(verbose){System.err.println("---- G ----");}
+
+ assert(Read.CHECKSITE(ss, bases, id));
+ return ss.slowScore;
+ }
+
+
+
+ /** Returns the number of additional bases away that should be searched for slow align.
+ * This should probably be called between quickMap and slowAlign, only on
+ * sites where stop-start<=bases.length-1 */
+ final void findTipDeletions(final Read r, final byte[] basesP, final byte[] basesM, final int maxSwScore, final int maxImperfectScore){
+
+ boolean findRight=r.quality==null || (r.minQualityLastNBases(TIP_DELETION_MAX_TIPLEN)>=TIP_DELETION_MIN_QUALITY &&
+ r.avgQualityLastNBases(TIP_DELETION_MAX_TIPLEN)>=TIP_DELETION_AVG_QUALITY);
+ boolean findLeft=r.quality==null || (r.minQualityFirstNBases(TIP_DELETION_MAX_TIPLEN)>=TIP_DELETION_MIN_QUALITY &&
+ r.avgQualityFirstNBases(TIP_DELETION_MAX_TIPLEN)>=TIP_DELETION_AVG_QUALITY);
+ if(!findRight && !findLeft){
+// System.err.print(".");
+ return;
+ }
+// System.err.print("*");
+
+ for(SiteScore ss : r.sites){
+ final byte[] bases=(ss.strand==Gene.PLUS ? basesP : basesM);
+ if(!ss.semiperfect && ss.slowScore=maxImperfectScore /*&& ss.stop-ss.start<=basesP.length-1*/){return false;}
+ assert(lookRight || lookLeft);
+ assert(TIP_DELETION_MAX_TIPLEN>2);
+ if(bases.length<=2*TIP_DELETION_MAX_TIPLEN){return false;}
+ assert(TIP_DELETION_MAX_TIPLEN0);
+
+ int maxSearch=TIP_DELETION_SEARCH_RANGE;
+ maxSearch=Tools.min(maxSearch, ALIGN_COLUMNS-(SLOW_RESCUE_PADDING+8+Tools.max(bases.length, ss.stop-ss.start)));
+ if(maxSearch<1){return false;}
+
+ boolean changed=false;
+
+ if(lookRight){
+ int x=findTipDeletionsRight(bases, ss.chrom, ss.stop, maxSearch, TIP_DELETION_MAX_TIPLEN);
+ if(x>0){
+ assert(x+ss.stop-ss.start0){
+ assert(y+ss.stop-ss.start(anchor.sites.size());
+ }
+
+ final int maxLooseSwScore=msa.maxQuality(basesP.length);
+ final int maxAnchorSwScore=msa.maxQuality(anchor.bases.length);
+ final int maxImperfectScore=msa.maxImperfectScore(basesP.length);
+
+ final int bestLooseScore=loose.sites.isEmpty() ? 0 : loose.topSite().slowScore;
+ final int bestAnchorScore=anchor.topSite().slowScore;
+
+ if(bestLooseScore==maxLooseSwScore && bestAnchorScore==maxAnchorSwScore
+ && anchor.topSite().pairedScore>0){return;}
+
+ int rescueScoreLimit=(int)(0.95f*bestAnchorScore);
+// int retainScoreLimit=(int)(bestLooseScore>0 ? 0.58f*bestLooseScore : 0.58f*maxLooseSwScore);
+ int retainScoreLimit=Tools.max((int)(0.68f*bestLooseScore), (int)(0.4f*maxLooseSwScore));
+ int retainScoreLimit2=Tools.max((int)(0.95f*bestLooseScore), (int)(0.55f*maxLooseSwScore));
+ final int maxMismatches=PERFECTMODE ? 0 : (bestLooseScore>maxImperfectScore) ? 5 : (int)(0.60f*basesP.length-1); //Higher number is more lenient
+ assert(PERFECTMODE || maxMismatches>1 || loose.bases.length<16) : loose; //Added the <16 qualifier when a 4bp read failed this assertion
+
+ final boolean findTipDeletions=FIND_TIP_DELETIONS && bestLooseScore=TIP_DELETION_MIN_QUALITY
+ && loose.avgQualityLastNBases(TIP_DELETION_MAX_TIPLEN)>=TIP_DELETION_AVG_QUALITY);
+ final boolean findLeft=findTipDeletions && loose.quality==null || (loose.minQualityFirstNBases(TIP_DELETION_MAX_TIPLEN)>=TIP_DELETION_MIN_QUALITY
+ && loose.avgQualityFirstNBases(TIP_DELETION_MAX_TIPLEN)>=TIP_DELETION_AVG_QUALITY);
+
+// int searchIntoAnchor=Tools.max(20, Tools.min(anchor.bases.length, loose.bases.length));
+ for(SiteScore ssa : anchor.sites){
+ if(ssa.slowScoreretainScoreLimit && ss.isInBounds()){
+ if(ss.score>retainScoreLimit2){//Set them as paired to make them more resistant to being discarded
+ ss.pairedScore=Tools.max(ss.pairedScore, ss.slowScore+ssa.slowScore/4);
+ ssa.pairedScore=Tools.max(ssa.pairedScore, ssa.slowScore+ss.slowScore/4);
+ assert(ss.pairedScore>0);
+ assert(ssa.pairedScore>0);
+ }
+ loose.sites.add(ss);
+ }
+ }
+ }
+ }else{
+ assert(ssa.pairedScore>0);
+ assert(ssa.pairedScore>ssa.quickScore || ssa.pairedScore>ssa.slowScore) : ssa.toText();
+ }
+ }
+ }
+
+
+ /**
+ * Rescores a rescued site: first without indels, then (inside the branch opened by the damaged
+ * condition below) with a limited fill-and-score alignment, updating the site's
+ * scores, coordinates, match string and perfect flags in place.
+ *
+ * @param bases Read bases; presumably already oriented for the site's strand (TODO confirm)
+ * @param ss Site to rescore; modified in place
+ * @param maxScore Maximum possible score; slowScore==maxScore marks the site perfect
+ * @param maxImperfectScore Forwarded to findTipDeletions
+ * @param findTipDeletionsRight Whether to look for a tip deletion on the right side
+ * @param findTipDeletionsLeft Whether to look for a tip deletion on the left side
+ */
+ final void slowRescue(final byte[] bases, SiteScore ss, final int maxScore, final int maxImperfectScore,
+ boolean findTipDeletionsRight, boolean findTipDeletionsLeft){
+
+ int swscoreNoIndel=msa.scoreNoIndels(bases, ss.chrom, ss.start);
+ final int oldStart=ss.start; //Restored if the limited alignment returns nothing
+
+ //NOTE(review): the next condition appears truncated by extraction (text between a '<' and a later '>' is missing);
+ //restore it from the upstream source.
+ if(swscoreNoIndel0){
+ ss.slowScore=swscoreNoIndel;
+ if(findTipDeletionsRight || findTipDeletionsLeft){
+ boolean changed=findTipDeletions(ss, bases, maxImperfectScore, findTipDeletionsRight, findTipDeletionsLeft);
+ //The site moved, so any existing match string is stale and the no-indel score must be recomputed.
+ if(changed){
+ ss.match=null;
+ swscoreNoIndel=msa.scoreNoIndels(bases, ss.chrom, ss.start);
+ }
+ }
+
+ final int minMsaLimit=-CLEARZONE1e+(int)(MINIMUM_ALIGNMENT_SCORE_RATIO_PAIRED*maxScore);
+
+ //The limited alignment only needs to beat the no-indel score (or the global minimum, if higher).
+ final int minscore=Tools.max(swscoreNoIndel, minMsaLimit);
+ final int[] swscoreArray=msa.fillAndScoreLimited(bases, ss.chrom, ss.start, ss.stop, SLOW_RESCUE_PADDING, minscore, ss.gaps);
+
+ if(swscoreArray!=null){
+ //Elements 0..2 are read as score, start and stop.
+ ss.slowScore=ss.score=swscoreArray[0];
+ ss.start=swscoreArray[1];
+ ss.stop=swscoreArray[2];
+
+ //A 6-element result also carries the three values passed to traceback (elements 3..5).
+ if(QUICK_MATCH_STRINGS && swscoreArray!=null && swscoreArray.length==6 && swscoreArray[0]>=minscore && (PRINT_SECONDARY_ALIGNMENTS || USE_SS_MATCH_FOR_PRIMARY)){
+ assert(swscoreArray.length==6) : swscoreArray.length;
+ assert(swscoreArray[0]>=minscore) : "\n"+Arrays.toString(swscoreArray)+"\n"+minscore;
+ ss.match=msa.traceback(bases, Data.getChromosome(ss.chrom).array, ss.start-SLOW_RESCUE_PADDING, ss.stop+SLOW_RESCUE_PADDING,
+ swscoreArray[3], swscoreArray[4], swscoreArray[5], ss.gaps!=null);
+ ss.fixXY(bases, true, msa);
+ }else{ss.match=null;}
+
+ }else{
+ //Limited alignment produced nothing: fall back to the ungapped placement at the original start.
+ ss.slowScore=ss.score=swscoreNoIndel;
+ ss.start=oldStart;
+ ss.stop=ss.start+bases.length-1;
+ }
+ }else{
+ ss.slowScore=ss.score=swscoreNoIndel;
+ ss.stop=ss.start+bases.length-1;
+ }
+ //Rescued sites always get a pairedScore above their own score.
+ ss.pairedScore=ss.score+1;
+ assert(ss.slowScore<=maxScore);
+ ss.perfect=(ss.slowScore==maxScore);
+ if(ss.perfect){ss.semiperfect=true;}
+ else{ss.setPerfect(bases);}
+ }
+
+
+ protected static final void capSiteList(Read r, int cap, boolean printSecondary){
+ if(r==null || r.sites==null || cap<0){return;}
+ if(cap==0){r.sites=null;}
+ else{
+ for(int i=r.sites.size()-1; i>=cap; i--){r.sites.remove(i);}
+ }
+ if(!printSecondary || r.numSites()<2){return;}
+ int max=r.topSite().slowScore;
+ int min=Tools.min(max-500, (int)(max*.95f));
+ for(int i=r.sites.size()-1; i>0; i--){
+ if(r.sites.get(i).slowScore0; i--){
+ SiteScore ss2=r.sites.get(i);
+ if(ss1.chrom==ss2.chrom && ss1.strand==ss2.strand && ss1.start==ss2.start && ss1.stop==ss2.stop){
+ if(!Shared.anomaly){
+// Shared.anomaly=true;
+// System.err.println("Ignoring anomalous duplicate site: "+"\n"+r.toText(false)+(r.mate==null ? "" : "\n"+r.mate.toText(false))+"\n");
+ System.err.println("Ignoring anomalous duplicate site for rid="+r.numericID);
+// new Exception().printStackTrace(System.err);
+ }
+ r.sites.remove(i);
+ x++;
+ }else{break;}
+ }
+ return x;
+ }
+
+ protected final void removeUnmapped(ArrayList list){
+ for(int i=0; i list){
+ for(int i=0; i list){
+ for(int i=0; i list, boolean retainPaired, int maxScore, boolean specialCasePerfect, int minSitesToRetain, int maxSitesToRetain);
+
+ public final int trimListAdvanced(ArrayList list, boolean retainPaired, boolean retainSemiperfect, int maxScore, boolean specialCasePerfect,
+ int minSitesToRetain, int maxSitesToRetain, boolean indexUsesExtendedScore, float thresh){
+ if(list==null || list.size()==0){return -99999;}
+ if(list.size()==1){return list.get(0).score;}
+
+ final int highestScore;
+ if(indexUsesExtendedScore){
+
+ highestScore=Tools.trimSiteList(list, .6f, retainPaired, true, minSitesToRetain, maxSitesToRetain);
+ if(highestScore==maxScore && specialCasePerfect){
+ Tools.trimSiteList(list, .94f, retainPaired, true, minSitesToRetain, maxSitesToRetain);
+ if(list.size()>8){Tools.trimSiteList(list, .99f, retainPaired, true, minSitesToRetain, maxSitesToRetain);}
+ return highestScore;
+ }
+
+ }else{
+ highestScore=Tools.trimSiteList(list, .4f, retainPaired, true, minSitesToRetain, maxSitesToRetain);
+ thresh=thresh*0.5f;
+ }
+
+ int lim, lastScore=list.get(0).score;
+ long area=lastScore;
+ for(lim=1; lim list, final byte[] basesP, final byte[] basesM,
+ final int maxSwScore, final int maxImperfectSwScore);
+
+ /**
+ * Legacy hook for the "save ambiguous xy" feature, which is now irrelevant.
+ * Returns true immediately unless save_xy is set. The save_xy path is unfinished:
+ * it fails an assertion when assertions are enabled; otherwise it optionally
+ * clears the list and reports the read as ambiguous.
+ *
+ * @param list Candidate sites; cleared when save_xy and removeAmbiguous are both true
+ * @param primary Unused
+ * @param removeAmbiguous Whether to empty the list on the save_xy path
+ * @param clearzone Unused (was consumed by the disabled chromosome 23/24 filtering)
+ * @param save_xy Enables the unfinished legacy path
+ * @return Always true
+ */
+ public final boolean processAmbiguous(ArrayList list, boolean primary, boolean removeAmbiguous, int clearzone, boolean save_xy){
+     if(!save_xy){return true;}
+
+     //Unfinished path; kept only so that enabling save_xy fails loudly under -ea.
+     assert(false) : "Needs to be redone with contig names.";
+     assert(list.size()>1);
+
+     //The former clearzone trimming and chromosome 23/24 counting were disabled,
+     //so nothing can resolve the ambiguity any more.
+     final boolean ambiguous=true;
+
+     assert(list.size()>=1);
+     assert(list.size()>1);
+     if(removeAmbiguous){list.clear();}
+
+     return ambiguous;
+ }
+
+
+ /** Abstract hook implemented by subclasses; presumably the first pass of per-read statistics (TODO confirm against implementations). */
+ public abstract void calcStatistics1(final Read r, final int maxSwScore, final int maxPossibleQuickScore);
+
+
+ /** Abstract hook implemented by subclasses; presumably the second pass of per-read statistics (TODO confirm against implementations). */
+ public abstract void calcStatistics2(final Read r, final int maxSwScore, final int maxPossibleQuickScore);
+
+// /** Assumes list is sorted */
+// public abstract void genMatchString(final Read r, final byte[] basesP, final byte[] basesM, final int maxImperfectSwScore, final int maxSwScore, boolean setSSScore, boolean recur);
+
+ /** Abstract hook implemented by subclasses to process one read; basesM is presumably the minus-strand form of the read's bases (TODO confirm). */
+ public abstract void processRead(Read r, final byte[] basesM);
+
+ /**
+ * Deprecated predecessor of applyClearzone3, for unpaired reads only.
+ * Returns false without changes when the read is unmapped, ambiguous, discarded, has fewer
+ * than two sites, or when the best score leads the second-best by at least CLEARZONE3.
+ * The remainder of the method (how the computed penalty is applied and the return value)
+ * is not recoverable from the damaged line near the end.
+ */
+ @Deprecated
+ protected final boolean applyClearzone3_old(Read r, int CLEARZONE3, float INV_CLEARZONE3){
+
+ assert(!r.paired()); //This is currently for unpaired reads
+ if(!r.mapped() || r.ambiguous() || r.discarded() || r.numSites()<2){return false;}
+
+ final int score1=r.topSite().slowScore;
+ final int score2=r.sites.get(1).slowScore;
+ final int score3=(r.sites.size()>2 ? r.sites.get(2).slowScore : -1); //-1 means there is no third site
+ int dif=score1-score2;
+
+ assert(r.mapScore==score1) : r.mapScore+", "+r.topSite().toText();
+
+ assert(score1==r.mapScore);
+ assert(score1>=score2) : "\n"+r.topSite().toText()+"\t<\t"+r.sites.get(1).toText()+"\n"+r.toText(false)+"\n";
+ if(dif>=CLEARZONE3){return false;}
+
+// final int dif2=40+(CLEARZONE3-dif)/3;
+// final int dif2=(CLEARZONE3-dif)/2;
+ int dif2=(CLEARZONE3-dif); //Larger when the top two scores are closer
+
+ float f=dif2*INV_CLEARZONE3;
+
+ int sub=(dif2+2*(int)(f*dif2));
+
+ if(score3!=-1){
+ assert(score1>=score3);
+ dif=score1-score3;
+ assert(score1>=score3);
+ //NOTE(review): the next line appears truncated by extraction (text between a '<' and a later '>' is missing);
+ //restore it from the upstream source.
+ if(dif0;
+ }
+
+ /**
+ * Lowers the scores of an unpaired read when lower-ranked sites score close to the top site.
+ * A penalty is accumulated from calcCZ3_fraction weighted by CZ3_MULTS, passed through a
+ * saturating curve, and subtracted from every site's score and slowScore and from r.mapScore.
+ *
+ * @param r Unpaired read; modified in place when a penalty applies
+ * @param CLEARZONE3 Score gap at or beyond which a competing site contributes nothing
+ * @param INV_CLEARZONE3 Forwarded to calcCZ3_fraction; presumably 1/CLEARZONE3 (TODO confirm)
+ * @return true if scores were reduced, false if the read was left unchanged
+ */
+ protected final boolean applyClearzone3(Read r, int CLEARZONE3, float INV_CLEARZONE3){
+
+ assert(!r.paired()); //This is currently for unpaired reads
+ final ArrayList list=r.sites;
+ if(!r.mapped() || r.ambiguous() || r.discarded() || list==null || list.size()<2){return false;}
+
+ final int score1=list.get(0).slowScore;
+ assert(r.mapScore==score1) : r.mapScore+", "+list.get(0).toText()+"\n"+r;
+
+ float sub=0; //Accumulated, weighted penalty fraction
+ final int max=Tools.min(CZ3_MULTS.length, list.size()); //Only as many sites as there are multipliers
+ //NOTE(review): the next line appears truncated by extraction (text between a '<' and a later '>' is missing,
+ //including the loop header and the declaration of ss2); restore it from the upstream source.
+ for(int i=1; i2 && ss2.slowScore=CLEARZONE3){break;}
+// int dif2=(CLEARZONE3-dif);
+// float f=dif2*INV_CLEARZONE3;
+// sub+=(dif2+2*(f*dif2))*CZ3_MULTS[i];
+ float f=calcCZ3_fraction(score1, ss2.slowScore, CLEARZONE3, INV_CLEARZONE3);
+ if(f<=0){break;}
+ sub+=(f*CZ3_MULTS[i]);
+ }
+ }
+ assert(sub>=0);
+ if(sub<=0){return false;}
+
+ float sub2;
+// float asymptote=8f+0.0267f*r.bases.length;
+ float asymptote=4f+0.03f*r.bases.length; //Longer reads tolerate a larger maximum penalty
+ sub=sub*1.8f;
+ //Saturating curve: approaches CLEARZONE3*asymptote as sub grows.
+ sub2=CLEARZONE3*((asymptote*sub)/(sub+asymptote));
+// sub2=CLEARZONE3*sub;
+// System.out.println("sub="+sub+", sub2="+sub2+", CLEARZONE3="+CLEARZONE3+", (5*sub)="+(5*sub)+", (sub+5*CLEARZONE3)="+(sub+5*CLEARZONE3));
+ int subi=(int)(sub2+0.5f); //Round to nearest
+ //Never push mapScore below 300.
+ if(subi>=r.mapScore-300){
+ subi=r.mapScore-300;
+ }
+ if(subi<=0){return false;}
+
+ for(SiteScore ss : list){
+ ss.score-=subi;
+ ss.slowScore-=subi;
+ }
+ r.mapScore-=subi;
+ assert(r.mapScore>200);
+ return true;
+ }
+
+
+// protected float calcCZ3(int score1, int score2, int CLEARZONE3, float INV_CLEARZONE3){
+//
+// int dif=score1-score2;
+// if(dif>=CLEARZONE3){return 0;}
+// //Now dif is between 0 and CZ3
+//
+//// final int dif2=40+(CLEARZONE3-dif)/3;
+//// final int dif2=(CLEARZONE3-dif)/2;
+// int dif2=(CLEARZONE3-dif); //dif2 is higher if the scores are closer.
+//
+// float f=dif2*INV_CLEARZONE3; //f ranges linearly from 1 (if the scores are identical) to 0 (when score2 is maximally below score1)
+//
+// float f2=f*f;
+// float f7=(float)Math.pow(f, .7);
+//
+//// return (dif2+2f*f*dif2+2f*Tools.min(f2,0.5f)*dif2);
+// return (CLEARZONE3*f7+2f*f*dif2+2f*Tools.min(f2,0.5f)*dif2);
+// }
+
+
+ protected float calcCZ3_fraction(int score1, int score2, int CLEARZONE3, float INV_CLEARZONE3){
+
+ int dif=score1-score2;
+ if(dif>=CLEARZONE3){return 0;}
+ //Now dif is between 0 and CZ3
+
+// final int dif2=40+(CLEARZONE3-dif)/3;
+// final int dif2=(CLEARZONE3-dif)/2;
+ int dif2=(CLEARZONE3-dif); //dif2 is higher if the scores are closer.
+
+ float f=dif2*INV_CLEARZONE3; //f ranges linearly from 1 (if the scores are identical) to 0 (when score2 is maximally below score1)
+
+ float f2=f*f;
+// float f7=(float)Math.pow(f, .7);
+
+// return (dif2+2f*f*dif2+2f*Tools.min(f2,0.5f)*dif2);
+ return f+2f*f2+2f*f2*f;
+ }
+
+ /**
+ * Abstract hook implemented by subclasses for the initial pairing of the sites of r and r2.
+ * The trim flag presumably requests trimming of weak sites after pairing (TODO confirm against implementations).
+ * @return Number of perfect pairs
+ */
+ public abstract int pairSiteScoresInitial(Read r, Read r2, boolean trim);
+
+
+
+
+
+ /**
+ * Recomputes pairedScore for every site of two mated reads.
+ * All pairedScores are first reset to 0. If both reads have sites, both lists are sorted with
+ * SiteScore.PCOMP and scanned for site pairs whose inner and outer distances are within limits.
+ * Each such pair raises the pairedScore of both sites.
+ *
+ * @param r First read
+ * @param r2 Second read (mate)
+ * @param trim If true, sites are trimmed below 95% of the best score seen among paired sites
+ * @param setScore If true, each site's score is raised to its pairedScore when that is higher
+ * @param MAX_PAIR_DIST Maximum allowed inner distance
+ * @param AVERAGE_PAIR_DIST Expected inner distance; deviation from it reduces the pairing bonus
+ * @param SAME_STRAND_PAIRS Whether properly paired sites are expected on the same strand
+ * @param REQUIRE_CORRECT_STRANDS_PAIRS If true, sites with the wrong strand relationship are not paired
+ * @param maxTrimSitesToRetain Forwarded to Tools.trimSitesBelowCutoff
+ */
+ protected static void pairSiteScoresFinal(Read r, Read r2, boolean trim, boolean setScore, int MAX_PAIR_DIST, int AVERAGE_PAIR_DIST,
+ boolean SAME_STRAND_PAIRS, boolean REQUIRE_CORRECT_STRANDS_PAIRS, int maxTrimSitesToRetain){
+
+ //Start from a clean slate: no site is considered paired.
+ if(r.sites!=null){
+ for(SiteScore ss : r.sites){ss.pairedScore=0;}
+ }
+ if(r2.sites!=null){
+ for(SiteScore ss : r2.sites){ss.pairedScore=0;}
+ }
+
+ if(r.numSites()<1 || r2.numSites()<1){return;}
+
+ SiteScore.PCOMP.sort(r.sites);
+ SiteScore.PCOMP.sort(r2.sites);
+
+ //Highest unpaired score (ss.score) among sites that took part in a pair; used for trimming.
+ int maxPairedScore1=-1;
+ int maxPairedScore2=-1;
+
+
+// if(verbose){
+// System.out.println(r.list.size()+", "+r2.list.size());
+// System.out.println();
+// for(SiteScore ss : r.list){
+// System.out.println(ss.toText());
+// }
+// System.out.println();
+// for(SiteScore ss : r2.list){
+// System.out.println(ss.toText());
+// }
+// System.out.println();
+// }
+
+ //Fraction of the mate's score credited to a site; clamped to [1/4, 1/2] and scaled by relative read length.
+ final float mult1=Tools.min(1/2f, Tools.max(1/4f, (r.bases.length/(4f*r2.bases.length))));
+ final float mult2=Tools.min(1/2f, Tools.max(1/4f, (r2.bases.length/(4f*r.bases.length))));
+
+ final int ilimit=r.sites.size()-1;
+ final int jlimit=r2.sites.size()-1;
+
+ final int outerDistLimit=(Tools.max(r.bases.length, r2.bases.length)*OUTER_DIST_MULT)/OUTER_DIST_DIV; //Minimum pairing distance
+ final int expectedFragLength=AVERAGE_PAIR_DIST+r.bases.length+r2.bases.length;
+
+ if(verboseS){
+ System.err.println("************************** PAIRING ********************************");
+ System.err.println("outerDistLimit="+outerDistLimit+", MAX_PAIR_DIST="+MAX_PAIR_DIST);
+ }
+
+ for(int i=0, j=0; i<=ilimit && j<=jlimit; i++){
+ SiteScore ss1=r.sites.get(i);
+ SiteScore ss2=r2.sites.get(j);
+
+ //NOTE(review): the next line appears truncated by extraction (text between a '<' and a later '>' is missing);
+ //restore it from the upstream source. It advances j past r2 sites that cannot pair with ss1.
+ while(jMAX_PAIR_DIST))){
+ j++;
+// if(verbose){System.err.println("a.Incrementing j->"+j);}
+ ss2=r2.sites.get(j);
+ }
+
+ //Try every r2 site from j onward until one is on a later chrom or too far to the right.
+ for(int k=j; k<=jlimit; k++){
+ ss2=r2.sites.get(k);
+
+ if(verboseS){
+ System.err.println("Considering sites:\n"+ss1+"\n"+ss2);
+ }
+
+ if(ss2.chrom>ss1.chrom){break;}
+ // if(verbose){System.err.println("Same chrom");}
+ if(ss2.start-ss1.stop>MAX_PAIR_DIST){break;}
+
+ final int innerdist;
+ final int outerdist;
+
+ //assert(!SAME_STRAND_PAIRS) : "TODO";
+
+ //Opposite-strand pairs under strict strand rules measure from the plus-strand site;
+ //every other case measures from whichever site starts first.
+ if(REQUIRE_CORRECT_STRANDS_PAIRS){
+ if(ss1.strand!=ss2.strand){
+ if(ss1.strand==Gene.PLUS){
+ innerdist=ss2.start-ss1.stop;
+ outerdist=ss2.stop-ss1.start;
+ }else{
+ innerdist=ss1.start-ss2.stop;
+ outerdist=ss1.stop-ss2.start;
+ }
+ }else{
+ if(ss1.start<=ss2.start){
+ innerdist=ss2.start-ss1.stop;
+ outerdist=ss2.stop-ss1.start;
+ }else{
+ innerdist=ss1.start-ss2.stop;
+ outerdist=ss1.stop-ss2.start;
+ }
+ }
+ }else{
+ if(ss1.start<=ss2.start){
+ innerdist=ss2.start-ss1.stop;
+ outerdist=ss2.stop-ss1.start;
+ }else{
+ innerdist=ss1.start-ss2.stop;
+ outerdist=ss1.stop-ss2.start;
+ }
+ }
+
+ if(verboseS){
+ System.err.println("innerdist="+innerdist+", outerdist="+outerdist);
+ }
+
+// if(ss1.start<=ss2.start){
+// innerdist=ss2.start-ss1.stop;
+// outerdist=ss2.stop-ss1.start;
+// }else{
+// innerdist=ss1.start-ss2.stop;
+// outerdist=ss1.stop-ss2.start;
+// }
+ //NOTE(review): the next line appears truncated by extraction (the assertion message and the start of the
+ //following if-condition have been fused); restore it from the upstream source.
+ assert(outerdist>=innerdist) : "outerdist=outerDistLimit && innerdist<=MAX_PAIR_DIST){
+
+ boolean strandOK=((ss1.strand==ss2.strand)==SAME_STRAND_PAIRS);
+ // if(verbose){System.err.println("strandOK="+strandOK);}
+
+ if(strandOK || !REQUIRE_CORRECT_STRANDS_PAIRS){
+
+ int deviation=absdif(AVERAGE_PAIR_DIST, innerdist);
+
+ final int pairedScore1;
+ final int pairedScore2;
+ if(strandOK){
+ // pairedScore1=ss1.score+(int)(ss2.score*mult1);
+ // pairedScore2=ss2.score+(int)(ss1.score*mult2);
+
+ //Own score, plus 1, plus a share of the mate's score reduced by the deviation from the expected distance (minimum bonus 1).
+ pairedScore1=ss1.score+1+
+ Tools.max(1, (int)(ss2.score*mult1)-(((deviation)*ss2.score)/Tools.max(100,(10*expectedFragLength+100))));
+ pairedScore2=ss2.score+1+
+ Tools.max(1, (int)(ss1.score*mult2)-(((deviation)*ss1.score)/Tools.max(100,(10*expectedFragLength+100))));
+
+
+ }else{//e.g. a junction
+ pairedScore1=ss1.score+ss2.score/16;
+ pairedScore2=ss2.score+ss1.score/16;
+ }
+
+ if(verboseS){
+ System.err.println("strandOK="+strandOK+"\tpairedScore1="+pairedScore1+", pairedScore2="+pairedScore2);
+ System.err.println(" \tscore1="+ss1.score+", score2="+ss2.score);
+ }
+
+ //A site keeps the best pairedScore over all of its partners.
+ ss1.pairedScore=Tools.max(ss1.pairedScore, pairedScore1);
+ ss2.pairedScore=Tools.max(ss2.pairedScore, pairedScore2);
+ maxPairedScore1=Tools.max(ss1.score, maxPairedScore1);
+ maxPairedScore2=Tools.max(ss2.score, maxPairedScore2);
+ // if(verbose){System.err.println("Paired:\nss1="+ss1.toText()+", ss2="+ss2.toText());}
+ }
+ }else{
+ // if(verbose){System.err.println("Out of range");}
+ }
+ }
+ // if(verbose){System.err.println("\nss1="+ss1.toText()+", ss2="+ss2.toText());}
+
+ }
+
+ //Promote paired sites: their score becomes the pairedScore. Unpaired sites must still have pairedScore 0.
+ if(setScore){
+ for(SiteScore ss : r.sites){
+ if(ss.pairedScore>ss.score){ss.score=ss.pairedScore;}
+ else{assert(ss.pairedScore==0);}
+ }
+ for(SiteScore ss : r2.sites){
+ if(ss.pairedScore>ss.score){ss.score=ss.pairedScore;}
+ else{assert(ss.pairedScore==0);}
+ }
+ }
+
+ if(trim){
+// Tools.trimSitesBelowCutoffInplace(r.list, (int)(maxPairedScore1*.95f), false);
+// Tools.trimSitesBelowCutoffInplace(r2.list, (int)(maxPairedScore2*.95f), false);
+ Tools.trimSitesBelowCutoff(r.sites, (int)(maxPairedScore1*.95f), false, true, 1, maxTrimSitesToRetain);
+ Tools.trimSitesBelowCutoff(r2.sites, (int)(maxPairedScore2*.95f), false, true, 1, maxTrimSitesToRetain);
+ }
+ }
+
+ /**
+ * Tests whether two sites are close enough, and correctly oriented, to be treated as a pair.
+ * Sites on different chromosomes never pair. When REQUIRE_CORRECT_STRANDS_PAIRS is set, the
+ * strand relationship must agree with SAME_STRAND_PAIRS.
+ *
+ * @param ss1 Site of the first read
+ * @param ss2 Site of the second read
+ * @param len1 Length of the first read
+ * @param len2 Length of the second read
+ * @param REQUIRE_CORRECT_STRANDS_PAIRS Reject pairs with the wrong strand relationship
+ * @param SAME_STRAND_PAIRS Whether a proper pair has both sites on the same strand
+ * @param MAX_PAIR_DIST Maximum allowed inner distance
+ * @return true if the outer distance reaches the minimum and the inner distance is within MAX_PAIR_DIST
+ */
+ protected final boolean canPair(SiteScore ss1, SiteScore ss2, int len1, int len2,
+         boolean REQUIRE_CORRECT_STRANDS_PAIRS, boolean SAME_STRAND_PAIRS, int MAX_PAIR_DIST){
+     if(ss1.chrom!=ss2.chrom){return false;}
+
+     final boolean sameStrand=(ss1.strand==ss2.strand);
+     if(REQUIRE_CORRECT_STRANDS_PAIRS && sameStrand!=SAME_STRAND_PAIRS){return false;}
+
+     //Minimum outer distance, scaled from the longer read.
+     final int minOuterDist=(Tools.max(len1, len2)*(OUTER_DIST_MULT))/OUTER_DIST_DIV;
+
+     if(verboseS){
+         System.err.println("canPair: outerDistLimit="+minOuterDist);
+     }
+
+     //Decide which site is measured as the left one. Opposite-strand pairs under strict
+     //strand rules measure from the plus-strand site; otherwise the site that starts first.
+     final boolean ss1IsLeft;
+     if(REQUIRE_CORRECT_STRANDS_PAIRS && !sameStrand){
+         ss1IsLeft=(ss1.strand==Gene.PLUS);
+     }else{
+         ss1IsLeft=(ss1.start<=ss2.start);
+     }
+     final SiteScore left=(ss1IsLeft ? ss1 : ss2);
+     final SiteScore right=(ss1IsLeft ? ss2 : ss1);
+
+     final int innerdist=right.start-left.stop;
+     final int outerdist=right.stop-left.start;
+
+     return (innerdist<=MAX_PAIR_DIST && outerdist>=minOuterDist);
+ }
+
+
+// /** Returns the number of additional bases away that should be searched for slow align.
+// * This should probably be called between quickMap and slowAlign, only on
+// * sites where stop-start<=bases.length-1 */
+// public abstract void findTipDeletions(final Read r, final byte[] basesP, final byte[] basesM, final int maxSwScore, final int maxImperfectScore);
+//
+// public abstract boolean findTipDeletions(SiteScore ss, final byte[] bases, final int maxImperfectScore, boolean lookRight, boolean lookLeft);
+
+
+ /** Returns the number of additional bases away that should be searched for slow align.
+ * This should probably be called between quickMap and slowAlign, only on
+ * sites where stop-start<=bases.length-1.
+ * Looks to the right of originalStop for a placement of the read's tip with fewer mismatches.
+ * Returns 0 when no sufficiently better placement is found, otherwise bestStart-originalStop.
+ * The search distance is additionally capped at 30 times the number of mismatches at the original position.
+ * NOTE(review): parts of this body were lost in extraction (see the marked lines), so the
+ * mismatch counting itself is not visible here. */
+ protected final int findTipDeletionsRight(final byte[] bases, final int chrom,
+ int originalStop, int searchDist, int tiplen){
+ ChromosomeArray cha=Data.getChromosome(chrom);
+ byte[] ref=cha.array;
+ //NOTE(review): the next line appears truncated by extraction (text between a '<' and a later '>' is missing);
+ //restore it from the upstream source.
+ if(originalStop1);
+ if(tiplen<4){return 0;}
+// System.err.println("Tiplen="+tiplen+", mismatches="+originalMismatches);
+// System.err.print("* ");
+
+ searchDist=Tools.min(searchDist, 30*originalMismatches);
+ int lastIndexToStart=Tools.min(ref.length-1, originalStop+searchDist); //Never start past the end of the reference
+ //Stop early once a placement with zero mismatches has been found.
+ for(int start=originalStop+1; start<=lastIndexToStart && minMismatches>0; start++){
+// System.err.print("_");
+ int mismatches=0;
+ //NOTE(review): the next line also appears truncated by extraction; restore it from the upstream source.
+ for(int j=0; j2 || originalMismatches-minMismatches<2){
+ return 0;
+ }
+// System.err.println(" $$$ ");
+ return bestStart-originalStop;
+ }
+
+
+ /** Returns the number of additional bases away that should be searched for slow align.
+ * This should probably be called between quickMap and slowAlign, only on
+ * sites where stop-start<=bases.length-1.
+ * Looks to the left of originalStart for a placement of the read's tip with fewer mismatches.
+ * Returns 0 when the site is too close to either end of the reference or when no sufficiently
+ * better placement is found, otherwise originalStart-bestStart.
+ * NOTE(review): parts of this body were lost in extraction (see the marked lines), so the
+ * mismatch counting itself is not visible here. */
+ protected final int findTipDeletionsLeft(final byte[] bases, final int chrom,
+ final int originalStart, int searchDist, int tiplen){
+ ChromosomeArray cha=Data.getChromosome(chrom);
+ byte[] ref=cha.array;
+ if(originalStart+tiplen>=ref.length){return 0;} //fail
+
+ if(cha.minIndex>=originalStart){return 0;} //fail
+
+ int minMismatches=tiplen; //Best (lowest) mismatch count found so far
+ int bestStart=originalStart;
+
+ int lastMismatch=0;
+ int originalMismatches=0;
+ int contig=0;
+ //NOTE(review): the next line appears truncated by extraction (text between a '<' and a later '>' is missing);
+ //restore it from the upstream source.
+ for(int i=0; i1);
+ if(tiplen<4){return 0;}
+// System.err.println("Tiplen="+tiplen+", mismatches="+originalMismatches);
+// System.err.print("* ");
+
+ //Cap the search distance by a bound that grows with the original mismatch count and the tip length.
+ searchDist=Tools.min(searchDist, 16+16*originalMismatches+8*tiplen);
+ int lastIndexToStart=Tools.max(cha.minIndex, originalStart-searchDist); //Never search before the chromosome's first valid index
+ //Stop early once a placement with zero mismatches has been found.
+ for(int start=originalStart-1; start>=lastIndexToStart && minMismatches>0; start--){
+// System.err.print("_");
+ int mismatches=0;
+ //NOTE(review): the next line also appears truncated by extraction; restore it from the upstream source.
+ for(int j=0; j2 || originalMismatches-minMismatches<2){
+ return 0;
+ }
+// System.err.println(" $$$ ");
+ return originalStart-bestStart;
+ }
+
+
+// public abstract void rescue(Read anchor, Read loose, byte[] basesP, byte[] basesM, int searchDist);
+
+
+// public abstract void slowRescue(final byte[] bases, SiteScore ss, final int maxScore, final int maxImperfectScore,
+// boolean findTipDeletionsRight, boolean findTipDeletionsLeft);
+
+
+ /** Assumes bases/colors are already on the correct strand.
+ * Scans a window of the reference next to loc for the best ungapped placement of the read.
+ * The window is [loc, loc+searchDist] when searchRight is true, otherwise [loc-searchDist, loc],
+ * clipped to the chromosome's valid range.
+ * Returns null if bases is null or shorter than 10, or if no placement was accepted.
+ * Otherwise returns a new SiteScore flagged as rescued, with a dummy score, and with the
+ * mismatch count temporarily stored in slowScore.
+ * NOTE(review): the inner comparison loops were lost in extraction (see the marked lines), so how
+ * idealStart and maxContigMatches are used is not visible here. */
+ public final SiteScore quickRescue(final byte[] bases, final int chrom, final byte strand, final int loc, final int searchDist,
+ final boolean searchRight, final int idealStart, final int maxAllowedMismatches, int POINTS_MATCH, int POINTS_MATCH2){
+ if(bases==null || bases.length<10){return null;}
+ ChromosomeArray cha=Data.getChromosome(chrom);
+ byte[] ref=cha.array;
+
+ //Clip the search window so the whole read always fits inside the reference.
+ int lowerBound, upperBound;
+ if(searchRight){
+ lowerBound=Tools.max(cha.minIndex, loc);
+ upperBound=Tools.min(ref.length-bases.length, loc+searchDist);
+ }else{
+ lowerBound=Tools.max(cha.minIndex, loc-searchDist);
+ upperBound=Tools.min(ref.length-bases.length, loc);
+ }
+
+// int minMismatches=(int)(bases.length*.6f); //Default: .75f. Lower numbers are faster with lower quality.
+ int minMismatches=maxAllowedMismatches+1; //Starts one above the limit
+ //For situations like RNASEQ with lots of deletions, a higher value of at least .75 should be used.
+
+ int maxContigMatches=0;
+ int bestScore=0;
+ int bestStart=-1; //-1 means nothing acceptable found yet
+ int bestAbsdif=Integer.MAX_VALUE;
+
+ if(searchRight){
+ //Scan away from loc toward higher coordinates.
+ for(int start=lowerBound; start<=upperBound/* && minMismatches>0*/; start++){
+ int mismatches=0;
+ int contig=0;
+ int currentContig=0;
+ //NOTE(review): the next line appears truncated by extraction (text between a '<' and a later '>' is missing);
+ //restore it from the upstream source.
+ for(int j=0; jbestScore || (score==bestScore && absdif=start && lowerBound<=start);
+// assert(upperBound>=idealStart);
+// assert(lowerBound<=idealStart);
+ }
+ }
+ }else{
+ //Scan away from loc toward lower coordinates.
+ for(int start=upperBound; start>=lowerBound/* && minMismatches>0*/; start--){
+ int mismatches=0;
+ int contig=0;
+ int currentContig=0;
+ //NOTE(review): the next line appears truncated by extraction (text between a '<' and a later '>' is missing);
+ //restore it from the upstream source.
+ for(int j=0; jbestScore || (score==bestScore && absdif=start && lowerBound<=start);
+// assert(upperBound>=idealStart);
+// assert(lowerBound<=idealStart);
+ }
+ }
+ }
+
+ if(bestStart<0){return null;}
+
+ //These scores are dummies and will not quite match the normally generated scores.
+ final int scoreOut;
+ if(USE_AFFINE_SCORE){
+ scoreOut=POINTS_MATCH+(POINTS_MATCH2*(bases.length-1-minMismatches));
+ }else{
+ scoreOut=maxContigMatches+(BASE_HIT_SCORE*(bases.length-minMismatches));
+ }
+
+ //The rescued site spans exactly the read length (ungapped).
+ SiteScore ss=new SiteScore(chrom, strand, bestStart, bestStart+bases.length-1, 0, scoreOut);
+ ss.setPerfect(bases);
+ ss.rescued=true;
+ ss.slowScore=minMismatches; //TODO: Clear this field later!
+ return ss;
+ }
+
+
+ /** Assumes bases/colors are already on the correct strand */
+ protected final int[] quickerRescue(final byte[] bases, final int chrom, int loc, final int searchDist){
+ ChromosomeArray cha=Data.getChromosome(chrom);
+ byte[] ref=cha.array;
+ if(loc0; start++){
+ int mismatches=0;
+ for(int j=0; j ssl=r.sites;
+ if(ssl==null){return 0;}
+ int initial=ssl.size();
+ for(int i=0; imax){
+ ssl.remove(i);
+ i--;
+ ss=null;
+ }else if(/*DONT_OUTPUT_UNMAPPED_READS && */SAM_OUT){
+ if(!Data.isSingleScaffold(ss.chrom, ss.start, ss.stop)){
+ //TODO: Attempt to trim instead of removing
+ ssl.remove(i);
+ i--;
+ ss=null;
+ }
+ }
+ if(ss!=null){
+ int expectedLen=GapTools.calcGrefLen(ss);
+ if(expectedLen>=EXPECTED_LEN_LIMIT){
+ //TODO: Alternately, I could kill the site.
+ ss.stop=ss.start+Tools.min(r.bases.length+40, EXPECTED_LEN_LIMIT);
+ if(ss.gaps!=null){GapTools.fixGaps(ss);}
+ }
+ }
+ }
+
+// System.out.println("Estimated greflen: "+GapTools.calcGrefLen(r.start, r.stop, r.gaps));
+// assert(false);
+
+ return initial-ssl.size();
+ }
+
+ /**
+  * Returns the number of sites counted as removed from ssl with respect to the original site.
+  * NOTE(review): the body of the for loop below was lost during extraction (the loop header
+  * and the condition guarding Tools.condenseStrict are fused on one line), so the exact
+  * removal criterion is not visible here - presumably sites coinciding with original are
+  * dropped; confirm against the upstream source.
+  * @param ssl Candidate sites; a null or empty list yields 0.
+  * @param original Site to compare against; null yields 0.
+  * @return The removed counter.
+  */
+ protected static final int forbidSelfMapping(ArrayList ssl, SiteScore original){
+// assert(original!=null);
+ //Nothing to filter without both a site list and a reference site.
+ if(ssl==null || ssl.isEmpty() || original==null){return 0;}
+ int removed=0;
+ for(int i=0; i0){Tools.condenseStrict(ssl);}
+ return removed;
+ }
+
+
+ /** Generate a score penalty based on the presence of errors near the read tips. */
+ public static int calcTipScorePenalty(final Read r, final int maxScore, final int tiplen){
+ if(!r.mapped() || r.match==null || r.bases.length<2*tiplen){return 0;}
+
+ int points=0;
+ final byte[] match=r.match;
+ final byte[] bases=r.bases;
+ final int last=r.bases.length-1;
+ byte prev='m';
+ for(int i=0, cpos=0; cpos<=tiplen; i++){
+ byte b=match[i];
+ if(b=='m'){
+ cpos++;
+ }else if(b=='D'){
+ if(prev!='D'){points+=2*(tiplen+2-cpos);}
+ }else if(b=='N' || b=='C'){
+ points+=(tiplen+2-cpos);
+ cpos++;
+ }else{
+ if(Character.isDigit(b)){
+ r.match=Read.toLongMatchString(r.match);
+ return calcTipScorePenalty(r, maxScore, tiplen);
+ }
+ assert(b=='I' || b=='S') : ((char)b)+"\n"+new String(match)+"\n"+new String(bases)+"\n";
+ points+=2*(tiplen+2-cpos);
+ cpos++;
+ }
+ prev=b;
+ }
+
+ prev='m';
+ for(int i=match.length-1, cpos=0; cpos<=tiplen; i--){
+ byte b=match[i];
+ if(b=='m'){
+ cpos++;
+ }else if(b=='D'){
+ if(prev!='D'){points+=2*(tiplen+2-cpos);}
+ }else if(b=='N' || b=='C'){
+ points+=(tiplen+2-cpos);
+ cpos++;
+ }else{
+ assert(b=='I' || b=='S');
+ points+=2*(tiplen+2-cpos);
+ cpos++;
+ }
+ prev=b;
+ }
+
+ byte b=bases[0];
+ //homopolymer tip penalty
+ if(b!='N' && b==bases[1]){
+ for(int i=2; i<=tiplen && bases[i]==b; i++){points++;}
+ }
+
+ //homopolymer tip penalty
+ b=bases[last];
+ if(b!='N' && b==bases[last-1]){
+ for(int i=last-2; i>=(last-tiplen) && bases[i]==b; i--){points++;}
+ }
+
+ //Did not seem to help
+// int hits=r.list.get(0).hits;
+// float desired=Tools.min(6, bases.length/12f);
+// if(hits0){
+ r.mapScore-=penalty;
+ for(SiteScore ss : r.sites){
+ ss.score-=penalty;
+ ss.slowScore-=penalty;
+ ss.pairedScore-=penalty;
+ }
+ }
+ }
+
+
+ /**
+  * Summarizes how the read's site list relates to its reference ("original") site.
+  * Returns an 11-element array:
+  * {group of correct hit (or -1), size of correct group, number of groups,
+  * number of elements, correctScore, maxScore, size of top group, num correct, firstElementCorrect,
+  * firstElementCorrectLoose, firstGroupCorrectLoose}
+  * NOTE(review): the header of the main loop was lost during extraction, so the grouping
+  * condition and the assignment of sizeOfTopGroup are not visible in this copy.
+  * @param r Read whose sites are evaluated; the list is assumed to be sorted already.
+  * @param thresh Distance threshold for the strict test; the loose test uses thresh+20.
+  * @return The statistics array described above.
+  */
+ protected int[] calcCorrectness(Read r, int thresh){
+ //assume sorted.
+ ArrayList ssl=r.sites;
+
+ //No sites at all: report "no correct group" (-1) and zero for everything else.
+ if(ssl==null || ssl.isEmpty()){
+ return new int[] {-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+ }
+
+ SiteScore original=r.originalSite;
+ //A read is expected to carry an original site exactly when it is synthetic.
+ assert((original==null) != (r.synthetic()));
+ //Without a recorded origin, the first site in the list serves as the reference.
+ if(original==null){
+ original=ssl.get(0);
+ }
+
+ int group=0;
+ int correctGroup=-1;
+ int groupSize=0;
+ int correctGroupSize=-1;
+ int prevScore=Integer.MAX_VALUE;
+ int sizeOfTopGroup=0;
+ //First site that passes the strict test, if any.
+ SiteScore correct=null;
+
+ int firstElementCorrect=0;
+ int firstElementCorrectLoose=0;
+ int firstGroupCorrectLoose=0;
+
+ int numCorrect=0;
+
+ for(int i=0; iss.score || (AMBIGUOUS_RANDOM && r.ambiguous()) || r.mate!=null) : "i="+i+", r="+r;
+
+ //A group just ended; if it was the correct one, record its final size.
+ if(correctGroup==group){
+ correctGroupSize=groupSize;
+ }
+
+ group++;
+ groupSize=0;
+ prevScore=ss.score;
+ }
+ groupSize++;
+
+
+// boolean b=isCorrectHit(ss, original.chrom, original.strand, original.start, 1, thresh);
+ //b: strict test (both endpoints within thresh); b2: loose test (either endpoint within thresh+20).
+ boolean b=isCorrectHit(ss, original.chrom, original.strand, original.start, original.stop, thresh);
+ boolean b2=isCorrectHitLoose(ss, original.chrom, original.strand, original.start, original.stop, thresh+20);
+ if(b){
+ if(i==0){firstElementCorrect=1;}
+ numCorrect++;
+ //Only the first strictly-correct site determines the correct group.
+ if(correct==null){
+ correct=ss;
+ correctGroup=group;
+ }
+ }
+ if(b2){
+ if(i==0){firstElementCorrectLoose=1;}
+ //NOTE(review): the assertions after the loop require correctGroup!=0, which suggests group is
+ //already >=1 when sites are tested; if so, group==0 can never hold here - confirm.
+ if(group==0){firstGroupCorrectLoose=1;}
+ }
+ }
+ //Close out the final group, which has no following score change to trigger the update above.
+ if(correctGroup==group){
+ correctGroupSize=groupSize;
+ }
+
+ assert(correctGroup!=0 && correctGroup<=group);
+ assert(group<=ssl.size());
+ assert(sizeOfTopGroup>0 && sizeOfTopGroup<=ssl.size());
+ assert((correctGroup>0) == (correctGroupSize>0));
+ return new int[] {correctGroup, correctGroupSize, group, ssl.size(),
+ correct==null ? 0 : correct.score, ssl.get(0).score, sizeOfTopGroup, numCorrect, firstElementCorrect,
+ firstElementCorrectLoose, firstGroupCorrectLoose};
+ }
+
+
+ /**
+  * Strict correctness test: the site must be on the true chromosome and strand, and BOTH of its
+  * endpoints must lie within thresh of the true endpoints.
+  * @param ss Site to test.
+  * @param trueChrom Expected chromosome.
+  * @param trueStrand Expected strand.
+  * @param trueStart Expected start coordinate.
+  * @param trueStop Expected stop coordinate.
+  * @param thresh Maximum allowed absolute difference per endpoint.
+  * @return true if the site matches under the strict criterion.
+  */
+ public static final boolean isCorrectHit(SiteScore ss, int trueChrom, byte trueStrand, int trueStart, int trueStop, int thresh){
+     if(!(ss.chrom==trueChrom && ss.strand==trueStrand)){return false;}
+
+     assert(ss.stop>ss.start) : ss.toText()+", "+trueStart+", "+trueStop;
+     assert(trueStop>trueStart) : ss.toText()+", "+trueStart+", "+trueStop;
+
+     //Reject as soon as one endpoint is too far away; the other endpoint decides the rest.
+     if(absdif(ss.start, trueStart)>thresh){return false;}
+     return absdif(ss.stop, trueStop)<=thresh;
+ }
+
+
+ /**
+  * Loose correctness test: the site must be on the true chromosome and strand, and AT LEAST ONE
+  * of its endpoints must lie within thresh of the corresponding true endpoint.
+  * @param ss Site to test.
+  * @param trueChrom Expected chromosome.
+  * @param trueStrand Expected strand.
+  * @param trueStart Expected start coordinate.
+  * @param trueStop Expected stop coordinate.
+  * @param thresh Maximum allowed absolute difference for the matching endpoint.
+  * @return true if the site matches under the loose criterion.
+  */
+ public static final boolean isCorrectHitLoose(SiteScore ss, int trueChrom, byte trueStrand, int trueStart, int trueStop, int thresh){
+     if(!(ss.chrom==trueChrom && ss.strand==trueStrand)){return false;}
+
+     assert(ss.stop>ss.start) : ss.toText()+", "+trueStart+", "+trueStop;
+     assert(trueStop>trueStart) : ss.toText()+", "+trueStart+", "+trueStop;
+
+     //Accept as soon as one endpoint is close enough.
+     if(absdif(ss.start, trueStart)<=thresh){return true;}
+     return absdif(ss.stop, trueStop)<=thresh;
+ }
+
+ /**
+  * Builds a match string describing a perfect alignment.
+  * @param len Number of symbols to generate.
+  * @return A new array of length len in which every element is 'm'.
+  */
+ protected static final byte[] makePerfectMatchString(int len){
+     final byte[] match=new byte[len];
+     for(int i=0; i<len; i++){match[i]='m';}
+     return match;
+ }
+
+ /**
+  * Absolute difference of two ints, computed as (larger - smaller).
+  * @param a First value.
+  * @param b Second value.
+  * @return a-b when a is greater than b, otherwise b-a.
+  */
+ protected static final int absdif(int a, int b){
+     if(a>b){return a-b;}
+     return b-a;
+ }
+
+ /** Returns maximum read length supported by this mapper */
+ public abstract int maxReadLength();
+
+ /**
+  * Ensure top site is congruent with read.
+  * An unmapped read is trivially congruent. A mapped read with no sites, or with a null top
+  * site, is not. Otherwise start, stop, strand, chrom and match of the top site must equal
+  * those of the read; with assertions enabled a mismatch fails loudly with diagnostics.
+  * Note that match is compared by reference (==), not by content.
+  * @param r Read to check.
+  * @return true if the read is unmapped or its top site agrees with it.
+  */
+ protected static final boolean checkTopSite(Read r){
+     if(!r.mapped()){return true;}
+     if(r.numSites()==0){return false;}
+     SiteScore ss=r.topSite();
+     if(ss==null){return false;}
+     boolean b=(ss.start==r.start) && (ss.stop==r.stop) && (ss.strand==r.strand()) && (ss.chrom==r.chrom) && (ss.match==r.match);
+     //ss is known to be non-null here, so the message reports the per-field comparison directly.
+     assert(b) : "\nread="+r+"\nmate="+r.mate+"\nss="+ss+"\n"+
+         ((ss.start==r.start)+", "+(ss.stop==r.stop)+", "+(ss.strand==r.strand())+", "+(ss.chrom==r.chrom)+", "+(ss.match==r.match))+"\nlist="+r.sites;
+     return b;
+ }
+
+
+ /**
+  * Removes, in place, every site for which hasLongIndel(site.match, maxlen) is true.
+  * The list is walked from the end so that removals do not shift indices still to be visited.
+  * @param list Sites to filter; a null or empty list yields 0.
+  * @param maxlen Length limit passed through to hasLongIndel.
+  * @return Number of sites removed.
+  */
+ protected static final int removeLongIndels(ArrayList<SiteScore> list, int maxlen){
+     if(list==null || list.isEmpty()){return 0;}
+     int removed=0;
+     for(int i=list.size()-1; i>=0; i--){
+         if(hasLongIndel(list.get(i).match, maxlen)){
+             list.remove(i);
+             removed++;
+         }
+     }
+     return removed;
+ }
+
+ /**
+  * Reports whether a match string contains a run longer than maxlen.
+  * NOTE(review): the guard clause and loop header were lost during extraction (they are fused
+  * on the first line of the body), so which symbols count toward a run is not visible here -
+  * presumably consecutive indel symbols; confirm against the upstream source.
+  * @param match Match string to scan; the visible guard tests it for null.
+  * @param maxlen Run length above which true is returned.
+  * @return true as soon as a run exceeds maxlen; false if the scan completes without one.
+  */
+ protected static final boolean hasLongIndel(byte[] match, int maxlen){
+ if(match==null || match.lengthmaxlen){return true;}
+ }else{
+ //Run was interrupted; start counting again.
+ len=0;
+ }
+ prev=b;
+ }
+ return false;
+ }
+
+ /**
+  * TODO: unfinished.
+  * Reads with at most maxlen bases (null bases count as length 0) are handed to processRead
+  * unchanged. Longer reads are split with r.split(minlen, maxlen), but the resulting pieces
+  * are not used yet, so such reads are currently not processed at all.
+  * @param r Read to process.
+  * @param basesM Passed through to processRead.
+  * @param minlen Minimum piece length; asserted to be at least KEYLEN.
+  * @param maxlen Maximum piece length; asserted to be at least minlen.
+  */
+ final void processReadSplit(Read r, byte[] basesM, int minlen, int maxlen){
+     assert(minlen>=KEYLEN && maxlen>=minlen) : KEYLEN+", "+maxlen+", "+minlen;
+     final int len=(r.bases==null ? 0 : r.bases.length);
+     if(len>maxlen){
+         r.split(minlen, maxlen); //TODO: the pieces are discarded for now
+         return;
+     }
+     processRead(r, basesM);
+ }
+
+ /** Returns the finished flag, which finish() sets to true. Synchronized on this object. */
+ public final synchronized boolean finished(){return finished;}
+
+ /** Returns the negation of the finished flag, i.e. true until finish() has been called. */
+ public final synchronized boolean working(){return !finished;}
+
+ /**
+  * Sets the finished flag and wakes every thread waiting on this object's monitor.
+  * Asserts that the flag was not already set, so it is expected to be called once.
+  */
+ final synchronized void finish(){
+ assert(!finished);
+ finished=true;
+ notifyAll();
+ }
+
+ private boolean finished=false;
+
+ private static final float[] CZ3_MULTS=new float[] {0f, 1f, .75f, 0.5f, 0.25f, 0.125f, 0.0625f};
+
+ /*--------------------------------------------------------------*/
+
+ /** Input read source. */
+ protected final ConcurrentReadStreamInterface cris;
+
+
+ /** All reads go here.
+ * If outputunmapped=false, omit unmapped single reads and double-unmapped paired reads. */
+ protected final RTextOutputStream3 outStream;
+ /** All mapped reads (and half-mapped pairs) go here except reads that only map to the blacklist. */
+ protected final RTextOutputStream3 outStreamMapped;
+ /** All unmapped reads (and double-unmapped pairs) go here. */
+ protected final RTextOutputStream3 outStreamUnmapped;
+ /** All reads (and half-mapped pairs) that map best to the blacklist go here. */
+ protected final RTextOutputStream3 outStreamBlack;
+
+
+ /*--------------------------------------------------------------*/
+
+
+ public final String MSA_TYPE;
+ final MSA msa;
+ final TranslateColorspaceRead tcr;
+ public final ReadStats readstats;
+ public final int POINTS_MATCH, POINTS_MATCH2;
+ public final int KEYLEN;
+
+ protected final boolean PERFECTMODE; //Only look for perfect matches
+ protected final boolean SEMIPERFECTMODE; //Only look for perfect and semiperfect matches
+ protected final boolean FORBID_SELF_MAPPING; //Do not allow reads to map to their official origin. Allows you to find next-best matches (when supported)
+ protected final boolean RCOMP_MATE; //Reverse-complement mate prior to mapping
+ /** True if this thread should generate a match string for the best match */
+ protected final boolean MAKE_MATCH_STRING;
+
+ protected final boolean DONT_OUTPUT_UNMAPPED_READS;
+ protected final boolean DONT_OUTPUT_BLACKLISTED_READS;
+ protected final boolean PRINT_SECONDARY_ALIGNMENTS;
+ protected final boolean QUICK_MATCH_STRINGS;
+ protected final boolean USE_SS_MATCH_FOR_PRIMARY=true;
+
+ protected final int MAX_SITESCORES_TO_PRINT;
+
+ /** Scores below the (max possible alignment score)*(MINIMUM_ALIGNMENT_SCORE_RATIO) will be discarded.
+ * Default: 0.4 for synthetic data. */
+ protected final float MINIMUM_ALIGNMENT_SCORE_RATIO;
+ protected final float MINIMUM_ALIGNMENT_SCORE_RATIO_PRE_RESCUE;
+ protected final float MINIMUM_ALIGNMENT_SCORE_RATIO_PAIRED;
+
+ protected final float keyDensity;
+ protected final float maxKeyDensity;
+ protected final float minKeyDensity;
+ protected final int maxDesiredKeys;
+
+ /*--------------------------------------------------------------*/
+
+ final int CLEARZONE1e;
+
+ /*--------------------------------------------------------------*/
+
+ final int MIN_APPROX_HITS_TO_KEEP;
+ final boolean USE_EXTENDED_SCORE;
+ public static final boolean GENERATE_BASE_SCORES_FROM_QUALITY=AbstractIndex.GENERATE_BASE_SCORES_FROM_QUALITY;
+ final int BASE_HIT_SCORE;
+ final int BASE_KEY_HIT_SCORE;
+ final boolean USE_AFFINE_SCORE;
+ final int EXPECTED_LEN_LIMIT;
+ final int MAX_INDEL;
+
+ final boolean TRIM_LIST;
+ final int TIP_DELETION_SEARCH_RANGE;
+ final boolean FIND_TIP_DELETIONS;
+ final int ALIGN_COLUMNS;
+
+ /*--------------------------------------------------------------*/
+
+
+ /** Deprecated. Must be set to false. Reads and index are in SOLiD colorspace. */
+ protected final boolean colorspace;
+ /** Use dynamic programming slow-alignment phase to increase quality. Program may not run anymore if this is disabled. */
+ protected final boolean SLOW_ALIGN;
+ /** Produce local alignments instead of global alignments */
+ protected final boolean LOCAL_ALIGN;
+ /** Discard reads with ambiguous alignments (consider them unmapped). */
+ protected final boolean AMBIGUOUS_TOSS;
+ /** Choose a random site for reads with ambiguous alignments. */
+ protected final boolean AMBIGUOUS_RANDOM;
+ /** Output all sites for reads with ambiguous alignments. */
+ protected final boolean AMBIGUOUS_ALL;
+ /** Quality-trim left side of reads before mapping. */
+ protected final boolean TRIM_LEFT;
+ /** Quality-trim right side of reads before mapping. */
+ protected final boolean TRIM_RIGHT;
+ /** Undo quality trimming after mapping. */
+ protected final boolean UNTRIM;
+ /** Trim until 2 consecutive bases are encountered with at least this quality. */
+ protected final byte TRIM_QUAL;
+ /** Don't trim reads to be shorter than this */
+ protected final int TRIM_MIN_LENGTH=30;
+ /** Distance cutoff for classifying a read as loosely correct */
+ protected final int THRESH;
+ /** Semi-deprecated. Minimum chrom to index or load. */
+ protected final int minChrom;
+ /** Semi-deprecated. Maximum chrom to index or load. */
+ protected final int maxChrom;
+ /** Disallow sites that do not have at least k consecutive matching bases. */
+ protected final int KFILTER;
+
+
+ /** When reads are not in valid pairing orientation, eliminate (mark unmapped) the lower-scoring read. */
+ protected final boolean KILL_BAD_PAIRS;
+ /** For human genome, map ambiguous reads in the PAR to the X chromosome. */
+ protected final boolean SAVE_AMBIGUOUS_XY;
+ /** Deprecated. Must be set to true. */
+ protected final boolean GEN_MATCH_FAST=true;
+ /** For colorspace reads, translate to base space before outputting them. */
+ protected final boolean translateToBaseSpace;
+
+ /** Padding for dynamic-programming slow alignment. */
+ protected final int SLOW_ALIGN_PADDING;
+ /** Padding for dynamic-programming slow alignment for rescued reads (which typically may need more padding). */
+ protected final int SLOW_RESCUE_PADDING;
+ /** If a site is unpaired, search nearby for a possible site for the other read. */
+ protected final boolean DO_RESCUE;
+ /** Forbid alignments with indels longer than MAX_INDEL */
+ protected final boolean STRICT_MAX_INDEL;
+ /** Bandwidth of banded MSA */
+ protected final int BANDWIDTH;
+
+ protected final boolean PAIRED;
+ protected final boolean REQUIRE_CORRECT_STRANDS_PAIRS;
+ protected final boolean SAME_STRAND_PAIRS;
+
+ /*--------------------------------------------------------------*/
+
+ protected int AVERAGE_PAIR_DIST=100;
+
+ /** Extra padding for when slow alignment fails. */
+ protected int EXTRA_PADDING=10;
+
+ protected final boolean GENERATE_KEY_SCORES_FROM_QUALITY;
+
+ /*--------------------------------------------------------------*/
+
+ protected static boolean CALC_STATISTICS=true;
+ protected static int MIN_PAIR_DIST=-160;
+ protected static int MAX_PAIR_DIST=32000;
+ /** IMPORTANT!!!! This option causes non-deterministic output. */
+ protected static final boolean DYNAMIC_INSERT_LENGTH=true;
+ /** Counts undefined bases. */
+ protected static final boolean DISCARD_MOSTLY_UNDEFINED_READS=true;
+
+ protected static final byte TIP_DELETION_MIN_QUALITY=6;
+ protected static final byte TIP_DELETION_AVG_QUALITY=14;
+ protected static final int TIP_DELETION_MAX_TIPLEN=8;
+
+ protected static final int OUTER_DIST_MULT=14;
+// protected static final int OUTER_DIST_MULT2=OUTER_DIST_MULT-1;
+ protected static final int OUTER_DIST_DIV=32;
+
+ protected static long SKIP_INITIAL=0;
+
+ protected static boolean OUTPUT_PAIRED_ONLY=false;
+
+// static{if(OUTER_DIST_MULT2<1){throw new RuntimeException();}}
+
+ /*--------------------------------------------------------------*/
+
+ public int totalNumCorrect1=0;
+ public int totalNumIncorrect1=0;
+ public int totalNumIncorrectPrior1=0;
+ public int totalNumCapturedAllCorrect1=0;
+ public int totalNumCapturedAllCorrectTop1=0;
+ public int totalNumCapturedAllCorrectOnly1=0;
+
+ public int totalNumCorrect2=0;
+ public int totalNumIncorrect2=0;
+ public int totalNumIncorrectPrior2=0;
+ public int totalNumCapturedAllCorrect2=0;
+ public int totalNumCapturedAllCorrectTop2=0;
+ public int totalNumCapturedAllCorrectOnly2=0;
+
+ /*--------------------------------------------------------------*/
+
+ public boolean verbose=false;
+ public static final boolean verboseS=false;
+
+ public long readsUsed=0;
+ public long readsUsed2=0;
+ public long numMated=0;
+ public long badPairs=0;
+ public long innerLengthSum=0;
+ public long outerLengthSum=0;
+ public long insertSizeSum=0;
+ public long keysUsed=0;
+ public long basesUsed=0; //basesUsed and basesAtQuickmap are identical
+ public long basesAtQuickmap=0; //basesUsed and basesAtQuickmap are identical
+ public long syntheticReads=0;
+
+ public int mapped1=0;
+ public int mappedRetained1=0;
+ public int rescuedP1=0;
+ public int rescuedM1=0;
+ public int truePositiveP1=0;
+ public int truePositiveM1=0;
+ public int falsePositive1=0;
+ public int totalCorrectSites1=0;
+
+ public int firstSiteCorrectP1=0;
+ public int firstSiteCorrectM1=0;
+ public int firstSiteIncorrect1=0;
+ public int firstSiteCorrectLoose1=0;
+ public int firstSiteIncorrectLoose1=0;
+ public int firstSiteCorrectPaired1=0;
+ public int firstSiteCorrectSolo1=0;
+ public int firstSiteCorrectRescued1=0;
+
+ public long matchCountS1=0;
+ public long matchCountI1=0;
+ public long matchCountD1=0;
+ public long matchCountM1=0;
+ public long matchCountN1=0;
+
+
+ public int perfectHit1=0; //Highest quick score is max quick score
+ public int uniqueHit1=0; //Only one hit has highest score
+ public int correctUniqueHit1=0; //unique highest hit on answer site
+ public int correctMultiHit1=0; //non-unique highest hit on answer site
+ public int correctLowHit1=0; //hit on answer site, but not highest scorer
+ public int noHit1=0;
+
+ /** Number of perfect hit sites found */
+ public int perfectHitCount1=0;
+ /** Number of sites found that are perfect except for no-ref */
+ public int semiPerfectHitCount1=0;
+
+
+ public int perfectMatch1=0; //Highest slow score is max slow score
+ public int semiperfectMatch1=0;
+
+ public int ambiguousBestAlignment1=0;
+
+ public long initialSiteSum1=0;
+ public long postTrimSiteSum1=0;
+ public long postRescueSiteSum1=0;
+ public long siteSum1=0;
+ public long topSiteSum1=0;
+
+ public long lowQualityReadsDiscarded1=0;
+
+ public int mapped2=0;
+ public int mappedRetained2=0;
+ public int rescuedP2=0;
+ public int rescuedM2=0;
+ public int truePositiveP2=0;
+ public int truePositiveM2=0;
+ public int falsePositive2=0;
+ public int totalCorrectSites2=0;
+
+ public int firstSiteCorrectP2=0;
+ public int firstSiteCorrectM2=0;
+ public int firstSiteIncorrect2=0;
+ public int firstSiteCorrectLoose2=0;
+ public int firstSiteIncorrectLoose2=0;
+ public int firstSiteCorrectPaired2=0;
+ public int firstSiteCorrectSolo2=0;
+ public int firstSiteCorrectRescued2=0;
+
+ public long matchCountS2=0;
+ public long matchCountI2=0;
+ public long matchCountD2=0;
+ public long matchCountM2=0;
+ public long matchCountN2=0;
+
+ public int perfectHit2=0; //Highest quick score is max quick score
+ public int uniqueHit2=0; //Only one hit has highest score
+ public int correctUniqueHit2=0; //unique highest hit on answer site
+ public int correctMultiHit2=0; //non-unique highest hit on answer site
+ public int correctLowHit2=0; //hit on answer site, but not highest scorer
+ public int noHit2=0;
+
+ /** Number of perfect hit sites found */
+ public int perfectHitCount2=0;
+ /** Number of sites found that are perfect except for no-ref */
+ public int semiPerfectHitCount2=0;
+
+ public int perfectMatch2=0; //Highest slow score is max slow score
+ public int semiperfectMatch2=0;
+
+ public int ambiguousBestAlignment2=0;
+
+ public long initialSiteSum2=0;
+ public long postTrimSiteSum2=0;
+ public long postRescueSiteSum2=0;
+ public long siteSum2=0;
+ public long topSiteSum2=0;
+
+ public long lowQualityReadsDiscarded2=0;
+
+ /*--------------------------------------------------------------*/
+
+ int idmodulo;
+}
diff --git a/current/align2/AbstractMapper.java b/current/align2/AbstractMapper.java
new file mode 100755
index 0000000..75280bd
--- /dev/null
+++ b/current/align2/AbstractMapper.java
@@ -0,0 +1,2476 @@
+package align2;
+
+import java.io.File;
+import java.io.PrintStream;
+import java.lang.Thread.State;
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import jgi.CalcTrueQuality;
+
+import stream.ConcurrentGenericReadInputStream;
+import stream.ConcurrentReadInputStream;
+import stream.ConcurrentReadStreamInterface;
+import stream.ConcurrentSolidInputStream;
+import stream.FASTQ;
+import stream.FastaReadInputStream;
+import stream.FastqReadInputStream;
+import stream.RTextInputStream;
+import stream.RTextOutputStream3;
+import stream.RandomReadInputStream;
+import stream.Read;
+import stream.ReadStreamWriter;
+import stream.SamLine;
+import stream.SamReadInputStream;
+import stream.SequentialReadInputStream;
+
+import dna.Data;
+import dna.Timer;
+import fileIO.ByteFile;
+import fileIO.ReadWrite;
+import fileIO.FileFormat;
+
+/**
+ * Abstract superclass created from BBMap variants.
+ * Handles argument parsing, I/O stream initialization and shutdown,
+ * thread management, statistics collection and formatting.
+ * @author Brian Bushnell
+ * @date Oct 15, 2013
+ *
+ */
+public abstract class AbstractMapper {
+
+ public AbstractMapper(String[] args){
+ if(Shared.COMMAND_LINE==null){
+ Shared.COMMAND_LINE=(args==null ? null : args.clone());
+ Shared.BBMAP_CLASS=this.getClass().getName();
+ int x=Shared.BBMAP_CLASS.lastIndexOf('.');
+ if(x>=0){Shared.BBMAP_CLASS=Shared.BBMAP_CLASS.substring(x+1);}
+ }
+ setDefaults();
+ preparse0(args);
+ String[] args2=preparse(args);
+ parse(args2);
+ postparse(args2);
+ setup();
+ }
+
+ /** Prints a one-line pointer to the help resources (readme.txt / the shellscript) to sysout. */
+ void printOptions(){
+ sysout.println("For help, please consult readme.txt or run the shellscript with no parameters.");
+ }
+
+ /**
+  * Closes the input and output streams, shuts down the given threads (if any), and then
+  * always throws a RuntimeException.
+  * @param mtts Threads to shut down; may be null.
+  * @param message Exception message; if null, the exception is created without one.
+  */
+ final void abort(AbstractMapThread[] mtts, String message){
+     closeStreams(cris, rosA, rosM, rosU, rosB);
+     if(mtts!=null){shutDownThreads(mtts, true);}
+     throw (message==null ? new RuntimeException() : new RuntimeException(message));
+ }
+
+ /**
+  * Lowers Shared.THREADS when the JVM does not have enough memory for that many threads.
+  * Available memory is estimated as (max - used - 40), with all quantities in units of
+  * 1,000,000 bytes. If not even one thread fits, abort is called with an explanatory message.
+  * @param threadMem Memory needed per thread, in megabytes.
+  */
+ final void adjustThreadsforMemory(long threadMem){
+     final Runtime runtime=Runtime.getRuntime();
+     final long maxMB=runtime.maxMemory()/1000000;
+     final long totalMB=runtime.totalMemory()/1000000;
+     final long freeMB=runtime.freeMemory()/1000000;
+     final long usedMB=totalMB-freeMB;
+     final long availableMB=maxMB-usedMB-40;
+     final int maxThreads=(int)(availableMB/threadMem);
+
+     //Enough memory for the requested thread count; nothing to adjust.
+     if(Shared.THREADS<=maxThreads){return;}
+
+     System.err.println("\nMax Memory = "+maxMB+" MB\nAvailable Memory = "+availableMB+" MB");
+     if(maxThreads<1){
+         abort(null, "\n\nNot enough memory. Please run on a node with at least "+((long)((usedMB+40+threadMem)*1.15))+" MB.\n");
+     }
+     System.err.println("Reducing threads from "+Shared.THREADS+" to "+maxThreads+" due to low system memory.");
+     Shared.THREADS=maxThreads;
+ }
+
+ abstract void setDefaults();
+
+ abstract String[] preparse(String[] args);
+
+ abstract void postparse(String[] args);
+
+ abstract void setup();
+
+ abstract void loadIndex();
+
+ abstract void processAmbig2();
+
+ abstract void testSpeed(String[] args);
+
+ abstract void setSemiperfectMode();
+
+ abstract void setPerfectMode();
+
+ abstract void printSettings(int k);
+
+ private final void parse(String[] args){
+
+
+ sysout.println("Executing "+getClass().getName()+" "+Arrays.toString(args)+"\n");
+ sysout.println("BBMap version "+Shared.BBMAP_VERSION_STRING);
+
+ if(Tools.parseHelp(args)){
+ printOptions();
+ System.exit(0);
+ }
+
+ Timer t=new Timer();
+ t.start();
+
+ for(int i=0; i1 ? split[1] : null;
+ if("null".equalsIgnoreCase(b)){b=null;}
+// System.err.println("Processing "+arg);
+ if(arg.startsWith("-Xmx") || arg.startsWith("-Xms") || arg.equals("-ea") || arg.equals("-da")){
+ //jvm argument; do nothing
+ }else if(a.equals("printtoerr")){
+ if(Tools.parseBoolean(b)){
+ sysout=System.err;
+ Data.sysout=System.err;
+ }
+ }else if(a.equals("colorspace") || a.equals("cs")){
+ colorspace=Tools.parseBoolean(b);
+ sysout.println("Set colorspace to "+colorspace);
+ }else if(a.equals("path") || a.equals("root")){
+ Data.setPath(b);
+ }else if(a.equals("ref") || a.equals("reference") || a.equals("fasta")){
+ reference=b;
+ }else if(a.equals("in") || a.equals("in1")){
+ in1=b;
+ }else if(a.equals("in2")){
+ in2=b;
+ }else if(a.equals("out")){
+ if(b==null || b.equalsIgnoreCase("null") || b.equalsIgnoreCase("none")){
+ outFile=null;
+ }else{
+ outFile=b;
+// outFile=b.replace('#', '1');
+// outFile2=(b.contains("#") ? b.replace('#', '2') : null);
+ }
+ }else if(a.equals("out1")){
+ outFile=(b==null || b.equalsIgnoreCase("null") || b.equalsIgnoreCase("none")) ? null : b;
+ if(outFile==null){
+ outFile=null;
+ }
+ }else if(a.equals("out2")){
+ outFile2=(b==null || b.equalsIgnoreCase("null") || b.equalsIgnoreCase("none")) ? null : b;
+ }else if(a.equals("outm") || a.equals("outm1") || a.equals("outmapped") || a.equals("outmapped1")){
+ outFileM=(b==null || b.equalsIgnoreCase("null") || b.equalsIgnoreCase("none")) ? null : b;
+ }else if(a.equals("outm2") || a.equals("outmapped2")){
+ outFileM2=(b==null || b.equalsIgnoreCase("null") || b.equalsIgnoreCase("none")) ? null : b;
+ }else if(a.equals("outu") || a.equals("outu1") || a.equals("outunmapped") || a.equals("outunmapped1")){
+ outFileU=(b==null || b.equalsIgnoreCase("null") || b.equalsIgnoreCase("none")) ? null : b;
+ }else if(a.equals("outu2") || a.equals("outunmapped2")){
+ outFileU2=(b==null || b.equalsIgnoreCase("null") || b.equalsIgnoreCase("none")) ? null : b;
+ }else if(a.equals("outb") || a.equals("outb1") || a.equals("outblack") || a.equals("outblack1") || a.equals("outblacklist") || a.equals("outblacklist1")){
+ outFileB=(b==null || b.equalsIgnoreCase("null") || b.equalsIgnoreCase("none")) ? null : b;
+ }else if(a.equals("outb2") || a.equals("outblack2") || a.equals("outblacklist2")){
+ outFileB2=(b==null || b.equalsIgnoreCase("null") || b.equalsIgnoreCase("none")) ? null : b;
+ }else if(a.equals("blacklist") && !Data.scaffoldPrefixes){
+ if(b==null || b.equalsIgnoreCase("null") || b.equalsIgnoreCase("none")){blacklist=null;}
+ else{
+ if(blacklist==null){blacklist=new ArrayList();}
+ if(b.indexOf(',')<0 || new File(b).exists()){blacklist.add(b);}
+ else{
+ String[] temp=b.split(",");
+ for(String tmp : temp){blacklist.add(tmp);}
+ }
+ }
+ }else if(a.startsWith("out_") && b!=null){
+ //ignore, it will be processed later
+ }else if(a.equals("qualityhistogram") || a.equals("qualityhist") || a.equals("qhist")){
+ ReadStats.QUAL_HIST_FILE=(b==null || b.equalsIgnoreCase("null") || b.equalsIgnoreCase("none")) ? null : b;
+ ReadStats.COLLECT_QUALITY_STATS=(ReadStats.QUAL_HIST_FILE!=null);
+ if(ReadStats.COLLECT_QUALITY_STATS){sysout.println("Set quality histogram output to "+ReadStats.QUAL_HIST_FILE);}
+ }else if(a.equals("matchhistogram") || a.equals("matchhist") || a.equals("mhist")){
+ ReadStats.MATCH_HIST_FILE=(b==null || b.equalsIgnoreCase("null") || b.equalsIgnoreCase("none")) ? null : b;
+ ReadStats.COLLECT_MATCH_STATS=(ReadStats.MATCH_HIST_FILE!=null);
+ if(ReadStats.COLLECT_MATCH_STATS){sysout.println("Set match histogram output to "+ReadStats.MATCH_HIST_FILE);}
+ }else if(a.equals("inserthistogram") || a.equals("inserthist") || a.equals("ihist")){
+ ReadStats.INSERT_HIST_FILE=(b==null || b.equalsIgnoreCase("null") || b.equalsIgnoreCase("none")) ? null : b;
+ ReadStats.COLLECT_INSERT_STATS=(ReadStats.INSERT_HIST_FILE!=null);
+ if(ReadStats.COLLECT_INSERT_STATS){sysout.println("Set insert size histogram output to "+ReadStats.INSERT_HIST_FILE);}
+ }else if(a.equals("bamscript") || a.equals("bs")){
+ bamscript=b;
+ }else if(a.equals("tuc") || a.equals("touppercase")){
+ Read.TO_UPPER_CASE=Tools.parseBoolean(b);
+ }else if(a.equals("trd") || a.equals("trc") || a.equals("trimreaddescription") || a.equals("trimreaddescriptions")){
+ Shared.TRIM_READ_COMMENTS=Tools.parseBoolean(b);
+ }else if(a.equals("fakequal") || a.equals("fakequality")){
+ if(b==null || b.length()<1){b="f";}
+ if(Character.isLetter(b.charAt(0))){
+ FastaReadInputStream.FAKE_QUALITY=Tools.parseBoolean(b);
+ }else{
+ int x=Integer.parseInt(b);
+ if(x<1){
+ FastaReadInputStream.FAKE_QUALITY=false;
+ }else{
+ FastaReadInputStream.FAKE_QUALITY=true;
+ FastaReadInputStream.FAKE_QUALITY_LEVEL=(byte)Tools.min(x, 50);
+ }
+ }
+ }else if(a.equals("keepnames")){
+ SamLine.KEEP_NAMES=Tools.parseBoolean(b);
+ }else if(a.equals("local")){
+ LOCAL_ALIGN=Tools.parseBoolean(b);
+ }else if(a.equals("idtag")){
+ SamLine.MAKE_IDENTITY_TAG=Tools.parseBoolean(b);
+ }else if(a.equals("inserttag")){
+ SamLine.MAKE_INSERT_TAG=Tools.parseBoolean(b);
+ }else if(a.equals("correctnesstag")){
+ SamLine.MAKE_CORRECTNESS_TAG=Tools.parseBoolean(b);
+ }else if(a.equals("minidentity") || a.equals("minid")){
+ if(b.lastIndexOf('%')==b.length()-1){minid=Double.parseDouble(b.substring(b.length()-1))/100;}
+ else{minid=Double.parseDouble(b);}
+ assert(minid>=0 && minid<=100) : "min identity must be between 0 and 1. Values from 1 to 100 will be assumed percent and divided by 100.";
+ }else if(a.equals("xmtag") || a.equals("xm")){
+ SamLine.MAKE_XM_TAG=Tools.parseBoolean(b);
+ }else if(a.equals("stoptag")){
+ SamLine.MAKE_STOP_TAG=Tools.parseBoolean(b);
+ }else if(a.equals("parsecustom") || a.equals("fastqparsecustom")){
+ FASTQ.PARSE_CUSTOM=Tools.parseBoolean(b);
+ sysout.println("Set FASTQ.PARSE_CUSTOM to "+FASTQ.PARSE_CUSTOM);
+ }else if(a.equals("reads")){
+ reads=Long.parseLong(b);
+ }else if(a.equals("skipreads")){
+ AbstractMapThread.SKIP_INITIAL=Long.parseLong(b);
+ }else if(a.equals("readlen") || a.equals("length") || a.equals("len")){
+ readlen=Integer.parseInt(b);
+ }else if(a.equals("ziplevel") || a.equals("zl")){
+ ziplevel=Integer.parseInt(b);
+ }else if(a.equals("bf1")){
+ ByteFile.FORCE_MODE_BF1=Tools.parseBoolean(b);
+ ByteFile.FORCE_MODE_BF2=!ByteFile.FORCE_MODE_BF1;
+ }else if(a.equals("bf2")){
+ ByteFile.FORCE_MODE_BF2=Tools.parseBoolean(b);
+ ByteFile.FORCE_MODE_BF1=!ByteFile.FORCE_MODE_BF2;
+ }else if(a.equals("usegzip") || a.equals("gzip")){
+ gzip=Tools.parseBoolean(b);
+ }else if(a.equals("usepigz") || a.equals("pigz")){
+ if(b!=null && Character.isDigit(b.charAt(0))){
+ int zt=Integer.parseInt(b);
+ if(zt<1){pigz=false;}
+ else{
+ pigz=true;
+ if(zt>1){
+ ReadWrite.MAX_ZIP_THREADS=zt;
+ ReadWrite.ZIP_THREAD_DIVISOR=1;
+ }
+ }
+ }else{pigz=Tools.parseBoolean(b);}
+
+ }else if(a.equals("usegunzip") || a.equals("gunzip")){
+ gunzip=Tools.parseBoolean(b);
+ }else if(a.equals("useunpigz") || a.equals("unpigz")){
+ unpigz=Tools.parseBoolean(b);
+ }else if(a.equals("kfilter")){
+ KFILTER=Integer.parseInt(b);
+ }else if(a.equals("msa")){
+ MSA_TYPE=b;
+ }else if(a.equals("bandwidth") || a.equals("bw")){
+ int x=Tools.max(0, Integer.parseInt(b));
+ MSA.bandwidth=x;
+ }else if(a.equals("bandwidthratio") || a.equals("bwr")){
+ float x=Tools.max(0, Float.parseFloat(b));
+ MSA.bandwidthRatio=x;
+ assert(x>=0) : "Bandwidth ratio should be at least 0.";
+ }else if(a.equals("trim") || a.equals("qtrim")){
+ if(b==null){TRIM_RIGHT=TRIM_LEFT=true;}
+ else if(b.equalsIgnoreCase("left") || b.equalsIgnoreCase("l")){TRIM_LEFT=true;TRIM_RIGHT=false;}
+ else if(b.equalsIgnoreCase("right") || b.equalsIgnoreCase("r")){TRIM_LEFT=false;TRIM_RIGHT=true;}
+ else if(b.equalsIgnoreCase("both") || b.equalsIgnoreCase("rl") || b.equalsIgnoreCase("lr")){TRIM_LEFT=TRIM_RIGHT=true;}
+ else{TRIM_RIGHT=TRIM_LEFT=Tools.parseBoolean(b);}
+ }else if(a.equals("optitrim") || a.equals("otf") || a.equals("otm")){
+ if(b!=null && (b.charAt(0)=='.' || Character.isDigit(b.charAt(0)))){
+ TrimRead.optimalMode=true;
+ TrimRead.optimalBias=Float.parseFloat(b);
+ assert(TrimRead.optimalBias>=0 && TrimRead.optimalBias<1);
+ }else{
+ TrimRead.optimalMode=Tools.parseBoolean(b);
+ }
+ }else if(a.equals("trimright")){
+ TRIM_RIGHT=Tools.parseBoolean(b);
+ }else if(a.equals("trimleft")){
+ TRIM_LEFT=Tools.parseBoolean(b);
+ }else if(a.equals("trimq") || a.equals("trimquality")){
+ TRIM_QUALITY=Byte.parseByte(b);
+ }else if(a.equals("q102matrix") || a.equals("q102m")){
+ CalcTrueQuality.q102matrix=b;
+ }else if(a.equals("qbpmatrix") || a.equals("bqpm")){
+ CalcTrueQuality.qbpmatrix=b;
+ }else if(a.equals("loadq102")){
+ CalcTrueQuality.q102=Tools.parseBoolean(b);
+ }else if(a.equals("loadqbp")){
+ CalcTrueQuality.qbp=Tools.parseBoolean(b);
+ }else if(a.equals("loadq10")){
+ CalcTrueQuality.q10=Tools.parseBoolean(b);
+ }else if(a.equals("loadq12")){
+ CalcTrueQuality.q12=Tools.parseBoolean(b);
+ }else if(a.equals("loadqb012")){
+ CalcTrueQuality.qb012=Tools.parseBoolean(b);
+ }else if(a.equals("loadqb234")){
+ CalcTrueQuality.qb234=Tools.parseBoolean(b);
+ }else if(a.equals("loadqp")){
+ CalcTrueQuality.qp=Tools.parseBoolean(b);
+ }else if(a.equals("adjustquality") || a.equals("adjq")){
+ TrimRead.ADJUST_QUALITY=Tools.parseBoolean(b);
+ }else if(a.equals("untrim") || a.equals("outputuntrimmed")){
+ UNTRIM=Tools.parseBoolean(b);
+ }else if(a.equals("eono") || a.equals("erroronnooutput")){
+ ERROR_ON_NO_OUTPUT=Tools.parseBoolean(b);
+ }else if(a.equals("log")){
+ RefToIndex.LOG=Tools.parseBoolean(b);
+ }else if(a.equals("testinterleaved")){
+ FASTQ.TEST_INTERLEAVED=Tools.parseBoolean(b);
+ sysout.println("Set TEST_INTERLEAVED to "+FASTQ.TEST_INTERLEAVED);
+ }else if(a.equals("forceinterleaved")){
+ FASTQ.FORCE_INTERLEAVED=Tools.parseBoolean(b);
+ sysout.println("Set FORCE_INTERLEAVED to "+FASTQ.FORCE_INTERLEAVED);
+ }else if(a.equals("interleaved") || a.equals("int")){
+ if("auto".equalsIgnoreCase(b)){FASTQ.FORCE_INTERLEAVED=!(FASTQ.TEST_INTERLEAVED=true);}
+ else{
+ FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=Tools.parseBoolean(b);
+ sysout.println("Set INTERLEAVED to "+FASTQ.FORCE_INTERLEAVED);
+ }
+ }else if(a.equals("overwrite") || a.equals("ow")){
+ OVERWRITE=ReadStats.OVERWRITE=Tools.parseBoolean(b);
+ sysout.println("Set OVERWRITE to "+OVERWRITE);
+ }else if(a.equals("sitesonly") || a.equals("outputsitesonly")){
+ outputSitesOnly=Tools.parseBoolean(b);
+ sysout.println("Set outputSitesOnly to "+outputSitesOnly);
+ }else if(a.equals("discardambiguous") || a.equals("tossambiguous")){
+ REMOVE_DUPLICATE_BEST_ALIGNMENTS=Tools.parseBoolean(b);
+ sysout.println("Set REMOVE_DUPLICATE_BEST_ALIGNMENTS to "+REMOVE_DUPLICATE_BEST_ALIGNMENTS);
+ }else if(a.equals("ambiguous") || a.equals("ambig")){
+ if(b==null){
+ throw new RuntimeException(arg);
+ }else if(b.equalsIgnoreCase("keep") || b.equalsIgnoreCase("best") || b.equalsIgnoreCase("first")){
+ ambigMode=AMBIG_BEST;
+ }else if(b.equalsIgnoreCase("all")){
+ ambigMode=AMBIG_ALL;
+ }else if(b.equalsIgnoreCase("random")){
+ ambigMode=AMBIG_RANDOM;
+ }else if(b.equalsIgnoreCase("toss") || b.equalsIgnoreCase("discard") || b.equalsIgnoreCase("remove")){
+ ambigMode=AMBIG_TOSS;
+ }else{
+ throw new RuntimeException(arg);
+ }
+// sysout.println("Set REMOVE_DUPLICATE_BEST_ALIGNMENTS to "+REMOVE_DUPLICATE_BEST_ALIGNMENTS);
+ }else if(a.equals("maxsites")){
+ MAX_SITESCORES_TO_PRINT=Integer.parseInt(b);
+ }else if(a.equals("secondary")){
+ PRINT_SECONDARY_ALIGNMENTS=Tools.parseBoolean(b);
+ ReadStreamWriter.OUTPUT_SAM_SECONDARY_ALIGNMENTS=PRINT_SECONDARY_ALIGNMENTS;
+ }else if(a.equals("quickmatch")){
+ QUICK_MATCH_STRINGS=Tools.parseBoolean(b);
+ }else if(a.equals("ambiguous2") || a.equals("ambig2")){
+ if(b==null){
+ throw new RuntimeException(arg);
+ }else if(b.equalsIgnoreCase("split") || b.equalsIgnoreCase("stream")){
+ BBSplitter.AMBIGUOUS2_MODE=BBSplitter.AMBIGUOUS2_SPLIT;
+ }else if(b.equalsIgnoreCase("keep") || b.equalsIgnoreCase("best") || b.equalsIgnoreCase("first")){
+ BBSplitter.AMBIGUOUS2_MODE=BBSplitter.AMBIGUOUS2_FIRST;
+ }else if(b.equalsIgnoreCase("toss") || b.equalsIgnoreCase("discard") || b.equalsIgnoreCase("remove")){
+ BBSplitter.AMBIGUOUS2_MODE=BBSplitter.AMBIGUOUS2_TOSS;
+ }else if(b.equalsIgnoreCase("random")){
+ BBSplitter.AMBIGUOUS2_MODE=BBSplitter.AMBIGUOUS2_RANDOM;
+ }else if(b.equalsIgnoreCase("all")){
+ BBSplitter.AMBIGUOUS2_MODE=BBSplitter.AMBIGUOUS2_ALL;
+ }else{
+ throw new RuntimeException(arg);
+ }
+ }else if(a.equals("forbidselfmapping")){
+ FORBID_SELF_MAPPING=Tools.parseBoolean(b);
+ sysout.println("Set FORBID_SELF_MAPPING to "+FORBID_SELF_MAPPING);
+ }else if(a.equals("threads") || a.equals("t")){
+ if(b.equalsIgnoreCase("auto")){Shared.SET_THREADS(-1);}
+ else{Shared.THREADS=Integer.parseInt(b);}
+ sysout.println("Set threads to "+Shared.THREADS);
+ }else if(a.equals("samversion") || a.equals("samv") || a.equals("sam")){
+ SamLine.VERSION=Float.parseFloat(b);
+ }else if(a.equals("match") || a.equals("cigar")){
+ if(b!=null){b=b.toLowerCase();}else{b="true";}
+ if(b.equals("long") || b.equals("normal")){
+ MAKE_MATCH_STRING=true;
+ Read.COMPRESS_MATCH_BEFORE_WRITING=false;
+// sysout.println("Writing long match strings.");
+ }else if(b.equals("short") || b.equals("compressed")){
+ MAKE_MATCH_STRING=true;
+ Read.COMPRESS_MATCH_BEFORE_WRITING=true;
+// sysout.println("Writing short match strings.");
+ }else{
+ MAKE_MATCH_STRING=Tools.parseBoolean(b);
+ }
+
+ if(MAKE_MATCH_STRING){
+ sysout.println("Cigar strings enabled.");
+ }else{
+ sysout.println("Cigar strings disabled.");
+ }
+ }else if(a.equals("semiperfectmode")){
+ SEMIPERFECTMODE=Tools.parseBoolean(b);
+ if(ziplevel==-1){ziplevel=2;}
+ }else if(a.equals("perfectmode")){
+ PERFECTMODE=Tools.parseBoolean(b);
+ if(ziplevel==-1){ziplevel=2;}
+ }else if(a.equals("trimlist")){
+ TRIM_LIST=Tools.parseBoolean(b);
+ }else if(a.equals("pairedrandom")){
+ PAIRED_RANDOM_READS=Tools.parseBoolean(b);
+ }else if(a.equals("ordered") || a.equals("ord")){
+ OUTPUT_ORDERED_READS=Tools.parseBoolean(b);
+ sysout.println("Set OUTPUT_ORDERED_READS to "+OUTPUT_ORDERED_READS);
+ }else if(a.equals("outputunmapped")){
+ DONT_OUTPUT_UNMAPPED_READS=!Tools.parseBoolean(b);
+ sysout.println("Set DONT_OUTPUT_UNMAPPED_READS to "+DONT_OUTPUT_UNMAPPED_READS);
+ }else if(a.equals("outputblacklisted")){
+ DONT_OUTPUT_BLACKLISTED_READS=!Tools.parseBoolean(b);
+ sysout.println("Set DONT_OUTPUT_BLACKLISTED_READS to "+DONT_OUTPUT_BLACKLISTED_READS);
+ }else if(a.equals("build") || a.equals("genome") || a.equals("index")){
+ build=Integer.parseInt(b);
+ }else if(a.equals("minchrom")){
+ minChrom=Integer.parseInt(b);
+ maxChrom=Tools.max(minChrom, maxChrom);
+ }else if(a.equals("maxchrom")){
+ maxChrom=Byte.parseByte(b);
+ minChrom=Tools.min(minChrom, maxChrom);
+ }else if(a.equals("expectedsites")){
+ expectedSites=Integer.parseInt(b);
+ }else if(a.equals("targetsize")){
+ targetGenomeSize=Tools.parseKMG(b);
+ }else if(a.equals("fgte")){
+ fractionGenomeToExclude=Float.parseFloat(b);
+ sysout.println("Set fractionGenomeToExclude to "+String.format("%.4f",fractionGenomeToExclude));
+ }else if(a.equals("minratio")){
+ MINIMUM_ALIGNMENT_SCORE_RATIO=Float.parseFloat(b);
+ sysout.println("Set MINIMUM_ALIGNMENT_SCORE_RATIO to "+String.format("%.3f",MINIMUM_ALIGNMENT_SCORE_RATIO));
+ }else if(a.equals("asciiin") || a.equals("qualityin") || a.equals("qualin") || a.equals("qin")){
+ if(b.equalsIgnoreCase("auto")){
+ FASTQ.DETECT_QUALITY=true;
+ }else{
+ byte x;
+ if(b.equalsIgnoreCase("sanger")){x=33;}
+ else if(b.equalsIgnoreCase("illumina")){x=64;}
+ else{x=Byte.parseByte(b);}
+ FASTQ.ASCII_OFFSET=x;
+ sysout.println("Set fastq input ASCII offset to "+FASTQ.ASCII_OFFSET);
+ FASTQ.DETECT_QUALITY=false;
+ }
+ }else if(a.equals("asciiout") || a.equals("qualityout") || a.equals("qualout") || a.equals("qout")){
+ if(b.equalsIgnoreCase("auto")){
+ FASTQ.DETECT_QUALITY_OUT=true;
+ }else{
+ byte ascii_offset=Byte.parseByte(b);
+ FASTQ.ASCII_OFFSET_OUT=ascii_offset;
+ sysout.println("Set fastq output ASCII offset to "+FASTQ.ASCII_OFFSET_OUT);
+ FASTQ.DETECT_QUALITY_OUT=false;
+ }
+ }else if(a.equals("qauto")){
+ FASTQ.DETECT_QUALITY=FASTQ.DETECT_QUALITY_OUT=true;
+ }else if(a.equals("rcompmate") || a.equals("reversecomplementmate")){
+ rcompMate=Tools.parseBoolean(b);
+ sysout.println("Set RCOMP_MATE to "+rcompMate);
+ }else if(a.equals("verbose")){
+ verbose=Tools.parseBoolean(b);
+ TranslateColorspaceRead.verbose=verbose;
+ AbstractIndex.verbose2=verbose;
+ }else if(a.equals("verbosestats")){
+ if(Character.isDigit(b.charAt(0))){
+ verbose_stats=Integer.parseInt(b);
+ }else{
+ verbose_stats=Tools.parseBoolean(b) ? 9 : 0;
+ }
+ }else if(a.equals("maxdellen")){
+ maxDelLen=Integer.parseInt(b);
+ }else if(a.equals("maxinslen")){
+ maxInsLen=Integer.parseInt(b);
+ }else if(a.equals("maxsublen")){
+ maxSubLen=Integer.parseInt(b);
+ }else if(a.equals("fastareadlen")){
+ FastaReadInputStream.TARGET_READ_LEN=Integer.parseInt(b);
+ FastaReadInputStream.SPLIT_READS=(FastaReadInputStream.TARGET_READ_LEN>0);
+ }else if(a.equals("fastaminread") || a.equals("fastaminlen")){
+ FastaReadInputStream.MIN_READ_LEN=Integer.parseInt(b);
+ }else if(a.equals("fastawrap")){
+ FastaReadInputStream.DEFAULT_WRAP=Integer.parseInt(b);
+ }else if(a.equals("minqual")){
+ minQuality=Byte.parseByte(b);
+ midQuality=Tools.max(minQuality, midQuality);
+ maxQuality=Tools.max(midQuality, maxQuality);
+ }else if(a.equals("midqual")){
+ midQuality=Byte.parseByte(b);
+ maxQuality=Tools.max(midQuality, maxQuality);
+ minQuality=Tools.min(minQuality, midQuality);
+ }else if(a.equals("maxqual")){
+ maxQuality=Byte.parseByte(b);
+ midQuality=Tools.min(maxQuality, midQuality);
+ minQuality=Tools.min(minQuality, midQuality);
+ }else if(a.equals("matelen") || a.equals("pairlen")){
+ int x=Integer.parseInt(b);
+ RandomReads.mateLen=x;
+ AbstractMapThread.MAX_PAIR_DIST=Tools.max(x, AbstractMapThread.MAX_PAIR_DIST);
+ }else if(a.equals("s") || a.equals("snps")){
+ maxSnps=Integer.parseInt(b);
+ baseSnpRate=1;
+ }else if(a.equals("u") || a.equals("subs")){
+ maxInss=Integer.parseInt(b);
+ baseInsRate=1;
+ }else if(a.equals("d") || a.equals("dels")){
+ maxDels=Integer.parseInt(b);
+ baseDelRate=1;
+ }else if(a.equals("i") || a.equals("inss")){
+ maxSubs=Integer.parseInt(b);
+ baseSubRate=1;
+ }else if(a.equals("sequentialoverlap")){
+ sequentialOverlap=Integer.parseInt(b);
+ }else if(a.equals("sequentialstrandalt")){
+ sequentialStrandAlt=Tools.parseBoolean(b);
+ }else if(a.equals("k") || a.equals("keylen")){
+ keylen=Integer.parseInt(b);
+ }else if(a.equals("genscaffoldinfo")){
+ RefToIndex.genScaffoldInfo=Tools.parseBoolean(b);
+ }else if(a.equals("loadscaffolds")){
+ Data.LOAD_SCAFFOLDS=Tools.parseBoolean(b);
+ }else if(a.equals("autoRefToIndex.chrombits")){
+ if("auto".equalsIgnoreCase(b)){RefToIndex.AUTO_CHROMBITS=true;}
+ else{RefToIndex.AUTO_CHROMBITS=Tools.parseBoolean(b);}
+ }else if(a.equals("RefToIndex.chrombits") || a.equals("cbits")){
+ if("auto".equalsIgnoreCase(b)){RefToIndex.AUTO_CHROMBITS=true;}
+ else{
+ RefToIndex.AUTO_CHROMBITS=false;
+ RefToIndex.chrombits=Integer.parseInt(b);
+ }
+ }else if(a.equals("requirecorrectstrand") || a.equals("rcs")){
+ REQUIRE_CORRECT_STRANDS_PAIRS=Tools.parseBoolean(b);
+ }else if(a.equals("samestrandpairs") || a.equals("ssp")){
+ SAME_STRAND_PAIRS=Tools.parseBoolean(b);
+ if(SAME_STRAND_PAIRS){sysout.println("Warning! SAME_STRAND_PAIRS=true mode is not fully tested.");}
+ }else if(a.equals("killbadpairs") || a.equals("kbp")){
+ KILL_BAD_PAIRS=Tools.parseBoolean(b);
+ }else if(a.equals("pairedonly") || a.equals("po")){
+ AbstractMapThread.OUTPUT_PAIRED_ONLY=Tools.parseBoolean(b);
+ }else if(a.equals("mdtag") || a.equals("md")){
+ SamLine.MAKE_MD_TAG=Tools.parseBoolean(b);
+ }else if(a.equals("tophat")){
+ if(Tools.parseBoolean(b)){
+ SamLine.MAKE_TOPHAT_TAGS=true;
+ FastaReadInputStream.FAKE_QUALITY=true;
+ FastaReadInputStream.FAKE_QUALITY_LEVEL=40;
+ SamLine.MAKE_MD_TAG=true;
+ }
+ }else if(a.equals("xstag") || a.equals("xs")){
+ SamLine.MAKE_XS_TAG=true;
+ if(b!=null){
+ b=b.toLowerCase();
+ if(b.startsWith("fr-")){b=b.substring(3);}
+ if(b.equals("ss") || b.equals("secondstrand")){
+ SamLine.XS_SECONDSTRAND=true;
+ }else if(b.equals("fs") || b.equals("firststrand")){
+ SamLine.XS_SECONDSTRAND=false;
+ }else if(b.equals("us") || b.equals("unstranded")){
+ SamLine.XS_SECONDSTRAND=false;
+ }else{
+ SamLine.MAKE_XS_TAG=Tools.parseBoolean(b);
+ }
+ }
+ setxs=true;
+ }else if(a.equals("intronlen") || a.equals("intronlength")){
+ SamLine.INTRON_LIMIT=Integer.parseInt(b);
+ setintron=true;
+ }else if(a.equals("sortscaffolds")){
+ SamLine.SORT_SCAFFOLDS=Tools.parseBoolean(b);
+ }else if(a.equals("customtag")){
+ SamLine.MAKE_CUSTOM_TAGS=Tools.parseBoolean(b);
+ }else if(a.equals("idmodulo") || a.equals("idmod")){
+ idmodulo=Integer.parseInt(b);
+ }else if(a.equals("samplerate")){
+ samplerate=Float.parseFloat(b);
+ assert(samplerate<=1f && samplerate>=0f) : "samplerate="+samplerate+"; should be between 0 and 1";
+ }else if(a.equals("sampleseed")){
+ sampleseed=Long.parseLong(b);
+ }else if(a.equals("minhits") || a.equals("minapproxhits")){
+ minApproxHits=Integer.parseInt(b);
+ }else if(a.equals("maxindel")){
+ maxIndel1=Tools.max(0, Integer.parseInt(b));
+ maxIndel2=2*maxIndel1;
+ }else if(a.equals("maxindel1") || a.equals("maxindelsingle")){
+ maxIndel1=Tools.max(0, Integer.parseInt(b));
+ maxIndel2=Tools.max(maxIndel1, maxIndel2);
+ }else if(a.equals("maxindel2") || a.equals("maxindelsum")){
+ maxIndel2=Tools.max(0, Integer.parseInt(b));
+ maxIndel1=Tools.min(maxIndel1, maxIndel2);
+ }else if(a.equals("strictmaxindel")){
+ if(b!=null && Character.isDigit(b.charAt(0))){
+ maxIndel1=Tools.max(0, Integer.parseInt(b));
+ maxIndel2=2*maxIndel1;
+ STRICT_MAX_INDEL=true;
+ }else{
+ STRICT_MAX_INDEL=Tools.parseBoolean(b);
+ }
+ }else if(a.equals("padding")){
+ SLOW_ALIGN_PADDING=Integer.parseInt(b);
+ SLOW_RESCUE_PADDING=SLOW_ALIGN_PADDING;
+ }else if(a.equals("rescue")){
+ RESCUE=Tools.parseBoolean(b);
+ }else if(a.equals("tipsearch")){
+ TIP_SEARCH_DIST=Tools.max(0, Integer.parseInt(b));
+ }else if(a.equals("dper") || a.equals("dprr")){
+ DOUBLE_PRINT_ERROR_RATE=Tools.parseBoolean(b);
+ }else if(a.equals("chromc")){
+ Data.CHROMC=Tools.parseBoolean(b);
+ }else if(a.equals("chromgz")){
+ Data.CHROMGZ=Tools.parseBoolean(b);
+ }else if(a.equals("nodisk")){
+ RefToIndex.NODISK=Tools.parseBoolean(b);
+ }else if(a.equals("maxchromlen")){
+ RefToIndex.maxChromLen=Long.parseLong(b);
+ }else if(a.equals("minscaf") || a.equals("mincontig")){
+ RefToIndex.minScaf=Integer.parseInt(b);
+ }else if(a.equals("midpad")){
+ RefToIndex.midPad=Integer.parseInt(b);
+ }else if(a.equals("startpad")){
+ RefToIndex.startPad=Integer.parseInt(b);
+ }else if(a.equals("stoppad")){
+ RefToIndex.stopPad=Integer.parseInt(b);
+ }else if(a.equals("forceanalyze")){
+ forceanalyze=Tools.parseBoolean(b);
+ }else if(a.equals("machineoutput") || a.equals("machineout")){
+ MACHINE_OUTPUT=Tools.parseBoolean(b);
+ }else if(a.equals("showprogress")){
+ if(b!=null && Character.isDigit(b.charAt(0))){
+ long x=Tools.max(1, Long.parseLong(b));
+ ConcurrentGenericReadInputStream.PROGRESS_INCR=x;
+ ConcurrentGenericReadInputStream.SHOW_PROGRESS=(x>0);
+ }else{
+ ConcurrentGenericReadInputStream.SHOW_PROGRESS=Tools.parseBoolean(b);
+ }
+ }else if(a.equals("scafstats") || a.equals("scaffoldstats")){
+ if(b==null && arg.indexOf('=')<0){b="stdout";}
+ if(b==null || b.equalsIgnoreCase("false") || b.equalsIgnoreCase("f") || b.equalsIgnoreCase("none") || b.equalsIgnoreCase("null")){
+ BBSplitter.TRACK_SCAF_STATS=false;
+ BBSplitter.SCAF_STATS_FILE=null;
+ sysout.println("No file specified; not tracking scaffold statistics.");
+ }else{
+ BBSplitter.TRACK_SCAF_STATS=true;
+ BBSplitter.SCAF_STATS_FILE=b;
+ sysout.println("Scaffold statistics will be written to "+b);
+ }
+ }else if(a.equals("setstats") || a.equals("refstats")){
+ if(b==null && arg.indexOf('=')<0){b="stdout";}
+ if(b==null || b.equalsIgnoreCase("false") || b.equalsIgnoreCase("f") || b.equalsIgnoreCase("none") || b.equalsIgnoreCase("null")){
+ BBSplitter.TRACK_SET_STATS=false;
+ BBSplitter.SET_STATS_FILE=null;
+ sysout.println("No file specified; not tracking reference set statistics.");
+ }else{
+ BBSplitter.TRACK_SET_STATS=true;
+ BBSplitter.SET_STATS_FILE=b;
+ sysout.println("Reference set statistics will be written to "+b);
+ }
+ }else if(a.equals("camelwalk")){
+ AbstractIndex.USE_CAMELWALK=Tools.parseBoolean(b);
+ }else if(a.equals("usequality") || a.equals("uq")){
+ AbstractIndex.GENERATE_KEY_SCORES_FROM_QUALITY=AbstractIndex.GENERATE_BASE_SCORES_FROM_QUALITY=Tools.parseBoolean(b);
+ }else if(a.equals("keepbadkeys") || a.equals("kbk")){
+ KeyRing.KEEP_BAD_KEYS=Tools.parseBoolean(b);
+ }else if(i>1){
+ throw new RuntimeException("Unknown parameter: "+arg);
+ }
+ }
+
+ if(TrimRead.ADJUST_QUALITY){CalcTrueQuality.initializeMatrices();}
+ }
+
+ private final void preparse0(String[] args){
+ for(int i=0; i1 ? split[1].toLowerCase() : null;
+ if("null".equalsIgnoreCase(b)){b=null;}
+ if(b!=null && (b.equals("stdout") || b.startsWith("stdout."))){
+ sysout=System.err;
+ Data.sysout=System.err;
+ }else if(a.equals("printtoerr")){
+ if(Tools.parseBoolean(b)){sysout=System.err; Data.sysout=System.err;}
+ }else if(b!=null && (b.equals("stdin") || b.startsWith("stdin."))){
+ SYSIN=true;
+ }else if(a.equals("fast")){
+ fast=Tools.parseBoolean(b);
+ if(fast){slow=false;}
+ args[i]=null;
+ }else if(a.equals("slow")){
+ slow=Tools.parseBoolean(b);
+ if(slow){fast=false;}
+ args[i]=null;
+ }
+ }
+ }
+
+ /**
+  * Formats {@code value} with {@code places} digits after the decimal point
+  * and computes a target width {@code desired}: 3 characters, plus one more
+  * character and {@code places} further characters when {@code places} is at
+  * least 1 (presumably the decimal point and fraction digits - TODO confirm).
+  *
+  * NOTE(review): the {@code while} line in this block appears to be damaged.
+  * Text seems to be missing between "x.length()" and "0){", so the padding
+  * loop, the return of {@code x}, and the header of a following method are
+  * not present. The remaining statements (the warning about mapping threads
+  * and {@code return broken}) look like the tail of that other method.
+  * Restore this span from the upstream source before relying on it.
+  *
+  * @param value number to format
+  * @param places number of digits after the decimal point
+  */
+ static final String padPercent(double value, int places){
+ String x=String.format("%."+places+"f", value);
+ int desired=3+(places<1 ? 0 : 1+places);
+ // NOTE(review): corrupted line - see the note in the header comment.
+ while(x.length()0){
+ // Warns on stderr that 'broken' mapping threads did not terminate normally.
+ System.err.println("\n\n**************************************************************************\n\n" +
+ "Warning! "+broken+" mapping thread"+(broken==1 ? "" : "s")+" did not terminate normally.\n" +
+ "Please check the error log; the output may be corrupt or incomplete.\n\n" +
+ "**************************************************************************\n\n");
+ }
+ return broken;
+ }
+
+ /**
+  * Closes the read input stream and the four given output streams, then every
+  * stream held in BBSplitter.streamTable and BBSplitter.streamTableAmbiguous
+  * (each table is skipped when null). The boolean returned by each
+  * ReadWrite close call is OR-ed into the static errorState flag
+  * (presumably true signals a failure - confirm in ReadWrite).
+  *
+  * @param cris input stream to close
+  * @param rosA output stream to close
+  * @param rosM output stream to close
+  * @param rosU output stream to close
+  * @param rosB output stream to close
+  * @return the value of errorState after all close calls
+  */
+ static final boolean closeStreams(ConcurrentReadStreamInterface cris, RTextOutputStream3 rosA, RTextOutputStream3 rosM, RTextOutputStream3 rosU, RTextOutputStream3 rosB){
+     // Primary input and output streams first.
+     errorState=errorState | ReadWrite.closeStreams(cris, rosA, rosM, rosU, rosB);
+
+     // Then the streams registered in the main splitter table, if any.
+     if(BBSplitter.streamTable!=null){
+         for(RTextOutputStream3 stream : BBSplitter.streamTable.values()){
+             errorState=errorState | ReadWrite.closeStream(stream);
+         }
+     }
+
+     // Finally the streams registered in the ambiguous-read table, if any.
+     if(BBSplitter.streamTableAmbiguous!=null){
+         for(RTextOutputStream3 stream : BBSplitter.streamTableAmbiguous.values()){
+             errorState=errorState | ReadWrite.closeStream(stream);
+         }
+     }
+
+     return errorState;
+ }
+
+ /**
+  * Creates the concurrent read input stream that matches the format detected
+  * for {@code in1}. The format of each input is determined with
+  * FileFormat.testInput (default FileFormat.FASTQ), and the first matching
+  * case of sequential, csfasta, fastq, sam/bam, fasta, bread, random decides
+  * which stream classes are built.
+  *
+  * Side effects visible here: BBIndex.COLORSPACE is set from colorspace;
+  * for csfasta input colorspace is forced to true; for sequential input a
+  * negative reads value is replaced by Long.MAX_VALUE; for random input
+  * useRandomReads is set and RandomReads.PERFECT_READ_RATIO is copied over.
+  *
+  * @param in1 primary input name; asserted to be non-null
+  * @param in2 secondary input name; asserted to differ from in1 (the null
+  *            checks on ff2 suggest it may be null - confirm with callers)
+  * @return the stream built for the detected format
+  * @throws RuntimeException if none of the known formats matches
+  */
+ static final ConcurrentReadStreamInterface getReadInputStream(String in1, String in2){
+     assert(in1!=null);
+     assert(!in1.equalsIgnoreCase(in2)) : in1+", "+in2;
+
+     BBIndex.COLORSPACE=colorspace;
+
+     final FileFormat ff1=FileFormat.testInput(in1, FileFormat.FASTQ, 0, 0, true, true);
+     final FileFormat ff2=FileFormat.testInput(in2, FileFormat.FASTQ, 0, 0, true, true);
+
+     if(ff1.sequential()){
+         // A negative read limit is replaced by the largest possible value.
+         if(reads<0){reads=Long.MAX_VALUE;}
+         SequentialReadInputStream seq=new SequentialReadInputStream(reads, readlen, Tools.max(50, readlen/2), sequentialOverlap, sequentialStrandAlt);
+         return new ConcurrentReadInputStream(seq, reads);
+     }
+
+     if(ff1.csfasta()){
+         colorspace=true;
+         BBIndex.COLORSPACE=colorspace;
+
+         // The quality file name is derived by swapping the extension.
+         final String qual1=in1.replace(".csfasta", ".qual");
+         if(in2==null){
+             return new ConcurrentSolidInputStream(in1, qual1, reads, null);
+         }
+         return new ConcurrentSolidInputStream(in1, qual1, in2, in2.replace(".csfasta", ".qual"), reads);
+     }
+
+     if(ff1.fastq()){
+         FastqReadInputStream first=new FastqReadInputStream(ff1, colorspace);
+         FastqReadInputStream second=null;
+         if(ff2!=null){second=new FastqReadInputStream(ff2, colorspace);}
+         return new ConcurrentGenericReadInputStream(first, second, reads);
+     }
+
+     if(ff1.samOrBam()){
+         // Only the first input is used for sam/bam.
+         SamReadInputStream sam=new SamReadInputStream(ff1, colorspace, false, FASTQ.FORCE_INTERLEAVED);
+         return new ConcurrentGenericReadInputStream(sam, null, reads);
+     }
+
+     if(ff1.fasta()){
+         FastaReadInputStream first=new FastaReadInputStream(ff1, false, (FASTQ.FORCE_INTERLEAVED && ff2==null), ff2==null ? Shared.READ_BUFFER_MAX_DATA : -1);
+         FastaReadInputStream second=null;
+         if(ff2!=null){second=new FastaReadInputStream(ff2, colorspace, false, -1);}
+         return new ConcurrentGenericReadInputStream(first, second, reads);
+     }
+
+     if(ff1.bread()){
+         RTextInputStream text=new RTextInputStream(in1, in2, reads);
+         return new ConcurrentReadInputStream(text, reads);
+     }
+
+     if(ff1.random()){
+         useRandomReads=true;
+         assert(readlen>0);
+
+         RandomReads.PERFECT_READ_RATIO=PERFECT_READ_RATIO;
+
+         RandomReadInputStream rand=new RandomReadInputStream(reads, readlen,
+                 maxSnps, maxInss, maxDels, maxSubs,
+                 baseSnpRate, baseInsRate, baseDelRate, baseSubRate,
+                 maxInsLen, maxDelLen, maxSubLen,
+                 minChrom, maxChrom, colorspace, PAIRED_RANDOM_READS,
+                 minQuality, midQuality, maxQuality);
+         return new ConcurrentReadInputStream(rand, reads);
+     }
+
+     throw new RuntimeException("Can't determine read input source: ff1="+ff1+", ff2="+ff2);
+ }
+
+
+ static void printOutput(final AbstractMapThread[] mtts, final Timer t, final int keylen, final boolean paired, final boolean SKIMMER){
+ if(MACHINE_OUTPUT){
+ printOutput_Machine(mtts, t, keylen, paired, false);
+ return;
+ }
+ long msaIterationsLimited=0;
+ long msaIterationsUnlimited=0;
+
+ long basesUsed=0;
+ long basesAtQuickmap=0;
+ long keysUsed=0;
+
+ long syntheticReads=0;
+ long numMated=0;
+ long badPairs=0;
+ long innerLengthSum=0;
+ long outerLengthSum=0;
+ long insertSizeSum=0;
+
+ long callsToScore=0;
+ long callsToExtend=0;
+ long initialKeys=0;
+ long initialKeyIterations=0;
+ long usedKeys=0;
+ long usedKeyIterations=0;
+
+ long[] hist_hits=new long[41];
+ long[] hist_hits_score=new long[41];
+ long[] hist_hits_extend=new long[41];
+
+ long initialSiteSum1=0;
+ long postTrimSiteSum1=0;
+ long postRescueSiteSum1=0;
+ long siteSum1=0;
+ long topSiteSum1=0;
+
+ long matchCountS1=0;
+ long matchCountI1=0;
+ long matchCountD1=0;
+ long matchCountM1=0;
+ long matchCountN1=0;
+
+
+ long mapped1=0;
+ long mappedRetained1=0;
+ long rescuedP1=0;
+ long rescuedM1=0;
+ long truePositiveP1=0;
+ long truePositiveM1=0;
+ long falsePositive1=0;
+ long totalCorrectSites1=0;
+ long firstSiteCorrectP1=0;
+ long firstSiteCorrectM1=0;
+ long firstSiteIncorrect1=0;
+ long firstSiteCorrectLoose1=0;
+ long firstSiteIncorrectLoose1=0;
+ long firstSiteCorrectPaired1=0;
+ long firstSiteCorrectSolo1=0;
+ long firstSiteCorrectRescued1=0;
+ long perfectHit1=0; //Highest score is max score
+ long uniqueHit1=0; //Only one hit has highest score
+ long correctUniqueHit1=0; //unique highest hit on answer site
+ long correctMultiHit1=0; //non-unique highest hit on answer site (non-skimmer only)
+ long correctLowHit1=0; //hit on answer site, but not highest scorer
+ long noHit1=0;
+ long perfectMatch1=0; //Highest slow score is max slow score
+ long semiperfectMatch1=0;
+ long perfectHitCount1=0;
+ long semiPerfectHitCount1=0;
+ long duplicateBestAlignment1=0;
+
+ long totalNumCorrect1=0; //Only for skimmer
+ long totalNumIncorrect1=0; //Only for skimmer
+ long totalNumIncorrectPrior1=0; //Only for skimmer
+ long totalNumCapturedAllCorrect1=0; //Only for skimmer
+ long totalNumCapturedAllCorrectTop1=0; //Only for skimmer
+ long totalNumCapturedAllCorrectOnly1=0; //Only for skimmer
+
+ long initialSiteSum2=0;
+ long postTrimSiteSum2=0;
+ long postRescueSiteSum2=0;
+ long siteSum2=0;
+ long topSiteSum2=0;
+
+ long mapped2=0;
+ long mappedRetained2=0;
+ long rescuedP2=0;
+ long rescuedM2=0;
+ long truePositiveP2=0;
+ long truePositiveM2=0;
+ long falsePositive2=0;
+ long totalCorrectSites2=0;
+ long firstSiteCorrectP2=0;
+ long firstSiteCorrectM2=0;
+ long firstSiteIncorrect2=0;
+ long firstSiteCorrectLoose2=0;
+ long firstSiteIncorrectLoose2=0;
+ long firstSiteCorrectPaired2=0;
+ long firstSiteCorrectSolo2=0;
+ long firstSiteCorrectRescued2=0;
+ long perfectHit2=0; //Highest score is max score
+ long perfectHitCount2=0;
+ long semiPerfectHitCount2=0;
+
+ long uniqueHit2=0; //Only one hit has highest score
+ long correctUniqueHit2=0; //unique highest hit on answer site
+ long correctMultiHit2=0; //non-unique highest hit on answer site (non-skimmer only)
+ long correctLowHit2=0; //hit on answer site, but not highest scorer
+ long noHit2=0;
+ long perfectMatch2=0; //Highest slow score is max slow score
+ long semiperfectMatch2=0;
+ long duplicateBestAlignment2=0;
+
+ long totalNumCorrect2=0; //Only for skimmer
+ long totalNumIncorrect2=0; //Only for skimmer
+ long totalNumIncorrectPrior2=0; //Only for skimmer
+ long totalNumCapturedAllCorrect2=0; //Only for skimmer
+ long totalNumCapturedAllCorrectTop2=0; //Only for skimmer
+ long totalNumCapturedAllCorrectOnly2=0; //Only for skimmer
+
+ long matchCountS2=0;
+ long matchCountI2=0;
+ long matchCountD2=0;
+ long matchCountM2=0;
+ long matchCountN2=0;
+
+ readsUsed=0;
+ for(int i=0; i "+falsePositive);
+ totalCorrectSites1+=mtt.totalCorrectSites1;
+
+ firstSiteCorrectP1+=mtt.firstSiteCorrectP1;
+ firstSiteCorrectM1+=mtt.firstSiteCorrectM1;
+ firstSiteIncorrect1+=mtt.firstSiteIncorrect1;
+ firstSiteCorrectLoose1+=mtt.firstSiteCorrectLoose1;
+ firstSiteIncorrectLoose1+=mtt.firstSiteIncorrectLoose1;
+ firstSiteCorrectPaired1+=mtt.firstSiteCorrectPaired1;
+ firstSiteCorrectSolo1+=mtt.firstSiteCorrectSolo1;
+ firstSiteCorrectRescued1+=mtt.firstSiteCorrectRescued1;
+
+ perfectHit1+=mtt.perfectHit1; //Highest score is max score
+ perfectHitCount1+=mtt.perfectHitCount1;
+ semiPerfectHitCount1+=mtt.semiPerfectHitCount1;
+ uniqueHit1+=mtt.uniqueHit1; //Only one hit has highest score
+ correctUniqueHit1+=mtt.correctUniqueHit1; //unique highest hit on answer site
+ correctMultiHit1+=mtt.correctMultiHit1; //non-unique highest hit on answer site
+ correctLowHit1+=mtt.correctLowHit1; //hit on answer site, but not highest scorer
+ noHit1+=mtt.noHit1;
+
+ totalNumCorrect1+=mtt.totalNumCorrect1; //Skimmer only
+ totalNumIncorrect1+=mtt.totalNumIncorrect1; //Skimmer only
+ totalNumIncorrectPrior1+=mtt.totalNumIncorrectPrior1; //Skimmer only
+ totalNumCapturedAllCorrect1+=mtt.totalNumCapturedAllCorrect1; //Skimmer only
+ totalNumCapturedAllCorrectTop1+=mtt.totalNumCapturedAllCorrectTop1; //Skimmer only
+ totalNumCapturedAllCorrectOnly1+=mtt.totalNumCapturedAllCorrectOnly1; //Skimmer only
+
+ perfectMatch1+=mtt.perfectMatch1; //Highest slow score is max slow score
+ semiperfectMatch1+=mtt.semiperfectMatch1; //A semiperfect mapping was found
+
+ duplicateBestAlignment1+=mtt.ambiguousBestAlignment1;
+
+ initialSiteSum1+=mtt.initialSiteSum1;
+ postTrimSiteSum1+=mtt.postTrimSiteSum1;
+ postRescueSiteSum1+=mtt.postRescueSiteSum1;
+ siteSum1+=mtt.siteSum1;
+ topSiteSum1+=mtt.topSiteSum1;
+
+ AbstractIndex index=mtt.index();
+ callsToScore+=index.callsToScore;
+ callsToExtend+=index.callsToExtendScore;
+ initialKeys+=index.initialKeys;
+ initialKeyIterations+=index.initialKeyIterations;
+ usedKeys+=index.usedKeys;
+ usedKeyIterations+=index.usedKeyIterations;
+
+ for(int j=0; j "+falsePositive);
+ totalCorrectSites2+=mtt.totalCorrectSites2;
+
+ firstSiteCorrectP2+=mtt.firstSiteCorrectP2;
+ firstSiteCorrectM2+=mtt.firstSiteCorrectM2;
+ firstSiteIncorrect2+=mtt.firstSiteIncorrect2;
+ firstSiteCorrectLoose2+=mtt.firstSiteCorrectLoose2;
+ firstSiteIncorrectLoose2+=mtt.firstSiteIncorrectLoose2;
+ firstSiteCorrectPaired2+=mtt.firstSiteCorrectPaired2;
+ firstSiteCorrectSolo2+=mtt.firstSiteCorrectSolo2;
+ firstSiteCorrectRescued2+=mtt.firstSiteCorrectRescued2;
+
+ perfectHit2+=mtt.perfectHit2; //Highest score is max score
+ perfectHitCount2+=mtt.perfectHitCount2;
+ semiPerfectHitCount2+=mtt.semiPerfectHitCount2;
+ uniqueHit2+=mtt.uniqueHit2; //Only one hit has highest score
+ correctUniqueHit2+=mtt.correctUniqueHit2; //unique highest hit on answer site
+ correctMultiHit2+=mtt.correctMultiHit2; //non-unique highest hit on answer site
+ correctLowHit2+=mtt.correctLowHit2; //hit on answer site, but not highest scorer
+ noHit2+=mtt.noHit2;
+
+ totalNumCorrect2+=mtt.totalNumCorrect2; //Skimmer only
+ totalNumIncorrect2+=mtt.totalNumIncorrect2; //Skimmer only
+ totalNumIncorrectPrior2+=mtt.totalNumIncorrectPrior2; //Skimmer only
+ totalNumCapturedAllCorrect2+=mtt.totalNumCapturedAllCorrect2; //Skimmer only
+ totalNumCapturedAllCorrectTop2+=mtt.totalNumCapturedAllCorrectTop2; //Skimmer only
+ totalNumCapturedAllCorrectOnly2+=mtt.totalNumCapturedAllCorrectOnly2; //Skimmer only
+
+ perfectMatch2+=mtt.perfectMatch2; //Highest slow score is max slow score
+ semiperfectMatch2+=mtt.semiperfectMatch2; //A semiperfect mapping was found
+
+ duplicateBestAlignment2+=mtt.ambiguousBestAlignment2;
+
+ initialSiteSum2+=mtt.initialSiteSum2;
+ postTrimSiteSum2+=mtt.postTrimSiteSum2;
+ postRescueSiteSum2+=mtt.postRescueSiteSum2;
+ siteSum2+=mtt.siteSum2;
+ topSiteSum2+=mtt.topSiteSum2;
+
+ matchCountS2+=mtt.matchCountS2;
+ matchCountI2+=mtt.matchCountI2;
+ matchCountD2+=mtt.matchCountD2;
+ matchCountM2+=mtt.matchCountM2;
+ matchCountN2+=mtt.matchCountN2;
+
+ }
+ reads=readsUsed;
+ if(syntheticReads>0){SYNTHETIC=true;}
+
+ t.stop();
+ long nanos=t.elapsed;
+
+ if(verbose_stats>1){
+ StringBuilder sb=new StringBuilder(1000);
+ sb.append("\n\n###################\n#hits\tcount\tscore\textend\n");
+ for(int i=0; i=1){sysout.println("MSA iterations: \t"+String.format("%.2fL + %.2fU = %.2f", milf,milu,milf+milu));}
+
+ sysout.println();
+ sysout.println("\nRead 1 data:");
+ if(verbose_stats>=1){
+ if(avgInitialKeys>0){sysout.println(String.format("Avg Initial Keys: \t"+(avgInitialKeys<100?" ":"")+"%.3f",
+ avgInitialKeys));}
+ if(avgUsedKeys>0){sysout.println(String.format("Avg Used Keys: \t"+(avgUsedKeys<100?" ":"")+"%.3f",
+ avgUsedKeys));}
+ if(avgCallsToScore>0){sysout.println(String.format("Avg Calls to Score: \t"+(avgCallsToScore<100?" ":"")+"%.3f",
+ avgCallsToScore));}
+ if(avgCallsToExtendScore>0){sysout.println(String.format("Avg Calls to Extend:\t"+(avgCallsToExtendScore<100?" ":"")+"%.3f",
+ avgCallsToExtendScore));}
+ sysout.println();
+
+ sysout.println(String.format("Avg Initial Sites: \t"+(avgInitialSites<10?" ":"")+"%.3f", avgInitialSites));
+ if(TRIM_LIST){sysout.println(String.format("Avg Post-Trim: \t"+(avgPostTrimSites<10?" ":"")+"%.3f", avgPostTrimSites));}
+ if(paired){sysout.println(String.format("Avg Post-Rescue: \t"+(avgPostRescueSites<10?" ":"")+"%.3f", avgPostRescueSites));}
+ sysout.println(String.format("Avg Final Sites: \t"+(avgSites<10?" ":"")+"%.3f", avgSites));
+ sysout.println(String.format("Avg Top Sites: \t"+(avgTopSites<10?" ":"")+"%.3f", avgTopSites));
+ if(verbose_stats>1){
+ sysout.println(String.format("Avg Perfect Sites: \t"+(avgPerfectSites<10?" ":"")+"%.3f \t"+
+ (perfectHitCountPercent<10?" ":"")+"%.3f%%", avgPerfectSites, perfectHitCountPercent));
+ sysout.println(String.format("Avg Semiperfect Sites:\t"+(avgSemiPerfectSites<10?" ":"")+"%.3f \t"+
+ (semiPerfectHitCountPercent<10?" ":"")+"%.3f%%", avgSemiPerfectSites, semiPerfectHitCountPercent));
+ }
+
+ if(SYNTHETIC){
+ sysout.println(String.format("Avg Correct Sites: \t"+(avgNumCorrect<10?" ":"")+"%.3f", avgNumCorrect));
+ if(SKIMMER){
+ sysout.println(String.format("Avg Incorrect Sites:\t"+(avgNumIncorrect<10?" ":"")+"%.3f", avgNumIncorrect));
+ sysout.println(String.format("Avg IncorrectP Sites:\t"+(avgNumIncorrectPrior<10?" ":"")+"%.3f", avgNumIncorrectPrior));
+ }
+ }
+ }
+
+ sysout.println();
+// sysout.println(String.format("perfectHit: \t%.2f", perfectHitPercent)+"%");
+// sysout.println(String.format("uniqueHit: \t%.2f", uniqueHitPercent)+"%");
+// sysout.println(String.format("correctUniqueHit:\t%.2f", correctUniqueHitPercent)+"%");
+//// sysout.println(String.format("correctMultiHit: \t%.2f", correctMultiHitPercent)+"%");
+// sysout.println(String.format("correctHighHit: \t%.2f", correctHighHitPercent)+"%");
+// sysout.println(String.format("correctHit: \t%.2f", correctHitPercent)+"%");
+
+ //sysout.println(String.format("mapped: \t"+(mappedB<10?" ":"")+"%.3f", mappedB)+"%");
+ if(REMOVE_DUPLICATE_BEST_ALIGNMENTS){
+ double x=ambiguousFound+mappedRetainedB;
+ sysout.println("mapped: \t"+padPercent(x,4)+"%"+"\t"+mappedReads+" reads");
+ sysout.println("unambiguous: \t"+padPercent(mappedRetainedB,4)+"%"+"\t"+unambiguousReads+" reads");
+ }else{
+ double x=mappedRetainedB-ambiguousFound;
+ sysout.println("mapped: \t"+padPercent(mappedRetainedB,4)+"%"+"\t"+mappedReads+" reads");
+ sysout.println("unambiguous: \t"+padPercent(x,4)+"%"+"\t"+unambiguousReads+" reads");
+ }
+ if(SYNTHETIC){
+ sysout.println(String.format("true positive: \t"+((truePositiveStrict)<10?" ":"")+"%.4f%%\t(loose: "+(truePositiveLoose<10?" ":"")+"%.4f%%)",
+ truePositiveStrict, truePositiveLoose));
+ sysout.println(String.format("false positive: \t"+(falsePositiveB<10?" ":"")+"%.4f%%\t(loose: "+(falsePositiveLooseB<10?" ":"")+"%.4f%%)",
+ falsePositiveB, falsePositiveLooseB));
+ sysout.println(String.format("SNR: \t"+(snrStrict<10 && snrStrict>=0 ?" ":"")+"%.4f \t(loose: "+(snrLoose<10&&snrLoose>=0?" ":"")+"%.4f)",
+ snrStrict, snrLoose));
+ if(verbose_stats>0){sysout.println(String.format("Plus/Minus ratio:\t %1.4f", truePositivePMRatio));}
+
+ if(SKIMMER){
+ sysout.println(String.format("found all correct:\t"+(rateCapturedAllCorrect<10?" ":"")+"%.3f", rateCapturedAllCorrect)+"%");
+ sysout.println(String.format("all correct top: \t"+(rateCapturedAllTop<10?" ":"")+"%.3f", rateCapturedAllTop)+"%");
+ sysout.println(String.format("all correct only: \t"+(rateCapturedAllOnly<10?" ":"")+"%.3f", rateCapturedAllOnly)+"%");
+ }
+ }
+
+ sysout.println();
+ if(paired){
+ sysout.println(String.format("Mated pairs: \t"+(matedPercent<10?" ":"")+"%.4f", matedPercent)+"%");
+ if(SYNTHETIC){
+ sysout.println(String.format("correct pairs: \t"+(truePositivePairedB<10?" ":"")+"%.3f", truePositivePairedB)+"% (of mated)");
+ }
+ sysout.println(String.format("bad pairs: \t"+(badPairsPercent<10?" ":"")+"%.3f", badPairsPercent)+"% (of all reads)");
+ }
+ if(SYNTHETIC){
+ sysout.println(String.format("correct singles: \t"+(truePositiveSoloB<10?" ":"")+"%.4f", truePositiveSoloB)+"%");
+ }
+ if(paired){
+ sysout.println(String.format("rescued: \t"+(rescuedPB+rescuedMB<10?" ":"")+"%.3f", rescuedPB+rescuedMB)+"%");
+// sysout.println(String.format("rescued +: \t%.3f", rescuedPB)+"%");
+// sysout.println(String.format("rescued -: \t%.3f", rescuedMB)+"%");
+ if(SYNTHETIC){
+ sysout.println(String.format("correct rescued: \t"+(truePositiveRescuedB<10?" ":"")+"%.3f", truePositiveRescuedB)+"%");
+ }
+ sysout.println(String.format("avg insert size: \t%.2f", insertSizeAvg));
+ if(verbose_stats>=1){
+ sysout.println(String.format("avg inner length:\t%.2f", innerLengthAvg));
+ sysout.println(String.format("avg insert size: \t%.2f", outerLengthAvg));
+ }
+ }
+ sysout.println();
+ sysout.println(String.format("perfect best site:\t"+(perfectMatchPercent<10?" ":"")+"%.4f", perfectMatchPercent)+"%");
+ sysout.println(String.format("semiperfect site:\t"+(semiperfectMatchPercent<10?" ":"")+"%.4f", semiperfectMatchPercent)+"%");
+ sysout.println(String.format("ambiguousMapping:\t"+(ambiguousFound<10?" ":"")+"%.4f", ambiguousFound)+"%\t"+
+ (REMOVE_DUPLICATE_BEST_ALIGNMENTS ? "(Removed)" : "(Kept)"));
+ sysout.println(String.format("low-Q discards: \t"+(lowQualityReadsDiscardedPercent<10?" ":"")+"%.4f",
+ lowQualityReadsDiscardedPercent)+"%");
+ if(SYNTHETIC){
+ sysout.println(String.format("false negative: \t"+(noHitPercent<10?" ":"")+"%.4f", noHitPercent)+"%");
+ sysout.println(String.format("correctLowHit: \t"+(correctLowHitPercent<10?" ":"")+"%.4f", correctLowHitPercent)+"%");
+ }
+
+ if(MAKE_MATCH_STRING){
+ sysout.println();
+ sysout.println("Match Rate: \t"+padPercent(matchRate,4)+"% \t"+matchCountM1);
+ sysout.println("Error Rate: \t"+padPercent(errorRate,4)+"% \t"+matchErrors);
+ sysout.println("Sub Rate: \t"+padPercent(subRate,4)+"% \t"+matchCountS1);
+ sysout.println("Del Rate: \t"+padPercent(delRate,4)+"% \t"+matchCountD1);
+ sysout.println("Ins Rate: \t"+padPercent(insRate,4)+"% \t"+matchCountI1);
+ sysout.println("N Rate: \t"+padPercent(nRate,4)+"% \t"+matchCountN1);
+
+ if(DOUBLE_PRINT_ERROR_RATE){
+ System.err.println();
+ System.err.println(String.format("Match Rate: \t"+(matchRate<10?" ":"")+"%.4f", matchRate)+"% \t"+matchCountM1);
+ System.err.println(String.format("Error Rate: \t"+(errorRate<10?" ":"")+"%.4f", errorRate)+"% \t"+matchErrors);
+ System.err.println(String.format("Sub Rate: \t"+(subRate<10?" ":"")+"%.4f", subRate)+"% \t"+matchCountS1);
+ System.err.println(String.format("Del Rate: \t"+(delRate<10?" ":"")+"%.4f", delRate)+"% \t"+matchCountD1);
+ System.err.println(String.format("Ins Rate: \t"+(insRate<10?" ":"")+"%.4f", insRate)+"% \t"+matchCountI1);
+ System.err.println(String.format("N Rate: \t"+(nRate<10?" ":"")+"%.4f", nRate)+"% \t"+matchCountN1);
+ }
+ }
+
+ if(paired){
+ invSites100=100d/siteSum2;
+
+ perfectHitPercent=perfectHit2*invTrials100; //Highest score is max score
+ perfectMatchPercent=perfectMatch2*invTrials100;
+ semiperfectMatchPercent=semiperfectMatch2*invTrials100;
+
+ perfectHitCountPercent=perfectHitCount2*invSites100;
+ semiPerfectHitCountPercent=semiPerfectHitCount2*invSites100;
+
+ uniqueHitPercent=uniqueHit2*invTrials100; //Only one hit has highest score
+ correctUniqueHitPercent=correctUniqueHit2*invTrials100; //unique highest hit on answer site
+ correctMultiHitPercent=correctMultiHit2*invTrials100; //non-unique highest hit on answer site
+ correctLowHitPercent=correctLowHit2*invTrials100; //hit on answer site, but not highest scorer
+ ambiguousFound=(duplicateBestAlignment2*invTrials100);
+ correctHighHitPercent=(correctMultiHit2+correctUniqueHit2)*invTrials100;
+ correctHitPercent=(correctLowHit2+correctMultiHit2+correctUniqueHit2)*invTrials100;
+
+ mappedB=(mapped2*invTrials100);
+ mappedRetainedB=(mappedRetained2*invTrials100);
+ rescuedPB=(rescuedP2*invTrials100);
+ rescuedMB=(rescuedM2*invTrials100);
+ falsePositiveB=(firstSiteIncorrect2*invTrials100);
+ falsePositiveLooseB=(firstSiteIncorrectLoose2*invTrials100);
+ truePositivePB=(firstSiteCorrectP2*invTrials100);
+ truePositiveMB=(firstSiteCorrectM2*invTrials100);
+ truePositiveStrict=((firstSiteCorrectP2+firstSiteCorrectM2)*invTrials100);
+ truePositiveLoose=(firstSiteCorrectLoose2*invTrials100);
+ snrStrict=10*Math.log10((firstSiteCorrectM2+firstSiteCorrectP2+0.1)/(firstSiteIncorrect2+0.1));
+ snrLoose=10*Math.log10((firstSiteCorrectLoose2+0.1)/(firstSiteIncorrectLoose2+0.1));
+ truePositivePMRatio=(truePositivePB/truePositiveMB);
+ truePositivePairedB=(firstSiteCorrectPaired2*100d/numMated);
+ truePositiveSoloB=(firstSiteCorrectSolo2*100d/(mappedRetained2-numMated));
+ truePositiveRescuedB=(firstSiteCorrectRescued2*100d/(rescuedP2+rescuedM2));
+ avgNumCorrect=(totalCorrectSites2/(1d*(truePositiveP2+truePositiveM2)));
+ noHitPercent=noHit2*invTrials100;
+
+ avgNumCorrect=(SKIMMER ? totalNumCorrect2*invTrials : (totalCorrectSites2/(1d*(truePositiveP2+truePositiveM2))));
+ avgNumIncorrect=totalNumIncorrect1*invTrials; //Skimmer only
+ avgNumIncorrectPrior=totalNumIncorrectPrior1*invTrials; //Skimmer only
+
+ rateCapturedAllCorrect=totalNumCapturedAllCorrect2*invTrials100; //Skimmer only
+ rateCapturedAllTop=totalNumCapturedAllCorrectTop2*invTrials100; //Skimmer only
+ rateCapturedAllOnly=totalNumCapturedAllCorrectOnly2*invTrials100; //Skimmer only
+
+ if(REMOVE_DUPLICATE_BEST_ALIGNMENTS){
+ mappedReads=mappedRetained2+duplicateBestAlignment2;
+ unambiguousReads=mappedRetained2;
+ }else{
+ mappedReads=mappedRetained2;
+ unambiguousReads=mappedRetained2-duplicateBestAlignment2;
+ }
+
+ avgInitialSites=initialSiteSum2*invTrials;
+ avgPostTrimSites=postTrimSiteSum2*invTrials;
+ avgPostRescueSites=postRescueSiteSum2*invTrials;
+ avgSites=siteSum2*invTrials;
+ avgPerfectSites=(perfectHitCount1*invTrials);
+ avgSemiPerfectSites=(semiPerfectHitCount1*invTrials);
+ avgTopSites=topSiteSum2*invTrials;
+ lowQualityReadsDiscardedPercent=lowQualityReadsDiscarded2*invTrials100;
+
+ matchErrors=matchCountS2+matchCountI2+matchCountD2;
+ baseLen=matchCountM2+matchCountI2+matchCountS2+matchCountN2;
+ matchLen=matchCountM2+matchCountI2+matchCountS2+matchCountN2+matchCountD2;
+ refLen=matchCountM2+matchCountS2+matchCountN2+matchCountD2;
+ errorRate=matchErrors*100d/matchLen;
+ matchRate=matchCountM2*100d/matchLen;//baseLen;
+ subRate=matchCountS2*100d/matchLen;//baseLen;
+ delRate=matchCountD2*100d/matchLen;
+ insRate=matchCountI2*100d/matchLen;//baseLen;
+ nRate=matchCountN2*100d/matchLen;//baseLen;
+
+ sysout.println("\n\nRead 2 data:");
+ if(verbose_stats>=1){
+ sysout.println(String.format("Avg Initial Sites: \t"+(avgInitialSites<10?" ":"")+"%.3f", avgInitialSites));
+ if(TRIM_LIST){sysout.println(String.format("Avg Post-Trim: \t"+(avgPostTrimSites<10?" ":"")+"%.3f", avgPostTrimSites));}
+ sysout.println(String.format("Avg Post-Rescue: \t"+(avgPostRescueSites<10?" ":"")+"%.3f", avgPostRescueSites));
+ sysout.println(String.format("Avg Final Sites: \t"+(avgSites<10?" ":"")+"%.3f", avgSites));
+ sysout.println(String.format("Avg Top Sites: \t"+(avgTopSites<10?" ":"")+"%.3f", avgTopSites));
+ sysout.println(String.format("Avg Perfect Sites: \t"+(avgPerfectSites<10?" ":"")+"%.3f \t"+
+ (perfectHitCountPercent<10?" ":"")+"%.3f%%", avgPerfectSites, perfectHitCountPercent));
+ sysout.println(String.format("Avg Semiperfect Sites:\t"+(avgSemiPerfectSites<10?" ":"")+"%.3f \t"+
+ (semiPerfectHitCountPercent<10?" ":"")+"%.3f%%", avgSemiPerfectSites, semiPerfectHitCountPercent));
+
+ if(SYNTHETIC){
+ sysout.println(String.format("Avg Correct Sites: \t"+(avgNumCorrect<10?" ":"")+"%.3f", avgNumCorrect));
+ if(SKIMMER){
+ sysout.println(String.format("Avg Incorrect Sites:\t"+(avgNumIncorrect<10?" ":"")+"%.3f", avgNumIncorrect));
+ sysout.println(String.format("Avg IncorrectP Sites:\t"+(avgNumIncorrectPrior<10?" ":"")+"%.3f", avgNumIncorrectPrior));
+ }
+ }
+ }
+ sysout.println();
+// sysout.println(String.format("perfectHit: \t%.2f", perfectHitPercent)+"%");
+// sysout.println(String.format("uniqueHit: \t%.2f", uniqueHitPercent)+"%");
+// sysout.println(String.format("correctUniqueHit:\t%.2f", correctUniqueHitPercent)+"%");
+// sysout.println(String.format("correctMultiHit: \t%.2f", correctMultiHitPercent)+"%");
+// sysout.println(String.format("correctHighHit: \t%.2f", correctHighHitPercent)+"%");
+// sysout.println(String.format("correctHit: \t%.2f", correctHitPercent)+"%");
+
+ //sysout.println(String.format("mapped: \t"+(mappedB<10?" ":"")+"%.3f", mappedB)+"%");
+ if(REMOVE_DUPLICATE_BEST_ALIGNMENTS){
+ double x=ambiguousFound+mappedRetainedB;
+ sysout.println("mapped: \t"+padPercent(x,4)+"%"+"\t"+mappedReads+" reads");
+ sysout.println("unambiguous: \t"+padPercent(mappedRetainedB,4)+"%"+"\t"+unambiguousReads+" reads");
+ }else{
+ double x=mappedRetainedB-ambiguousFound;
+ sysout.println("mapped: \t"+padPercent(mappedRetainedB,4)+"%"+"\t"+mappedReads+" reads");
+ sysout.println("unambiguous: \t"+padPercent(x,4)+"%"+"\t"+unambiguousReads+" reads");
+ }
+ if(SYNTHETIC){
+ sysout.println(String.format("true positive: \t"+((truePositiveStrict)<10?" ":"")+"%.4f%%\t(loose: "+(truePositiveLoose<10?" ":"")+"%.4f%%)",
+ truePositiveStrict, truePositiveLoose));
+ sysout.println(String.format("false positive: \t"+(falsePositiveB<10?" ":"")+"%.4f%%\t(loose: "+(falsePositiveLooseB<10?" ":"")+"%.4f%%)",
+ falsePositiveB, falsePositiveLooseB));
+ sysout.println(String.format("SNR: \t"+(snrStrict<10 && snrStrict>=0 ?" ":"")+"%.4f \t(loose: "+(snrLoose<10&&snrLoose>=0?" ":"")+"%.4f)",
+ snrStrict, snrLoose));
+ if(verbose_stats>0){sysout.println(String.format("Plus/Minus ratio:\t %1.4f", truePositivePMRatio));}
+ }
+ sysout.println();
+ if(paired){
+// sysout.println(String.format("Mated pairs: \t"+(matedPercent<10?" ":"")+"%.4f", matedPercent)+"%");
+ if(SYNTHETIC){
+ sysout.println(String.format("correct pairs: \t"+(truePositivePairedB<10?" ":"")+"%.4f", truePositivePairedB)+"%");
+ }
+ }
+ if(SYNTHETIC){
+ sysout.println(String.format("correct singles: \t"+(truePositiveSoloB<10?" ":"")+"%.4f", truePositiveSoloB)+"%");
+ }
+ if(paired){
+ sysout.println(String.format("rescued: \t"+(rescuedPB+rescuedMB<10?" ":"")+"%.3f", rescuedPB+rescuedMB)+"%");
+ if(SYNTHETIC){
+ sysout.println(String.format("correct rescued: \t"+(truePositiveRescuedB<10?" ":"")+"%.3f", truePositiveRescuedB)+"%");
+ }
+ }
+ sysout.println();
+ sysout.println(String.format("perfect best site:\t"+(perfectMatchPercent<10?" ":"")+"%.4f", perfectMatchPercent)+"%");
+ sysout.println(String.format("semiperfect site:\t"+(semiperfectMatchPercent<10?" ":"")+"%.4f", semiperfectMatchPercent)+"%");
+ sysout.println(String.format("ambiguousMapping:\t"+(ambiguousFound<10?" ":"")+"%.4f", ambiguousFound)+"%\t"+
+ (REMOVE_DUPLICATE_BEST_ALIGNMENTS ? "(Removed)" : "(Kept)"));
+ sysout.println(String.format("low-Q discards: \t"+(lowQualityReadsDiscardedPercent<10?" ":"")+"%.4f",
+ lowQualityReadsDiscardedPercent)+"%");
+ if(SYNTHETIC){
+ sysout.println(String.format("false negative: \t"+(noHitPercent<10?" ":"")+"%.4f", noHitPercent)+"%");
+ sysout.println(String.format("correctLowHit: \t"+(correctLowHitPercent<10?" ":"")+"%.4f", correctLowHitPercent)+"%");
+ }
+
+ if(MAKE_MATCH_STRING){
+ sysout.println();
+ sysout.println("Match Rate: \t"+padPercent(matchRate,4)+"% \t"+matchCountM2);
+ sysout.println("Error Rate: \t"+padPercent(errorRate,4)+"% \t"+matchErrors);
+ sysout.println("Sub Rate: \t"+padPercent(subRate,4)+"% \t"+matchCountS2);
+ sysout.println("Del Rate: \t"+padPercent(delRate,4)+"% \t"+matchCountD2);
+ sysout.println("Ins Rate: \t"+padPercent(insRate,4)+"% \t"+matchCountI2);
+ sysout.println("N Rate: \t"+padPercent(nRate,4)+"% \t"+matchCountN2);
+ }
+ }
+
+ if(BBSplitter.TRACK_SCAF_STATS){
+ BBSplitter.printCounts(BBSplitter.SCAF_STATS_FILE, BBSplitter.scafCountTable, true, readsUsed+readsUsed2);
+ }
+
+ if(BBSplitter.TRACK_SET_STATS){
+ BBSplitter.printCounts(BBSplitter.SET_STATS_FILE, BBSplitter.setCountTable, true, readsUsed+readsUsed2);
+ }
+
+ if(ReadStats.COLLECT_QUALITY_STATS || ReadStats.COLLECT_MATCH_STATS || ReadStats.COLLECT_INSERT_STATS){
+ ReadStats rs=ReadStats.mergeAll();
+ if(ReadStats.QUAL_HIST_FILE!=null){rs.writeQualityToFile(ReadStats.QUAL_HIST_FILE, paired);}
+ if(ReadStats.MATCH_HIST_FILE!=null){rs.writeMatchToFile(ReadStats.MATCH_HIST_FILE, paired);}
+ if(ReadStats.INSERT_HIST_FILE!=null){rs.writeInsertToFile(ReadStats.INSERT_HIST_FILE);}
+ }
+
+ assert(!CALC_STATISTICS || truePositiveP1+truePositiveM1+falsePositive1+noHit1+lowQualityReadsDiscarded1==reads) :
+ "\nThe number of reads out does not add up to the number of reads in.\nThis may indicate that a mapping thread crashed.\n"+
+ truePositiveP1+"+"+truePositiveM1+"+"+falsePositive1+"+"+noHit1+"+"+lowQualityReadsDiscarded1+" = "+
+ (truePositiveP1+truePositiveM1+falsePositive1+noHit1+lowQualityReadsDiscarded1)+" != "+reads;
+ if(!SKIMMER){
+ assert(!CALC_STATISTICS || truePositiveP1+truePositiveM1==correctLowHit1+correctMultiHit1+correctUniqueHit1);
+ }else{
+ assert(!CALC_STATISTICS || truePositiveP1+truePositiveM1==correctLowHit1+correctUniqueHit1);
+ }
+ }
+
+
+ /**
+ * Prints mapping statistics in a machine-readable layout: one
+ * key+DELIMITER+value entry per line, written to sysout.
+ * Counters are read from each mapping thread and from its index, summed into
+ * the locals declared at the top of the method, converted to rates and
+ * percentages, and printed first for read 1 (R1_ keys) and then, when
+ * paired, for read 2 (R2_ keys).
+ * Besides printing, this method resets readsUsed, assigns reads, sets
+ * SYNTHETIC when any synthetic reads were counted, stops the timer, and
+ * triggers the BBSplitter / ReadStats file output when those are enabled.
+ *
+ * @param mtts mapping threads whose counters are aggregated
+ * @param t timer; it is stopped here and its elapsed value is read
+ * @param keylen key length; NOTE(review): not referenced in the visible part of this method - confirm usage
+ * @param paired true to also report pairing statistics and the read 2 section
+ * @param SKIMMER selects how avgNumCorrect is computed and which consistency assertion is applied
+ */
+ static void printOutput_Machine(final AbstractMapThread[] mtts, final Timer t, final int keylen, final boolean paired, final boolean SKIMMER){
+ long msaIterationsLimited=0;
+ long msaIterationsUnlimited=0;
+
+ long basesUsed=0;
+ long basesAtQuickmap=0;
+ long keysUsed=0;
+
+ long syntheticReads=0;
+ long numMated=0;
+ long badPairs=0;
+ long innerLengthSum=0;
+ long outerLengthSum=0;
+ long insertSizeSum=0;
+
+ long callsToScore=0;
+ long callsToExtend=0;
+ long initialKeys=0;
+ long initialKeyIterations=0;
+ long usedKeys=0;
+ long usedKeyIterations=0;
+
+ long[] hist_hits=new long[41];
+ long[] hist_hits_score=new long[41];
+ long[] hist_hits_extend=new long[41];
+
+ long initialSiteSum1=0;
+ long postTrimSiteSum1=0;
+ long postRescueSiteSum1=0;
+ long siteSum1=0;
+ long topSiteSum1=0;
+
+ long matchCountS1=0;
+ long matchCountI1=0;
+ long matchCountD1=0;
+ long matchCountM1=0;
+ long matchCountN1=0;
+
+
+ long mapped1=0;
+ long mappedRetained1=0;
+ long rescuedP1=0;
+ long rescuedM1=0;
+ long truePositiveP1=0;
+ long truePositiveM1=0;
+ long falsePositive1=0;
+ long totalCorrectSites1=0;
+ long firstSiteCorrectP1=0;
+ long firstSiteCorrectM1=0;
+ long firstSiteIncorrect1=0;
+ long firstSiteCorrectLoose1=0;
+ long firstSiteIncorrectLoose1=0;
+ long firstSiteCorrectPaired1=0;
+ long firstSiteCorrectSolo1=0;
+ long firstSiteCorrectRescued1=0;
+ long perfectHit1=0; //Highest score is max score
+ long uniqueHit1=0; //Only one hit has highest score
+ long correctUniqueHit1=0; //unique highest hit on answer site
+ long correctMultiHit1=0; //non-unique highest hit on answer site (non-skimmer only)
+ long correctLowHit1=0; //hit on answer site, but not highest scorer
+ long noHit1=0;
+ long perfectMatch1=0; //Highest slow score is max slow score
+ long semiperfectMatch1=0;
+ long perfectHitCount1=0;
+ long semiPerfectHitCount1=0;
+ long duplicateBestAlignment1=0;
+
+ long totalNumCorrect1=0; //Only for skimmer
+ long totalNumIncorrect1=0; //Only for skimmer
+ long totalNumIncorrectPrior1=0; //Only for skimmer
+ long totalNumCapturedAllCorrect1=0; //Only for skimmer
+ long totalNumCapturedAllCorrectTop1=0; //Only for skimmer
+ long totalNumCapturedAllCorrectOnly1=0; //Only for skimmer
+
+ long initialSiteSum2=0;
+ long postTrimSiteSum2=0;
+ long postRescueSiteSum2=0;
+ long siteSum2=0;
+ long topSiteSum2=0;
+
+ long mapped2=0;
+ long mappedRetained2=0;
+ long rescuedP2=0;
+ long rescuedM2=0;
+ long truePositiveP2=0;
+ long truePositiveM2=0;
+ long falsePositive2=0;
+ long totalCorrectSites2=0;
+ long firstSiteCorrectP2=0;
+ long firstSiteCorrectM2=0;
+ long firstSiteIncorrect2=0;
+ long firstSiteCorrectLoose2=0;
+ long firstSiteIncorrectLoose2=0;
+ long firstSiteCorrectPaired2=0;
+ long firstSiteCorrectSolo2=0;
+ long firstSiteCorrectRescued2=0;
+ long perfectHit2=0; //Highest score is max score
+ long perfectHitCount2=0;
+ long semiPerfectHitCount2=0;
+
+ long uniqueHit2=0; //Only one hit has highest score
+ long correctUniqueHit2=0; //unique highest hit on answer site
+ long correctMultiHit2=0; //non-unique highest hit on answer site (non-skimmer only)
+ long correctLowHit2=0; //hit on answer site, but not highest scorer
+ long noHit2=0;
+ long perfectMatch2=0; //Highest slow score is max slow score
+ long semiperfectMatch2=0;
+ long duplicateBestAlignment2=0;
+
+ long totalNumCorrect2=0; //Only for skimmer
+ long totalNumIncorrect2=0; //Only for skimmer
+ long totalNumIncorrectPrior2=0; //Only for skimmer
+ long totalNumCapturedAllCorrect2=0; //Only for skimmer
+ long totalNumCapturedAllCorrectTop2=0; //Only for skimmer
+ long totalNumCapturedAllCorrectOnly2=0; //Only for skimmer
+
+ long matchCountS2=0;
+ long matchCountI2=0;
+ long matchCountD2=0;
+ long matchCountM2=0;
+ long matchCountN2=0;
+
+ readsUsed=0;
+ for(int i=0; i "+falsePositive);
+ totalCorrectSites1+=mtt.totalCorrectSites1;
+
+ firstSiteCorrectP1+=mtt.firstSiteCorrectP1;
+ firstSiteCorrectM1+=mtt.firstSiteCorrectM1;
+ firstSiteIncorrect1+=mtt.firstSiteIncorrect1;
+ firstSiteCorrectLoose1+=mtt.firstSiteCorrectLoose1;
+ firstSiteIncorrectLoose1+=mtt.firstSiteIncorrectLoose1;
+ firstSiteCorrectPaired1+=mtt.firstSiteCorrectPaired1;
+ firstSiteCorrectSolo1+=mtt.firstSiteCorrectSolo1;
+ firstSiteCorrectRescued1+=mtt.firstSiteCorrectRescued1;
+
+ perfectHit1+=mtt.perfectHit1; //Highest score is max score
+ perfectHitCount1+=mtt.perfectHitCount1;
+ semiPerfectHitCount1+=mtt.semiPerfectHitCount1;
+ uniqueHit1+=mtt.uniqueHit1; //Only one hit has highest score
+ correctUniqueHit1+=mtt.correctUniqueHit1; //unique highest hit on answer site
+ correctMultiHit1+=mtt.correctMultiHit1; //non-unique highest hit on answer site
+ correctLowHit1+=mtt.correctLowHit1; //hit on answer site, but not highest scorer
+ noHit1+=mtt.noHit1;
+
+ totalNumCorrect1+=mtt.totalNumCorrect1; //Skimmer only
+ totalNumIncorrect1+=mtt.totalNumIncorrect1; //Skimmer only
+ totalNumIncorrectPrior1+=mtt.totalNumIncorrectPrior1; //Skimmer only
+ totalNumCapturedAllCorrect1+=mtt.totalNumCapturedAllCorrect1; //Skimmer only
+ totalNumCapturedAllCorrectTop1+=mtt.totalNumCapturedAllCorrectTop1; //Skimmer only
+ totalNumCapturedAllCorrectOnly1+=mtt.totalNumCapturedAllCorrectOnly1; //Skimmer only
+
+ perfectMatch1+=mtt.perfectMatch1; //Highest slow score is max slow score
+ semiperfectMatch1+=mtt.semiperfectMatch1; //A semiperfect mapping was found
+
+ duplicateBestAlignment1+=mtt.ambiguousBestAlignment1;
+
+ initialSiteSum1+=mtt.initialSiteSum1;
+ postTrimSiteSum1+=mtt.postTrimSiteSum1;
+ postRescueSiteSum1+=mtt.postRescueSiteSum1;
+ siteSum1+=mtt.siteSum1;
+ topSiteSum1+=mtt.topSiteSum1;
+
+ AbstractIndex index=mtt.index();
+ callsToScore+=index.callsToScore;
+ callsToExtend+=index.callsToExtendScore;
+ initialKeys+=index.initialKeys;
+ initialKeyIterations+=index.initialKeyIterations;
+ usedKeys+=index.usedKeys;
+ usedKeyIterations+=index.usedKeyIterations;
+
+ for(int j=0; j "+falsePositive);
+ totalCorrectSites2+=mtt.totalCorrectSites2;
+
+ firstSiteCorrectP2+=mtt.firstSiteCorrectP2;
+ firstSiteCorrectM2+=mtt.firstSiteCorrectM2;
+ firstSiteIncorrect2+=mtt.firstSiteIncorrect2;
+ firstSiteCorrectLoose2+=mtt.firstSiteCorrectLoose2;
+ firstSiteIncorrectLoose2+=mtt.firstSiteIncorrectLoose2;
+ firstSiteCorrectPaired2+=mtt.firstSiteCorrectPaired2;
+ firstSiteCorrectSolo2+=mtt.firstSiteCorrectSolo2;
+ firstSiteCorrectRescued2+=mtt.firstSiteCorrectRescued2;
+
+ perfectHit2+=mtt.perfectHit2; //Highest score is max score
+ perfectHitCount2+=mtt.perfectHitCount2;
+ semiPerfectHitCount2+=mtt.semiPerfectHitCount2;
+ uniqueHit2+=mtt.uniqueHit2; //Only one hit has highest score
+ correctUniqueHit2+=mtt.correctUniqueHit2; //unique highest hit on answer site
+ correctMultiHit2+=mtt.correctMultiHit2; //non-unique highest hit on answer site
+ correctLowHit2+=mtt.correctLowHit2; //hit on answer site, but not highest scorer
+ noHit2+=mtt.noHit2;
+
+ totalNumCorrect2+=mtt.totalNumCorrect2; //Skimmer only
+ totalNumIncorrect2+=mtt.totalNumIncorrect2; //Skimmer only
+ totalNumIncorrectPrior2+=mtt.totalNumIncorrectPrior2; //Skimmer only
+ totalNumCapturedAllCorrect2+=mtt.totalNumCapturedAllCorrect2; //Skimmer only
+ totalNumCapturedAllCorrectTop2+=mtt.totalNumCapturedAllCorrectTop2; //Skimmer only
+ totalNumCapturedAllCorrectOnly2+=mtt.totalNumCapturedAllCorrectOnly2; //Skimmer only
+
+ perfectMatch2+=mtt.perfectMatch2; //Highest slow score is max slow score
+ semiperfectMatch2+=mtt.semiperfectMatch2; //A semiperfect mapping was found
+
+ duplicateBestAlignment2+=mtt.ambiguousBestAlignment2;
+
+ initialSiteSum2+=mtt.initialSiteSum2;
+ postTrimSiteSum2+=mtt.postTrimSiteSum2;
+ postRescueSiteSum2+=mtt.postRescueSiteSum2;
+ siteSum2+=mtt.siteSum2;
+ topSiteSum2+=mtt.topSiteSum2;
+
+ matchCountS2+=mtt.matchCountS2;
+ matchCountI2+=mtt.matchCountI2;
+ matchCountD2+=mtt.matchCountD2;
+ matchCountM2+=mtt.matchCountM2;
+ matchCountN2+=mtt.matchCountN2;
+
+ }
+ reads=readsUsed;
+ if(syntheticReads>0){SYNTHETIC=true;}
+
+ t.stop();
+ long nanos=t.elapsed;
+
+ if(verbose_stats>1){
+ StringBuilder sb=new StringBuilder(1000);
+ sb.append("\n\n###################\n#hits\tcount\tscore\textend\n");
+ for(int i=0; i=1){sysout.println("MSA_iterations"+DELIMITER+String.format("%.2fL + %.2fU = %.2f", milf,milu,milf+milu));}
+
+// sysout.println();
+// sysout.println("\nRead 1 data:");
+
+ sysout.println();
+
+ if(REMOVE_DUPLICATE_BEST_ALIGNMENTS){
+ //Ambiguous reads were removed from the retained set, so add them back for the mapped rate.
+ double x=ambiguousFound+mappedRetainedB;
+ sysout.println("R1_Mapped_Percent"+DELIMITER+padPercentMachine(x,4)+"%");
+ sysout.println("R1_Unambiguous_Percent"+DELIMITER+padPercentMachine(mappedRetainedB,4)+"%");
+ sysout.println("R1_Mapped_Reads"+DELIMITER+mappedReads);
+ sysout.println("R1_Unambiguous_Reads"+DELIMITER+unambiguousReads);
+ }else{
+ //Ambiguous reads are part of the retained set here, so the retained rate is the mapped rate
+ //and the unambiguous rate is what remains after subtracting them (previously the two were swapped).
+ double x=mappedRetainedB-ambiguousFound;
+ sysout.println("R1_Mapped_Percent"+DELIMITER+padPercentMachine(mappedRetainedB,4)+"%");
+ sysout.println("R1_Unambiguous_Percent"+DELIMITER+padPercentMachine(x,4)+"%");
+ sysout.println("R1_Mapped_Reads"+DELIMITER+mappedReads);
+ sysout.println("R1_Unambiguous_Reads"+DELIMITER+unambiguousReads);
+ }
+
+ sysout.println();
+ if(paired){
+ sysout.println(String.format("Mated_Pairs"+DELIMITER+"%.4f%%", matedPercent));
+ sysout.println(String.format("Bad_Pairs"+DELIMITER+"%.3f%%", badPairsPercent));
+ }
+ if(paired){
+ sysout.println(String.format("R1_Rescued"+DELIMITER+"%.3f", rescuedPB+rescuedMB)+"%");
+ sysout.println(String.format("Avg_Insert_Size"+DELIMITER+"%.2f", insertSizeAvg));
+ }
+ sysout.println();
+ sysout.println(String.format("R1_Perfect_Best_Site"+DELIMITER+"%.4f", perfectMatchPercent)+"%");
+ sysout.println(String.format("R1_Semiperfect_Site"+DELIMITER+"%.4f", semiperfectMatchPercent)+"%");
+ sysout.println(String.format("R1_Ambiguous_Mapping"+DELIMITER+"%.4f", ambiguousFound)+"%");
+// +(REMOVE_DUPLICATE_BEST_ALIGNMENTS ? " (Removed)" : " (Kept)"));
+ sysout.println(String.format("R1_Low_Quality_Discards"+DELIMITER+"%.4f", lowQualityReadsDiscardedPercent)+"%");
+
+ if(MAKE_MATCH_STRING){
+ sysout.println();
+ sysout.println("R1_Match_Rate"+DELIMITER+padPercentMachine(matchRate,4)+"%");
+ sysout.println("R1_Error_Rate"+DELIMITER+padPercentMachine(errorRate,4)+"%");
+ sysout.println("R1_Sub_Rate"+DELIMITER+padPercentMachine(subRate,4)+"%");
+ sysout.println("R1_Del_Rate"+DELIMITER+padPercentMachine(delRate,4)+"%");
+ sysout.println("R1_Ins_Rate"+DELIMITER+padPercentMachine(insRate,4)+"%");
+ sysout.println("R1_N_Rate"+DELIMITER+padPercentMachine(nRate,4)+"%");
+
+ sysout.println("R1_Match_Count"+DELIMITER+matchCountM1);
+ sysout.println("R1_Error_Count"+DELIMITER+matchErrors);
+ sysout.println("R1_Sub_Count"+DELIMITER+matchCountS1);
+ sysout.println("R1_Del_Count"+DELIMITER+matchCountD1);
+ sysout.println("R1_Ins_Count"+DELIMITER+matchCountI1);
+ sysout.println("R1_N_Count"+DELIMITER+matchCountN1);
+ }
+
+ //Read 2: the shared summary variables are recomputed from the read 2 counters and printed with R2_ keys.
+ if(paired){
+ invSites100=100d/siteSum2;
+
+ perfectHitPercent=perfectHit2*invTrials100; //Highest score is max score
+ perfectMatchPercent=perfectMatch2*invTrials100;
+ semiperfectMatchPercent=semiperfectMatch2*invTrials100;
+
+ perfectHitCountPercent=perfectHitCount2*invSites100;
+ semiPerfectHitCountPercent=semiPerfectHitCount2*invSites100;
+
+ uniqueHitPercent=uniqueHit2*invTrials100; //Only one hit has highest score
+ correctUniqueHitPercent=correctUniqueHit2*invTrials100; //unique highest hit on answer site
+ correctMultiHitPercent=correctMultiHit2*invTrials100; //non-unique highest hit on answer site
+ correctLowHitPercent=correctLowHit2*invTrials100; //hit on answer site, but not highest scorer
+ ambiguousFound=(duplicateBestAlignment2*invTrials100);
+ correctHighHitPercent=(correctMultiHit2+correctUniqueHit2)*invTrials100;
+ correctHitPercent=(correctLowHit2+correctMultiHit2+correctUniqueHit2)*invTrials100;
+
+ mappedB=(mapped2*invTrials100);
+ mappedRetainedB=(mappedRetained2*invTrials100);
+ rescuedPB=(rescuedP2*invTrials100);
+ rescuedMB=(rescuedM2*invTrials100);
+ falsePositiveB=(firstSiteIncorrect2*invTrials100);
+ falsePositiveLooseB=(firstSiteIncorrectLoose2*invTrials100);
+ truePositivePB=(firstSiteCorrectP2*invTrials100);
+ truePositiveMB=(firstSiteCorrectM2*invTrials100);
+ truePositiveStrict=((firstSiteCorrectP2+firstSiteCorrectM2)*invTrials100);
+ truePositiveLoose=(firstSiteCorrectLoose2*invTrials100);
+ snrStrict=10*Math.log10((firstSiteCorrectM2+firstSiteCorrectP2+0.1)/(firstSiteIncorrect2+0.1));
+ snrLoose=10*Math.log10((firstSiteCorrectLoose2+0.1)/(firstSiteIncorrectLoose2+0.1));
+ truePositivePMRatio=(truePositivePB/truePositiveMB);
+ truePositivePairedB=(firstSiteCorrectPaired2*100d/numMated);
+ truePositiveSoloB=(firstSiteCorrectSolo2*100d/(mappedRetained2-numMated));
+ truePositiveRescuedB=(firstSiteCorrectRescued2*100d/(rescuedP2+rescuedM2));
+ avgNumCorrect=(totalCorrectSites2/(1d*(truePositiveP2+truePositiveM2)));
+ noHitPercent=noHit2*invTrials100;
+
+ avgNumCorrect=(SKIMMER ? totalNumCorrect2*invTrials : (totalCorrectSites2/(1d*(truePositiveP2+truePositiveM2))));
+ //Use the read 2 counters here; the read 1 counters were previously used by mistake.
+ avgNumIncorrect=totalNumIncorrect2*invTrials; //Skimmer only
+ avgNumIncorrectPrior=totalNumIncorrectPrior2*invTrials; //Skimmer only
+
+ rateCapturedAllCorrect=totalNumCapturedAllCorrect2*invTrials100; //Skimmer only
+ rateCapturedAllTop=totalNumCapturedAllCorrectTop2*invTrials100; //Skimmer only
+ rateCapturedAllOnly=totalNumCapturedAllCorrectOnly2*invTrials100; //Skimmer only
+
+ if(REMOVE_DUPLICATE_BEST_ALIGNMENTS){
+ mappedReads=mappedRetained2+duplicateBestAlignment2;
+ unambiguousReads=mappedRetained2;
+ }else{
+ mappedReads=mappedRetained2;
+ unambiguousReads=mappedRetained2-duplicateBestAlignment2;
+ }
+
+ avgInitialSites=initialSiteSum2*invTrials;
+ avgPostTrimSites=postTrimSiteSum2*invTrials;
+ avgPostRescueSites=postRescueSiteSum2*invTrials;
+ avgSites=siteSum2*invTrials;
+ //Use the read 2 hit counts here; the read 1 counts were previously used by mistake.
+ avgPerfectSites=(perfectHitCount2*invTrials);
+ avgSemiPerfectSites=(semiPerfectHitCount2*invTrials);
+ avgTopSites=topSiteSum2*invTrials;
+ lowQualityReadsDiscardedPercent=lowQualityReadsDiscarded2*invTrials100;
+
+ matchErrors=matchCountS2+matchCountI2+matchCountD2;
+ baseLen=matchCountM2+matchCountI2+matchCountS2+matchCountN2;
+ matchLen=matchCountM2+matchCountI2+matchCountS2+matchCountN2+matchCountD2;
+ refLen=matchCountM2+matchCountS2+matchCountN2+matchCountD2;
+ errorRate=matchErrors*100d/matchLen;
+ matchRate=matchCountM2*100d/matchLen;//baseLen;
+ subRate=matchCountS2*100d/matchLen;//baseLen;
+ delRate=matchCountD2*100d/matchLen;
+ insRate=matchCountI2*100d/matchLen;//baseLen;
+ nRate=matchCountN2*100d/matchLen;//baseLen;
+
+// sysout.println("\n\nRead 2 data:");
+ sysout.println();
+// sysout.println(String.format("perfectHit"+DELIMITER+"%.2f", perfectHitPercent)+"%");
+// sysout.println(String.format("uniqueHit"+DELIMITER+"%.2f", uniqueHitPercent)+"%");
+// sysout.println(String.format("correctUniqueHit"+DELIMITER+"%.2f", correctUniqueHitPercent)+"%");
+// sysout.println(String.format("correctMultiHit"+DELIMITER+"%.2f", correctMultiHitPercent)+"%");
+// sysout.println(String.format("correctHighHit"+DELIMITER+"%.2f", correctHighHitPercent)+"%");
+// sysout.println(String.format("correctHit"+DELIMITER+"%.2f", correctHitPercent)+"%");
+
+ //sysout.println(String.format("mapped"+DELIMITER+(mappedB<10?" ":"")+"%.3f", mappedB)+"%");
+ if(REMOVE_DUPLICATE_BEST_ALIGNMENTS){
+ double x=ambiguousFound+mappedRetainedB;
+ sysout.println("R2_Mapped_Percent"+DELIMITER+padPercentMachine(x,4)+"%");
+ sysout.println("R2_Unambiguous_Percent"+DELIMITER+padPercentMachine(mappedRetainedB,4)+"%");
+ sysout.println("R2_Mapped_Reads"+DELIMITER+mappedReads);
+ sysout.println("R2_Unambiguous_Reads"+DELIMITER+unambiguousReads);
+ }else{
+ //Same correction as for read 1: retained rate is the mapped rate, the difference is the unambiguous rate.
+ double x=mappedRetainedB-ambiguousFound;
+ sysout.println("R2_Mapped_Percent"+DELIMITER+padPercentMachine(mappedRetainedB,4)+"%");
+ sysout.println("R2_Unambiguous_Percent"+DELIMITER+padPercentMachine(x,4)+"%");
+ sysout.println("R2_Mapped_Reads"+DELIMITER+mappedReads);
+ sysout.println("R2_Unambiguous_Reads"+DELIMITER+unambiguousReads);
+ }
+ sysout.println();
+ if(paired){
+ sysout.println(String.format("R2_Rescued"+DELIMITER+"%.3f", rescuedPB+rescuedMB)+"%");
+ }
+ sysout.println();
+ sysout.println(String.format("R2_Perfect_Best_Site"+DELIMITER+"%.4f", perfectMatchPercent)+"%");
+ sysout.println(String.format("R2_Semiperfect_Site"+DELIMITER+"%.4f", semiperfectMatchPercent)+"%");
+ sysout.println(String.format("R2_Ambiguous_Mapping"+DELIMITER+"%.4f", ambiguousFound)+"%");
+ //(REMOVE_DUPLICATE_BEST_ALIGNMENTS ? "(Removed)" : "(Kept)"));
+ sysout.println(String.format("R2_Low_Quality_Discards"+DELIMITER+"%.4f", lowQualityReadsDiscardedPercent)+"%");
+
+ if(MAKE_MATCH_STRING){
+ sysout.println();
+ sysout.println("R2_Match_Rate"+DELIMITER+padPercentMachine(matchRate,4)+"%");
+ sysout.println("R2_Error_Rate"+DELIMITER+padPercentMachine(errorRate,4)+"%");
+ sysout.println("R2_Sub_Rate"+DELIMITER+padPercentMachine(subRate,4)+"%");
+ sysout.println("R2_Del_Rate"+DELIMITER+padPercentMachine(delRate,4)+"%");
+ sysout.println("R2_Ins_Rate"+DELIMITER+padPercentMachine(insRate,4)+"%");
+ sysout.println("R2_N_Rate"+DELIMITER+padPercentMachine(nRate,4)+"%");
+
+ sysout.println("R2_Match_Count"+DELIMITER+matchCountM2);
+ sysout.println("R2_Error_Count"+DELIMITER+matchErrors);
+ sysout.println("R2_Sub_Count"+DELIMITER+matchCountS2);
+ sysout.println("R2_Del_Count"+DELIMITER+matchCountD2);
+ sysout.println("R2_Ins_Count"+DELIMITER+matchCountI2);
+ sysout.println("R2_N_Count"+DELIMITER+matchCountN2);
+ }
+ }
+
+ if(BBSplitter.TRACK_SCAF_STATS){
+ BBSplitter.printCounts(BBSplitter.SCAF_STATS_FILE, BBSplitter.scafCountTable, true, readsUsed+readsUsed2);
+ }
+
+ if(BBSplitter.TRACK_SET_STATS){
+ BBSplitter.printCounts(BBSplitter.SET_STATS_FILE, BBSplitter.setCountTable, true, readsUsed+readsUsed2);
+ }
+
+ if(ReadStats.COLLECT_QUALITY_STATS || ReadStats.COLLECT_MATCH_STATS || ReadStats.COLLECT_INSERT_STATS){
+ ReadStats rs=ReadStats.mergeAll();
+ if(ReadStats.QUAL_HIST_FILE!=null){rs.writeQualityToFile(ReadStats.QUAL_HIST_FILE, paired);}
+ if(ReadStats.MATCH_HIST_FILE!=null){rs.writeMatchToFile(ReadStats.MATCH_HIST_FILE, paired);}
+ if(ReadStats.INSERT_HIST_FILE!=null){rs.writeInsertToFile(ReadStats.INSERT_HIST_FILE);}
+ }
+
+ //Sanity check: every read 1 must be accounted for by exactly one outcome counter.
+ assert(!CALC_STATISTICS || truePositiveP1+truePositiveM1+falsePositive1+noHit1+lowQualityReadsDiscarded1==reads) :
+ "\nThe number of reads out does not add up to the number of reads in.\nThis may indicate that a mapping thread crashed.\n"+
+ truePositiveP1+"+"+truePositiveM1+"+"+falsePositive1+"+"+noHit1+"+"+lowQualityReadsDiscarded1+" = "+
+ (truePositiveP1+truePositiveM1+falsePositive1+noHit1+lowQualityReadsDiscarded1)+" != "+reads;
+ //The skimmer variant of the check leaves correctMultiHit1 out of the sum.
+ if(!SKIMMER){
+ assert(!CALC_STATISTICS || truePositiveP1+truePositiveM1==correctLowHit1+correctMultiHit1+correctUniqueHit1);
+ }else{
+ assert(!CALC_STATISTICS || truePositiveP1+truePositiveM1==correctLowHit1+correctUniqueHit1);
+ }
+ }
+	/** Prints the genome build, key length, max indel, minimum score ratio and mapping mode to sysout, one setting per line. */
+	static final void printSettings0(int k, int maxindel, float minratio){
+		final String mode;
+		if(PERFECTMODE){mode="perfect";}
+		else if(SEMIPERFECTMODE){mode="semiperfect";}
+		else{mode="normal";}
+		//Machine output joins label and value with DELIMITER; the human-readable form uses "label: \t".
+		final String[] labels=(MACHINE_OUTPUT ?
+				new String[] {"Genome"+DELIMITER, "Key_Length"+DELIMITER, "Max_Indel"+DELIMITER, "Minimum_Score_Ratio"+DELIMITER, "Mapping_Mode"+DELIMITER} :
+				new String[] {"Genome: \t", "Key Length: \t", "Max Indel: \t", "Minimum Score Ratio: \t", "Mapping Mode: \t"});
+		sysout.println(labels[0]+Data.GENOME_BUILD);
+		sysout.println(labels[1]+k);
+		sysout.println(labels[2]+maxindel);
+		sysout.println(labels[3]+minratio);
+		sysout.println(labels[4]+mode);
+	}
+
+	/** Returns a-b when a is greater than b, otherwise b-a (the absolute difference; int overflow is not checked). */
+	static final int absdif(int a, int b){
+		if(a>b){return a-b;} return b-a;
+	}
+
+ /* ------------ Non-static fields ----------- */
+
+ //Per-instance settings and streams. NOTE(review): the -1 defaults look like "not set" sentinels - confirm against the code that assigns them.
+ ConcurrentReadStreamInterface cris;
+ RTextOutputStream3 rosA=null, rosM=null, rosU=null, rosB=null; //NOTE(review): suffixes presumably pair with outFile, outFileM, outFileU and outFileB - confirm
+
+ float fractionGenomeToExclude=-1;
+ int maxIndel1=-1;
+ int maxIndel2=-1;
+ int minApproxHits=-1;
+ int expectedSites=-1;
+ int ambigMode=AMBIG_BEST; //Holds one of the AMBIG_* constants
+// int ambigMode2=AMBIG_BEST;
+ boolean fast=false;
+ boolean slow=false;
+ boolean verbose=false;
+ boolean rcompMate=false;
+ boolean outputSitesOnly=false;
+ long targetGenomeSize=-1;
+ int ziplevel=-1;
+ int build=1;
+ String reference=null;
+ int keylen=13;
+ int idmodulo=1;
+ float samplerate=1f;
+ double minid=-1;
+ long sampleseed=1;
+ boolean ambiguousRandom=false, ambiguousAll=false;
+ boolean forceanalyze=false;
+ boolean gunzip=false;
+ boolean gzip=false;
+ boolean pigz=true;
+ boolean unpigz=ReadWrite.USE_UNPIGZ; //Starts from the global ReadWrite setting
+ boolean setxs=false, setintron=false;
+ String bamscript=null;
+ String in1=null, in2=null;
+ String qfout=null, qfout2=null, qfoutM=null, qfoutM2=null, qfoutU=null, qfoutU2=null, qfoutB=null, qfoutB2=null; //NOTE(review): presumably quality-file counterparts of the outFile* names - confirm
+
+
+ //The fields below have no initializer here; their values are assigned elsewhere (not visible in this section).
+ /** Scores below the (max possible alignment score)*(MINIMUM_ALIGNMENT_SCORE_RATIO) will be discarded.
+ * Default: 0.4 ~ 0.5 for clean data against raw PacBio data.
+ * Very sensitive! A value of 0.2 will potentially produce many false positives. */
+ float MINIMUM_ALIGNMENT_SCORE_RATIO;
+
+ float keyDensity;//Normal key density
+ float maxKeyDensity; //For situations where some of the read is too low quality, this is the max for the rest of the read.
+ float minKeyDensity;
+ int maxDesiredKeys; //Don't go above this number of keys except to maintain minKeyDensity.
+
+ /** Additional ref bases on each end of site mapping location in alignment window.
+ * If there are no insertions or deletions, 0 is fine. */
+ int SLOW_ALIGN_PADDING;
+ int SLOW_RESCUE_PADDING;
+ int TIP_SEARCH_DIST;
+
+ /** Class name of MSA to use */
+ String MSA_TYPE;
+ int MAX_SITESCORES_TO_PRINT;
+ boolean PRINT_SECONDARY_ALIGNMENTS;
+
+
+ /* ------------ Static fields ----------- */
+ //Values for the ambigMode field.
+ static final int AMBIG_BEST=0;
+ static final int AMBIG_TOSS=1;
+ static final int AMBIG_RANDOM=2;
+ static final int AMBIG_ALL=3;
+
+ static int THRESH=0; //Threshold for calculating true positives on synthetic data, or something.
+
+ static int readlen=100;
+
+ static int maxInsLen=40; //Default 40
+ static int maxSubLen=40; //Default 40
+ static int maxDelLen=40; //Default 8000 //NOTE(review): comment says 8000 but the initializer is 40 - confirm which is intended
+
+ static byte minQuality=3;
+ static byte midQuality=23;
+ static byte maxQuality=35;
+
+ static int maxSnps=3;//4;
+ static int maxInss=3;//2;
+ static int maxDels=3;
+ static int maxSubs=3;//2;
+
+ static float baseSnpRate=0.25f;
+ static float baseInsRate=0.25f;
+ static float baseDelRate=0.25f;
+ static float baseSubRate=0.25f;//0.3f;
+ static float PERFECT_READ_RATIO=0.0f;//0.2f;//0.8f
+
+ //Extra work for rare cases in human only.
+ static boolean SAVE_AMBIGUOUS_XY=false;
+
+ static boolean colorspace=false;
+
+ static boolean translateToBaseSpace=false; //Translate (colorspace) reads before outputting them
+
+
+ static boolean TRIM_LIST=true; //Increases speed many times; reduces accuracy a bit
+
+ static boolean PAIRED_RANDOM_READS=false;
+ static boolean REQUIRE_CORRECT_STRANDS_PAIRS=true;
+ static boolean SAME_STRAND_PAIRS=false;
+ static boolean KILL_BAD_PAIRS=false;
+
+ static final boolean SLOW_ALIGN=true; //Do a more accurate scoring pass with MSA
+ static boolean MAKE_MATCH_STRING=SLOW_ALIGN; //Starts equal to SLOW_ALIGN (true) but, unlike it, is not final
+
+ /** Rescue paired reads by searching near mate */
+ static boolean RESCUE=true;
+
+ /** Generally should be set to false unless SLOW_ALIGN==true */
+ static boolean REMOVE_DUPLICATE_BEST_ALIGNMENTS=false;
+
+ /** Forbid alignments with indels longer than MAX_INDEL */
+ static boolean STRICT_MAX_INDEL=false;
+ /** Don't allow reads to map to their origin location in the reference. Useful for self-correcting reads. */
+ static boolean FORBID_SELF_MAPPING=false;
+ /** Only allow perfect and semiperfect mappings */
+ static boolean SEMIPERFECTMODE=false;
+ /** Only allow perfect mappings */
+ static boolean PERFECTMODE=false;
+ /** Only allow sites with at least this many contiguous matches */
+ static int KFILTER=-1;
+
+ /** Quality-trim left side of read before mapping */
+ static boolean TRIM_LEFT=false;
+ /** Quality-trim right side of read before mapping */
+ static boolean TRIM_RIGHT=false;
+ /** Restore read to untrimmed state after mapping (and destroy match string) */
+ static boolean UNTRIM=false;
+ /** Trim bases with quality less than or equal to this value */
+ static byte TRIM_QUALITY=7;
+ /** Produce local alignments instead of global alignments */
+ static boolean LOCAL_ALIGN=false;
+
+ static int minChrom=1;
+ static int maxChrom=Integer.MAX_VALUE;
+ //Run-wide read counters. When CALC_STATISTICS is set, the statistics printer asserts that its per-category totals add up to reads.
+ static long reads=-1;
+ static long readsUsed=0;
+ static long readsUsed2=0;
+ static long lowQualityReadsDiscarded1=0;
+ static long lowQualityReadsDiscarded2=0;
+
+ protected static boolean CALC_STATISTICS=true;
+
+ static boolean QUICK_MATCH_STRINGS=false;
+ static boolean OUTPUT_READS=false;
+ static boolean DONT_OUTPUT_UNMAPPED_READS=false;
+ static boolean DONT_OUTPUT_BLACKLISTED_READS=false;
+
+ static boolean OUTPUT_ORDERED_READS=false;
+ static boolean DOUBLE_PRINT_ERROR_RATE=false;
+
+ static String outputBaseName="readsOut_"+(System.nanoTime()&0x1FFFF); //Suffix is the low 17 bits of nanoTime taken at class initialization, so it is not guaranteed unique
+ static String outFile=null;//outputBaseName+"_1.txt";
+ static String outFile2=null;//outputBaseName+"_2.txt";
+ static String outFileM=null;//outputBaseName+"_mapped_1.txt";
+ static String outFileM2=null;//outputBaseName+"_mapped_2.txt";
+ static String outFileU=null;//outputBaseName+"_unmapped_1.txt";
+ static String outFileU2=null;//outputBaseName+"_unmapped_2.txt";
+ static String outFileB=null;//outputBaseName+"_blacklist_1.txt";
+ static String outFileB2=null;//outputBaseName+"_blacklist_2.txt";
+ static ArrayList blacklist=null; //NOTE(review): raw ArrayList; the element type is not visible here (a type argument may have been lost) - parameterize once confirmed
+
+ static boolean useRandomReads=false;
+ static int sequentialOverlap=5;
+ static boolean sequentialStrandAlt=false;
+
+ static boolean OVERWRITE=false;
+ static boolean SYNTHETIC=false;
+ static boolean ERROR_ON_NO_OUTPUT=false;
+ static boolean MACHINE_OUTPUT=false; //When true, printSettings0 prints label+DELIMITER+value instead of the tab-aligned form
+ final static String DELIMITER="=";
+
+ static PrintStream sysout=System.err; //Settings and statistics go to stderr unless this is reassigned
+ static boolean SYSIN=false;
+ static int verbose_stats=0;
+ static boolean waitForMemoryClear=false;
+
+ public static boolean errorState=false;
+
+}
diff --git a/current/align2/BBIndex.java b/current/align2/BBIndex.java
new file mode 100755
index 0000000..fd8cb05
--- /dev/null
+++ b/current/align2/BBIndex.java
@@ -0,0 +1,3296 @@
+package align2;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+
+import stream.SiteScore;
+
+import dna.AminoAcid;
+import dna.Data;
+import dna.Gene;
+
+
+/**
+ * Based on Index11f
+ * Index stored in single array per block.
+ *
+ *
+ *
+ * @author Brian Bushnell
+ * @date Dec 22, 2012
+ *
+ */
+public final class BBIndex extends AbstractIndex {
+
+
+ public static void main(String[] args){
+ //NOTE(review): the loop header two lines down is cut off mid-token ("iData.numChroms"). The rest of main and the header of the following index-loading method are missing from this text; restore them from the original source before applying this patch.
+ int k=13; //Passed to IndexMaker4.makeIndex as k in the visible tail below
+
+ for(int i=0; iData.numChroms){maxChrom=Data.numChroms;}
+ assert(minChrom<=maxChrom);
+ Data.sysout.println("Loading index for chrom "+minChrom+"-"+maxChrom+", genome "+Data.GENOME_BUILD);
+ index=IndexMaker4.makeIndex(Data.GENOME_BUILD, minChrom, maxChrom,
+ k, NUM_CHROM_BITS, MAX_ALLOWED_CHROM_INDEX, CHROM_MASK_LOW, CHROM_MASK_HIGH, SITE_MASK, SHIFT_LENGTH, COLORSPACE, writeToDisk, diskInvalid, index);
+ }
+
+ /** Calculate statistics of index, such as list lengths, and find clumpy keys. Allocates COUNTS, builds lengthHistogram, and sets MAX_USABLE_LENGTH, MAX_USABLE_LENGTH2 and Solver.POINTS_PER_SITE. */
+ public static final synchronized void analyzeIndex(int minChrom, int maxChrom, boolean cs, float fractionToExclude, int k){
+ assert(!cs) : "Re-enable old reverse complement mode.";
+ assert(lengthHistogram==null);
+ assert(COUNTS==null);
+ //One counter per possible key: 4^k entries.
+ int KEYSPACE=1<<(2*k);
+ COUNTS=new int[KEYSPACE];
+ maxChrom=maxChrom(maxChrom);
+ //Clump tallies, keyed by the smaller of a key and its reverse complement (see the put/increment below).
+ HashMap cmap=new HashMap();
+ //NOTE(review): the chrom update below presumably jumps to the first chrom of the next block - confirm against CHROM_MASK_HIGH/CHROMS_PER_BLOCK.
+ for(int chrom=minChrom; chrom<=maxChrom; chrom=((chrom&CHROM_MASK_HIGH)+CHROMS_PER_BLOCK)){
+ Block b=index[chrom];
+ final int[] sites=b.sites;
+ final int[] starts=b.starts;
+
+ for(int key=0; key0 && dif<=CLUMPY_MAX_DIST){ //NOTE(review): text lost between "key" and "0 && dif"; the per-key counting code is missing here
+ clumps++;
+ }
+ }
+ if(clumps>0){
+ final int x=Tools.min(key, AminoAcid.reverseComplementBinaryFast(key, k));
+ final Integer ko=x;
+ LongM lm=cmap.get(ko);
+ if(lm==null){
+ lm=new LongM(0);
+ cmap.put(ko, lm);
+ }
+ lm.increment(clumps);
+ }
+ }
+ }
+ }
+
+ for(int key=0; keyCLUMPY_MIN_LENGTH_INDEX && clumps>CLUMPY_FRACTION*len)/* || (len>8*CLUMPY_MIN_LENGTH_INDEX && clumps>.75f*CLUMPY_FRACTION*len)*/){ //NOTE(review): text lost between "key" and "CLUMPY_MIN_LENGTH_INDEX"
+ int rkey=AminoAcid.reverseComplementBinaryFast(key, k);
+ assert(key<=rkey);
+ assert(key==KeyRing.reverseComplementKey(rkey, k, cs));
+ COUNTS[key]=0; //Zero the count for the clumpy key and its reverse complement (presumably so lookups ignore them - confirm)
+ COUNTS[rkey]=0;
+ }
+ }
+ }
+
+ lengthHistogram=Tools.makeLengthHistogram3(COUNTS, 1000, verbose2);
+
+ if(verbose2){System.err.println("lengthHistogram: "+Arrays.toString(lengthHistogram));}
+
+ if(REMOVE_FREQUENT_GENOME_FRACTION){
+ //Histogram positions for the normal limit and for the limit scaled by DOUBLE_SEARCH_THRESH_MULT.
+ int lengthLimitIndex=(int)((1-fractionToExclude)*(lengthHistogram.length-1));
+ int lengthLimitIndex2=(int)((1-fractionToExclude*DOUBLE_SEARCH_THRESH_MULT)*(lengthHistogram.length-1));
+
+ MAX_USABLE_LENGTH=Tools.max(2*SMALL_GENOME_LIST, lengthHistogram[lengthLimitIndex]);
+ MAX_USABLE_LENGTH2=Tools.max(6*SMALL_GENOME_LIST, lengthHistogram[lengthLimitIndex2]);
+
+ if(verbose2){System.err.println("MAX_USABLE_LENGTH: "+MAX_USABLE_LENGTH+"\nMAX_USABLE_LENGTH2: "+MAX_USABLE_LENGTH2);}
+ }
+
+ Solver.POINTS_PER_SITE=(int)Math.floor((Solver.BASE_POINTS_PER_SITE*4000f)/Tools.max(2*SMALL_GENOME_LIST, lengthHistogram[MAX_AVERAGE_LIST_TO_SEARCH]));
+ if(Solver.POINTS_PER_SITE==0){Solver.POINTS_PER_SITE=-1;} //Zero is replaced by -1 because the assert below requires a negative value
+ if(verbose2){System.err.println("POINTS_PER_SITE: "+Solver.POINTS_PER_SITE);}
+ assert(Solver.POINTS_PER_SITE<0) : Solver.POINTS_PER_SITE;
+ }
+
+	/** Returns the filename for the block holding this chrom, as built by IndexMaker4 from minChrom(chrom), maxChrom(chrom) and key length k. */
+	public static final String fname(int chrom, int k){
+		final int first=minChrom(chrom), last=maxChrom(chrom);
+		return IndexMaker4.fname(first, last, k, NUM_CHROM_BITS, COLORSPACE);
+	}
+
+ /** Ensure key offsets are strictly ascending. */
+ private static boolean checkOffsets(int[] offsets){
+ for(int i=1; i0){ //NOTE(review): text lost after "i". Everything from here to the closing brace belongs to a different method: it returns finalHitCount, while checkOffsets is declared boolean. Restore both methods from the original source.
+ if(x=shortest);
+ if(initialHitCountlimit3){
+ for(int i=0; ilimit || sum/finalHitCount>limit2; i--){
+ Pointer p=ptrs[i];
+ sum-=hits[p.key].length; //Remove this list from the running total before discarding it
+ hits[p.key]=null;
+ finalHitCount--;
+ }
+
+ return finalHitCount;
+ }
+
+ /** Remove least useful keys to accelerate search */
+ private final int trimExcessHitListsByGreedy(int[] offsets, int[] keyScores, int maxHitLists, int[] keys){
+ //NOTE(review): several loop headers below have lost text, and the method is fused with the tail of a hit-lookup method (the part that fills starts/stops and returns numHits). Restore from the original source.
+ float[] keyWeights=getKeyWeightArray(keyScores.length);
+ for(int i=0; ilimitS){hits[i]=null;}
+// }
+
+ final int[] lengths=getGenericArray(keys.length);
+
+ for(int i=0; i0){
+ if(x=shortest);
+ if(initialHitCountlimit3){
+ for(int i=0; i=MIN_APPROX_HITS_TO_KEEP && (sum>limit || sum/initialHitCount>limit2 || hitsCount>maxHitLists/* || worstValue<0*/)){
+ final int[] lists=getGreedyListArray(hitsCount);
+ for(int i=0, j=0; j0){
+ lists[j]=i;
+ j++;
+ }
+ }
+ //findWorstGreedy reports through greedyReturn: [0] is a position in lists, [1] is stored as worstValue.
+ Solver.findWorstGreedy(offsets, lengths, keyWeights, KEYLEN, lists, greedyReturn);
+ int worstIndex=greedyReturn[0];
+ int worst=lists[worstIndex];
+ worstValue=greedyReturn[1];
+ sum-=lengths[worst];
+
+// if(worstValue>0 && (hitsCount<=maxHitLists || lengths[worst]0 || lengths[worst]=0){
+ final int len=count(key);
+ if(len>0 && len0){
+ starts[i]=b.starts[key];
+ stops[i]=starts[i]+len2; //End of this key's range: start plus len2 (the definition of len2 is lost in the damaged line above)
+ numHits++;
+ }
+ }
+ }
+ }
+ return numHits;
+ }
+
+ //NOTE(review): countHits is damaged. Its loop header is cut off and its body runs straight into the signature and body of findAdvanced. Restore both methods from the original source.
+ private final int countHits(final int[] keys, final int maxLen, boolean clearBadKeys){
+ int numHits=0;
+ for(int i=0; i=0){
+ final int len=count(key);
+ if(len>0 && len findAdvanced(byte[] basesP, byte[] basesM, byte[] qual, byte[] baseScoresP, int[] keyScoresP, int[] offsets, long id){
+ assert(minChrom<=maxChrom && minChrom>=0);
+ ArrayList result=find(basesP, basesM, qual, baseScoresP, keyScoresP, offsets, true, id); //First pass with obeyLimits=true
+ if(DOUBLE_SEARCH_NO_HIT && (result==null || result.isEmpty())){result=find(basesP, basesM, qual, baseScoresP, keyScoresP, offsets, false, id);} //Optional second pass with obeyLimits=false when the first found nothing
+
+ return result;
+ }
+
+ /** Searches for candidate sites for one read: builds keys, counts usable hit lists, optionally prescans the blocks, then searches each block on the plus strand and then the minus strand. NOTE(review): several lines in this method have lost text (flagged below). */
+ public final ArrayList find(byte[] basesP, byte[] basesM, byte[] qual, byte[] baseScoresP, int[] keyScoresP, int[] offsetsP, boolean obeyLimits, long id){
+ //Keys are built from the plus-strand bases; keysP is a working copy, so keysOriginal keeps the initial key count.
+ assert(checkOffsets(offsetsP)) : Arrays.toString(offsetsP);
+ final int[] keysOriginal=KeyRing.makeKeys(basesP, offsetsP, KEYLEN, COLORSPACE);
+ int[] keysP=Arrays.copyOf(keysOriginal, keysOriginal.length);
+
+ initialKeys+=offsetsP.length;
+ initialKeyIterations++;
+
+ final int maxLen=(obeyLimits ? MAX_USABLE_LENGTH : MAX_USABLE_LENGTH2);
+
+ int numHits=0;
+ numHits=countHits(keysP, maxLen, true);
+ if(numHits>0){ //TODO: Change these to higher numbers
+ int trigger=(3*keysP.length)/4;
+ if(numHits<4 && numHitsMIN_APPROX_HITS_TO_KEEP){ //NOTE(review): text lost between "numHits" and "MIN_APPROX_HITS_TO_KEEP"
+ int cutoffIndex=((int) (HIT_FRACTION_TO_RETAIN*(keysP.length)-0.01f))+(keysP.length-numHits);
+
+ int zeroes=keysP.length-numHits;
+ int altMinIndex=(zeroes+(MIN_HIT_LISTS_TO_RETAIN-1));
+ cutoffIndex=Tools.max(cutoffIndex, altMinIndex);
+
+ assert(cutoffIndex>0) : cutoffIndex+"\n"+numHits;
+
+ if(cutoffIndex<(keysP.length-1)){
+ int[] lens=getGenericArray(keysP.length);
+ for(int i=0; icutoff){ //NOTE(review): text lost between "i" and "cutoff"; the code that fills lens and computes cutoff is missing
+ keysP[i]=-1;
+ removed++;
+ numHits--;
+ }
+ }
+ }
+ }
+// assert(checkOffsets(offsets)) : Arrays.toString(offsets);
+ //NOTE(review): the "if(numHits" line two lines down is cut off; the early-return test and the code that defines offsetsM are missing.
+ final ArrayList result=new ArrayList(8);
+ if(numHits\n"+Arrays.toString(offsetsM));
+ }
+ final int[] keysM=(COLORSPACE ? KeyRing.makeKeys(basesM, offsetsM, KEYLEN, COLORSPACE) : KeyRing.reverseComplementKeys(keysP, KEYLEN, COLORSPACE));
+
+// assert(checkOffsets(offsetsP)) : Arrays.toString(offsetsP);
+// assert(checkOffsets(offsetsM)) : Arrays.toString(offsetsM);
+
+ assert(!USE_EXTENDED_SCORE || (baseScoresP!=null && (qual==null || baseScoresP.length==qual.length)));
+ assert(keyScoresP!=null);
+ assert(keyScoresP.length==offsetsP.length) : keyScoresP.length+", "+offsetsP.length+", "+Arrays.toString(keyScoresP);
+ final byte[] baseScoresM=Tools.reverseAndCopy(baseScoresP, getBaseScoreArray(baseScoresP.length, 1));
+ final int[] keyScoresM=Tools.reverseAndCopy(keyScoresP, getKeyScoreArray(keyScoresP.length, 1));
+ final int maxQuickScore=maxQuickScore(offsetsP, keyScoresP);
+
+ assert(offsetsM.length==offsetsP.length);
+ assert(maxQuickScore==maxQuickScore(offsetsM, keyScoresM));
+
+ /*
+ * bestScores:
+ *
+ * bestScores[0] currentTopScore
+ * bestScores[1] maxHits
+ * bestScores[2] qcutoff
+ * bestScores[3] bestqscore
+ * bestScores[4] maxQuickScore
+ * bestScores[5] perfectsFound
+ */
+ final int[] bestScores=new int[6];
+
+ //This prevents filtering by qscore when a low-quality read only uses a few keys.
+ //In that case, extending is more important.
+ final boolean prescan_qscore=(PRESCAN_QSCORE && numHits>=5);
+
+ int[][] prescanResults=null;
+ int[] precounts=null;
+ int[] prescores=null;
+
+ int hitsCutoff=0;
+ int qscoreCutoff=(int)(MIN_QSCORE_MULT*maxQuickScore);
+ //allBasesCovered requires the first key at offset 0 and the last key ending at the read end; the gap test inside the loop is damaged.
+ boolean allBasesCovered=true;
+ {
+ if(offsetsP[0]!=0){allBasesCovered=false;}
+ else if(offsetsP[offsetsP.length-1]!=(basesP.length-KEYLEN)){allBasesCovered=false;}
+ else{
+ for(int i=1; ioffsetsP[i-1]+KEYLEN){ //NOTE(review): text lost between "i" and "offsetsP[i-1]"
+ allBasesCovered=false;
+ break;
+ }
+ }
+ }
+ }
+
+ //TODO I don't understand this logic
+ final boolean pretendAllBasesAreCovered=//false;
+ (allBasesCovered ||
+ keysP.length>=keysOriginal.length-4 ||
+ (keysP.length>=9 && (offsetsP[offsetsP.length-1]-offsetsP[0]+KEYLEN)>Tools.max(40, (int)(basesP.length*.75f))));
+
+// System.err.println(allBasesCovered+"\t"+Arrays.toString(offsetsP));
+// assert(allBasesCovered);
+
+ if(prescan_qscore){
+ prescanResults=prescanAllBlocks(bestScores,
+ keysP, keyScoresP, offsetsP,
+ keysM, keyScoresM, offsetsM,
+ pretendAllBasesAreCovered);
+
+ if(prescanResults!=null){
+ precounts=prescanResults[0];
+ prescores=prescanResults[1];
+ }
+
+ if(bestScores[1]=maxQuickScore && pretendAllBasesAreCovered){ //NOTE(review): text lost between "bestScores[1]" and "=maxQuickScore"
+ assert(bestScores[3]==maxQuickScore);
+ assert(bestScores[1]==numHits);
+
+ hitsCutoff=calcApproxHitsCutoff(keysP.length, bestScores[1], MIN_APPROX_HITS_TO_KEEP, true);
+ qscoreCutoff=Tools.max(qscoreCutoff, (int)(bestScores[3]*DYNAMIC_QSCORE_THRESH_PERFECT));
+ }else{
+ hitsCutoff=calcApproxHitsCutoff(keysP.length, bestScores[1], MIN_APPROX_HITS_TO_KEEP, false);
+ qscoreCutoff=Tools.max(qscoreCutoff, (int)(bestScores[3]*PRESCAN_QSCORE_THRESH));
+ }
+ }
+
+ final int maxScore=maxScore(offsetsP, baseScoresP, keyScoresP, basesP.length, true);
+ final boolean fullyDefined=AminoAcid.isFullyDefined(basesP);
+ assert(bestScores[2]<=0) : Arrays.toString(bestScores);
+ //cycle indexes the prescan arrays: two entries per block, plus strand first, then minus strand.
+ int cycle=0;
+ for(int chrom=minChrom; chrom<=maxChrom; chrom=((chrom&CHROM_MASK_HIGH)+CHROMS_PER_BLOCK)){
+ if(precounts==null || precounts[cycle]>=hitsCutoff || prescores[cycle]>=qscoreCutoff){ //Search unless the prescan put this block/strand below both cutoffs
+ find(keysP, basesP, baseScoresP, keyScoresP, chrom, Gene.PLUS,
+ offsetsP, obeyLimits, result, bestScores, allBasesCovered, maxScore, fullyDefined);
+ }
+ if(QUIT_AFTER_TWO_PERFECTS){
+ if(bestScores[5]>=2){break;} //if(bestScores[5]>=3 || (bestScores[5]>=2 && chrom<24)){break;} //for human
+ }
+ cycle++;
+ if(precounts==null || precounts[cycle]>=hitsCutoff || prescores[cycle]>=qscoreCutoff){
+ find(keysM, basesM, baseScoresM, keyScoresM, chrom, Gene.MINUS,
+ offsetsM, obeyLimits, result, bestScores, allBasesCovered, maxScore, fullyDefined);
+ }
+ if(QUIT_AFTER_TWO_PERFECTS){
+ if(bestScores[5]>=2){break;} //if(bestScores[5]>=3 || (bestScores[5]>=2 && chrom<24)){break;} //for human
+ }
+ cycle++;
+ }
+
+// assert(Read.CHECKSITES(result, basesP, basesM, id)); //TODO: Comment out once checked
+
+ return result;
+ }
+
+ /** Search blocks rapidly to find max hits, and perfect sites. May indicate some blocks can be skipped. Returns prescanReturn, with [0]=per-cycle hit counts and [1]=per-cycle quick scores, and raises bestScores[1] and bestScores[3] to the best values seen. */
+ private final int[][] prescanAllBlocks(int[] bestScores,
+ int[] keysP, int[] keyScoresP, int[] offsetsP,
+ int[] keysM, int[] keyScoresM, int[] offsetsM,
+ final boolean allBasesCovered){
+ //pm[0] holds the plus-strand arrays and pm[1] the minus-strand arrays, each as {keys, keyScores, offsets}.
+ int[][][] pm=new int[][][] {{keysP, keyScoresP, offsetsP}, {keysM, keyScoresM, offsetsM}};
+
+ int bestqscore=0;
+ int maxHits=0;
+ int minHitsToScore=MIN_APPROX_HITS_TO_KEEP;
+
+ final int maxQuickScore=maxQuickScore(offsetsP, keyScoresP);
+ //Reused buffers, pre-filled with the maximum count and score (presumably so that cycles not reached before the early return still pass the caller's cutoffs - confirm).
+ final int[] counts=precountArray;
+ final int[] scores=prescoreArray;
+ final int[][] ret=prescanReturn;
+ Arrays.fill(counts, keysP.length);
+ Arrays.fill(scores, maxQuickScore);
+ ret[0]=counts;
+ ret[1]=scores;
+
+ int cycle=0;
+ for(int chrom=minChrom; chrom<=maxChrom; chrom=((chrom&CHROM_MASK_HIGH)+CHROMS_PER_BLOCK)){
+ final int baseChrom=baseChrom(chrom);
+ for(int pmi=0; pmi<2; pmi++, cycle++){
+
+ int[] keys=pm[pmi][0];
+ int[] keyScores=pm[pmi][1];
+ int[] offsets=pm[pmi][2];
+// int[][] hits=getHitArray(offsets.length);
+
+ int[] starts=startArray;
+ int[] stops=stopArray;
+ int numHits=getHits(keys, chrom, Integer.MAX_VALUE, starts, stops);
+
+ if(numHits=maxQuickScore && allBasesCovered); //NOTE(review): text lost between "numHits" and "=maxQuickScore"; the code that computes temp is missing
+
+ scores[cycle]=temp[0];
+ counts[cycle]=temp[1];
+
+ bestqscore=Tools.max(temp[0], bestqscore);
+ maxHits=Tools.max(maxHits, temp[1]);
+ if(bestqscore>=maxQuickScore && allBasesCovered){
+ assert(bestqscore==maxQuickScore);
+ assert(maxHits==keysP.length) :
+ "\nTemp: \t"+Arrays.toString(temp)+", cycle="+cycle+"\n" +
+ "Scores: \t"+Arrays.toString(scores)+
+ "Counts: \t"+Arrays.toString(counts)+
+ "bestqscore: \t"+bestqscore+
+ "maxHits: \t"+maxHits+
+ "maxQuickScore: \t"+maxQuickScore+
+ "numHits: \t"+numHits+
+ "minHitsToScore: \t"+minHitsToScore+
+ "keys.length: \t"+keys.length;
+
+ minHitsToScore=Tools.max(minHitsToScore, maxHits);
+
+ {
+ //This early exit is optional. Does not seem to impact speed much either way.
+ bestScores[1]=Tools.max(bestScores[1], maxHits);
+ bestScores[3]=Tools.max(bestScores[3], bestqscore);
+ return ret;
+ }
+ }
+ }
+ }
+ }
+
+ bestScores[1]=Tools.max(bestScores[1], maxHits);
+ bestScores[3]=Tools.max(bestScores[3], bestqscore);
+
+ if(!RETAIN_BEST_QCUTOFF){bestScores[2]=-9999;}
+
+ return ret;
+ }
+
+
+ /** Search a single block and strand */
+ public final ArrayList find(int[] keys, final byte[] bases, final byte[] baseScores, int[] keyScores,
+ final int chrom, final byte strand,
+ int[] offsets, final boolean obeyLimits, ArrayList ssl, int[] bestScores,
+ final boolean allBasesCovered, final int maxScore, final boolean fullyDefined){
+
+ assert(checkOffsets(offsets)) : Arrays.toString(offsets);
+
+ //Index of first location of each key
+ int[] starts=startArray;
+ //Index of first location of next key (i.e., (last location of key)+1)
+ int[] stops=stopArray;
+ //NOTE(review): the "if(numHits" line two lines down is cut off. The rest of this method and the header of a shrink helper are missing; the code that follows compacts starts/stops/offsets/keyScores and returns int[][].
+ int numHits=getHits(keys, chrom, Integer.MAX_VALUE, starts, stops);
+ if(numHits=0){numHits++;}
+ }
+
+ if(numHits==offsets.length){ //Nothing to compact: null tells the caller to keep its arrays
+ return null;
+ }else{
+ int[][] r=shrinkReturn3;
+ int[] starts2=startArray; //starts2 and stops2 alias the shared startArray/stopArray fields
+ int[] stops2=stopArray;
+ int[] offsets2=getOffsetArray(numHits);
+ int[] keyScores2=new int[numHits];
+
+ for(int i=0, j=0; i=0){ //NOTE(review): text lost between "i" and "=0"
+ starts2[j]=starts[i];
+ stops2[j]=stops[i];
+ offsets2[j]=offsets[i];
+ keyScores2[j]=keyScores[i];
+ j++;
+ }
+ }
+ r[0]=starts2;
+ r[1]=stops2;
+ r[2]=offsets2;
+ r[4]=keyScores2; //r[3] is not written in the visible code; callers read indices 0, 1, 2 and 4
+ return r;
+ }
+ }
+
+ /** Removes "-1" keys. Returns null when numHits equals keys.length; otherwise returns shrinkReturn2 holding {offsets2, keys2, keyScores2}. */
+ private final int[][] shrink2(int[] offsets, int[] keys, int[] keyScores){
+ //NOTE(review): the counting loop and the copy loop below have lost text, including the declarations of keys2 and keyScores2. Restore from the original source.
+
+ int numHits=0;
+ for(int i=0; i=0){numHits++;}
+ }
+
+
+ assert(checkOffsets(offsets)) : Arrays.toString(offsets);
+ if(numHits==keys.length){
+ return null;
+ }else{
+ int[][] r=shrinkReturn2;
+ int[] offsets2=getOffsetArray(numHits);
+ assert(offsets2!=offsets);
+ assert(offsets2.length=0){
+ offsets2[j]=offsets[i];
+ keys2[j]=keys[i];
+ keyScores2[j]=keyScores[i];
+ j++;
+ }
+ }
+ assert(checkOffsets(offsets2)) : "\nnumHits="+numHits+"\n"+Arrays.toString(offsets)+" -> \n"+Arrays.toString(offsets2)+"\n"+
+ "\n"+Arrays.toString(keys)+" -> \n"+Arrays.toString(keys2)+"\n";
+ r[0]=offsets2;
+ r[1]=keys2;
+ r[2]=keyScores2;
+ return r;
+ }
+ }
+
+
+ /** This uses a heap to track next column to increment. Scores each group of nearby hits and returns ssl (created here if null). NOTE(review): several lines have lost text (flagged below), including the section that records a site and advances the heap. */
+ private final ArrayList slowWalk2(int[] starts, int[] stops, final byte[] bases,
+ final byte[] baseScores, int[] keyScores, int[] offsets,
+ final int baseChrom_, final byte strand, final boolean obeyLimits, ArrayList ssl, final boolean fullyDefined){
+
+ if(SHRINK_BEFORE_WALK){
+ int[][] r=shrink(starts, stops, offsets, keyScores, offsets.length);
+ if(r!=null){ //null means shrink had nothing to remove, so the arrays are kept as passed in
+ starts=r[0];
+ stops=r[1];
+ offsets=r[2];
+ keyScores=r[4];
+ }
+ }
+
+ final int numHits=offsets.length; //After shrink
+
+ assert(numHits==offsets.length);
+ assert(numHits==keyScores.length);
+
+ assert(!(!SHRINK_BEFORE_WALK && ADD_SCORE_Z));
+
+ //This can be done before or after shrinking, but the results will change depending on MIN_SCORE_MULT and etc.
+ final int maxScore=maxScore(offsets, baseScores, keyScores, bases.length, true);
+// final int maxQuickScore=(!USE_EXTENDED_SCORE ? maxScore : maxQuickScore(offsets));
+// System.err.println("maxScore = "+maxScore);
+
+// final int minScore=(obeyLimits ? (int)(MIN_SCORE_MULT*maxScore) : (int)(MIN_SCORE_MULT*0.85f*maxScore));
+ final int minScore=(obeyLimits ? (int)(MIN_SCORE_MULT*maxScore) : (int)(MIN_SCORE_MULT*1.25f*maxScore)); //Without limits the minimum score is scaled up by 1.25
+// final int minQuickScore=(!USE_EXTENDED_SCORE ? minScore : (int)(maxQuickScore*0.15f));
+// final int minScore=(int)(MIN_SCORE_MULT*maxScore);
+// System.err.println("minScore = "+minScore);
+
+ final int baseChrom=baseChrom(baseChrom_);
+
+
+ heap.clear();
+ final Quad[] triples=tripleStorage;
+
+ final Block b=index[baseChrom];
+ final int[] values=valueArray;
+ final int[] sizes=sizeArray;
+ final int[] locArray=(USE_EXTENDED_SCORE ? getLocArray(bases.length) : null);
+
+ for(int i=0; i0); //NOTE(review): text lost between "i" and "0)"; the definitions of start and sites are missing
+
+ int a=sites[start];
+ int a2;
+ if((a&SITE_MASK)>=offsets[i]){ //Subtract the key offset from the hit; the else branch clamps the site at 0 instead of letting it go negative
+ a2=a-offsets[i];
+ }else{
+ int ch=numberToChrom(a, baseChrom);
+ int st=numberToSite(a);
+ int st2=Tools.max(st-offsets[i], 0);
+ a2=toNumber(st2, ch);
+ }
+ assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom));
+
+ Quad t=triples[i];
+ assert(t!=null) : "Should be using tripleStorage";
+ assert(i==t.column);
+ t.row=start;
+ t.site=a2;
+ t.list=sites;
+ values[i]=a2; //values mirrors the current site of every column (asserted in the inner loop below)
+
+ heap.add(t);
+ }
+
+ if(ssl==null){ssl=new ArrayList(8);}
+
+ int currentTopScore=-999999999;
+
+ int cutoff=minScore;
+
+ int maxHits=0;
+ int approxHitsCutoff=MIN_APPROX_HITS_TO_KEEP;
+
+// System.out.println("\nEntering SS loop:");
+// System.out.println("maxScore="+maxScore+"\tminScore="+minScore+"\tcurrentTopScore="+currentTopScore+"\n" +
+// "cutoff="+cutoff+"\tmaxHits="+maxHits+"\tapproxHitsCutoff="+approxHitsCutoff);
+// System.out.println();
+
+ SiteScore prevSS=null;
+ while(!heap.isEmpty()){
+ Quad t=heap.peek();
+ final int site=t.site;
+ final int centerIndex=t.column;
+
+ int maxNearbySite=site;
+ int approxHits=0;
+
+ {//Inner loop
+ final int minsite=site-MAX_INDEL, maxsite=site+MAX_INDEL2; //Columns whose current site lies in this window count as approximate hits
+ for(int column=0, chances=numHits-approxHitsCutoff; column=0; column++){ //NOTE(review): loop condition damaged (text lost between "column" and "=0")
+ final int x=values[column];
+ assert(x==triples[column].site);
+ if(x>=minsite && x<=maxsite){
+ maxNearbySite=(x>maxNearbySite ? x : maxNearbySite);
+ approxHits++;
+ }else{chances--;}
+ }
+ }
+ hist_hits[Tools.min(HIT_HIST_LEN, approxHits)]++; //Histogram of approximate hit counts, capped at index HIT_HIST_LEN
+
+ assert(centerIndex>=0) : centerIndex;
+ assert(approxHits>=1 || approxHitsCutoff>1) : approxHits+", "+approxHitsCutoff+", "+numHits+", "+t.column;
+ if(approxHits>=approxHitsCutoff){
+
+ int score;
+
+ int mapStart=site, mapStop=maxNearbySite;
+
+ if(USE_EXTENDED_SCORE){
+ final int chrom=numberToChrom(site, baseChrom);
+ score=extendScore(bases, baseScores, offsets, values, chrom, centerIndex, locArray, numHits, approxHits);
+ if(true/*USE_AFFINE_SCORE*/){
+ //Correct begin and end positions if they changed.
+ int min=Integer.MAX_VALUE;
+ int max=Integer.MIN_VALUE;
+ for(int i=0; i-1){ //NOTE(review): text lost between "i" and "-1"
+ if(xmax){max=x;} //NOTE(review): text lost between "x" and "max"; presumably a min update followed by a max update - confirm
+ }
+ }
+
+// assert(min>-1 && max>-1) : Arrays.toString(locArray); //TODO: How did this assertion trigger?
+ if(min<0 || max<0){
+ //Note: This error can trigger if minChrom and maxChrom do not align to block boundaries
+ if(!Shared.anomaly){ //Report only the first anomaly
+ Shared.anomaly=true;
+ System.err.println("Anomaly in "+getClass().getName()+".slowWalk: "+
+ chrom+", "+mapStart+", "+mapStop+", "+centerIndex+", "+
+ Arrays.toString(locArray)+"\n"+
+ Arrays.toString(values)+"\n"+
+ new String(bases)+"\nstrand="+strand+"\n");
+ System.err.println();
+ }
+ score=-99999; //Presumably low enough to fail the cutoff test below - confirm
+ }
+
+
+ mapStart=toNumber(min, chrom);
+ mapStop=toNumber(max, chrom);
+
+
+
+// System.err.println("site="+site+", maxNearbySite="+maxNearbySite+", min="+min+", max="+max+
+// ", mapStart="+mapStart+", mapStop="+mapStop);
+
+// if(chrom==17 && absdif(min, 30354420)<2000){
+// System.err.println("\n*****\n");
+// System.err.println("site="+site+" ("+numberToSite(site)+"), maxNearbySite="+maxNearbySite+
+// " ("+numberToSite(maxNearbySite)+"), min="+min+", max="+max+
+// ", mapStart="+mapStart+", mapStop="+mapStop);
+// System.err.println();
+// System.err.println(Arrays.toString(locArray));
+// System.err.println();
+// System.err.println("chrom="+chrom);
+// System.err.println("score="+score);
+// }
+ }
+ }else{
+ score=quickScore(values, keyScores, centerIndex, offsets, sizes, true, approxHits, numHits);
+ if(ADD_SCORE_Z){
+ int scoreZ=scoreZ2(values, centerIndex, offsets, approxHits, numHits);
+ score+=scoreZ;
+ }
+ }
+
+ if(score>=cutoff){
+
+ if(score>currentTopScore){
+// System.err.println("New top score!");
+
+ if(DYNAMICALLY_TRIM_LOW_SCORES){
+
+ maxHits=Tools.max(approxHits, maxHits);
+// approxHitsCutoff=Tools.max(approxHitsCutoff, maxHits);
+ approxHitsCutoff=Tools.max(approxHitsCutoff, maxHits-1); //More sensitive, but slower
+
+ cutoff=Tools.max(cutoff, (int)(score*DYNAMIC_SCORE_THRESH));
+
+ // cutoff=Tools.max(cutoff, minScore+(int)((score-minScore)*DYNAMIC_SCORE_THRESH));
+ if(USE_EXTENDED_SCORE && score>=maxScore){
+ cutoff=Tools.max(cutoff, (int)(score*0.95f));
+ }
+ }
+
+ currentTopScore=score;
+
+// System.out.println("New top score: "+currentTopScore+" \t("+cutoff+")");
+ }
+
+ final int chrom=numberToChrom(mapStart, baseChrom);
+ final int site2=numberToSite(mapStart);
+ final int site3=numberToSite(mapStop)+bases.length-1;
+
+ assert(NUM_CHROM_BITS==0 || site2=offsets[col]){ //NOTE(review): a large span is lost here (between "site2" and "=offsets[col]"): the code that records the site in ssl and pulls the next entry from the heap is missing
+ a2=a-offsets[col];
+
+ assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
+ "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
+ ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
+ }else{
+ int ch=numberToChrom(a, baseChrom);
+ int st=numberToSite(a);
+ int st2=Tools.max(st-offsets[col], 0);
+ a2=toNumber(st2, ch);
+
+ assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
+ "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
+ ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
+ }
+
+ assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
+ "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
+ ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
+
+ t2.site=a2; //Re-insert the advanced column with its new adjusted site
+ values[col]=a2;
+ heap.add(t2);
+ }
+ if(heap.isEmpty()){break;}
+ }
+
+ }
+
+ return ssl;
+ }
+
+ /** This uses a heap to track next column to increment */
+ private final ArrayList slowWalk3(int[] starts, int[] stops, final byte[] bases,
+ final byte[] baseScores, int[] keyScores, int[] offsets,
+ final int baseChrom_, final byte strand, final boolean obeyLimits, ArrayList ssl,
+ int[] bestScores, final boolean allBasesCovered, final int maxScore, final boolean fullyDefined){
+ assert(USE_EXTENDED_SCORE);
+
+ final int numKeys=offsets.length; //Before shrink
+
+ //This can be done before or after shrinking, but the results will change depending on MIN_SCORE_MULT and etc.
+ final int maxQuickScore=maxQuickScore(offsets, keyScores);
+
+ if(SHRINK_BEFORE_WALK){
+ int[][] r=shrink(starts, stops, offsets, keyScores, offsets.length);
+ if(r!=null){
+ starts=r[0];
+ stops=r[1];
+ offsets=r[2];
+ keyScores=r[4];
+ }
+ }
+
+ final int numHits=offsets.length; //After shrink
+
+
+ assert(numHits==offsets.length);
+ assert(numHits==keyScores.length);
+
+ usedKeys+=numHits;
+ usedKeyIterations++;
+
+ final boolean filter_by_qscore=(FILTER_BY_QSCORE && numKeys>=5);
+
+ assert(!(!SHRINK_BEFORE_WALK && ADD_SCORE_Z));
+
+
+// final int minScore=(obeyLimits ? (int)(MIN_SCORE_MULT*maxScore) : (int)(MIN_SCORE_MULT*0.85f*maxScore));
+ final int minScore=(obeyLimits ? (int)(MIN_SCORE_MULT*maxScore) : (int)(MIN_SCORE_MULT*1.25f*maxScore));
+ final int minQuickScore=(int)(MIN_QSCORE_MULT*maxQuickScore);
+
+ final int baseChrom=baseChrom(baseChrom_);
+
+ heap.clear();
+
+ final Quad[] triples=tripleStorage;
+
+ final int[] values=valueArray;
+ final int[] sizes=sizeArray;
+ final int[] locArray=(USE_EXTENDED_SCORE ? getLocArray(bases.length) : null);
+ final Block b=index[baseChrom];
+
+ if(ssl==null){ssl=new ArrayList(8);}
+
+ int currentTopScore=bestScores[0];
+ int cutoff=Tools.max(minScore, (int)(currentTopScore*DYNAMIC_SCORE_THRESH));
+
+ int qcutoff=Tools.max(bestScores[2], minQuickScore);
+ int bestqscore=bestScores[3];
+ int maxHits=bestScores[1];
+ int perfectsFound=bestScores[5];
+ assert((currentTopScore>=maxScore) == (perfectsFound>0)) : currentTopScore+", "+maxScore+", "+perfectsFound+", "+maxHits+", "+numHits+", "+new String(bases);
+ int approxHitsCutoff=calcApproxHitsCutoff(numKeys, maxHits, MIN_APPROX_HITS_TO_KEEP, currentTopScore>=maxScore);
+ if(approxHitsCutoff>numHits){return ssl;}
+
+ final boolean shortCircuit=(allBasesCovered && numKeys==numHits && filter_by_qscore);
+
+
+ assert(USE_EXTENDED_SCORE);
+
+ if(currentTopScore>=maxScore){
+ assert(currentTopScore==maxScore);
+ qcutoff=Tools.max(qcutoff, (int)(maxQuickScore*DYNAMIC_QSCORE_THRESH_PERFECT));
+ }
+
+
+ for(int i=0; i0);
+
+ int a=sites[start];
+ int a2;
+ if((a&SITE_MASK)>=offsets[i]){
+ a2=a-offsets[i];
+ }else{
+ int ch=numberToChrom(a, baseChrom);
+ int st=numberToSite(a);
+ int st2=Tools.max(st-offsets[i], 0);
+ a2=toNumber(st2, ch);
+ }
+ assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom));
+
+ Quad t=triples[i];
+ assert(t!=null) : "Should be using tripleStorage";
+ assert(i==t.column);
+ t.row=start;
+ t.site=a2;
+ t.list=sites;
+ values[i]=a2;
+
+ heap.add(t);
+ }
+
+// System.out.println("\nEntering SS loop:");
+// System.out.println("maxScore="+maxScore+"\tminScore="+minScore+"\tcurrentTopScore="+currentTopScore+"\n" +
+// "cutoff="+cutoff+"\tmaxHits="+maxHits+"\tapproxHitsCutoff="+approxHitsCutoff);
+// System.out.println("maxQuickScore="+maxQuickScore+"\tminQuickScore="+minQuickScore+"\tqcutoff="+qcutoff);
+
+
+ SiteScore prevSS=null;
+ while(!heap.isEmpty()){
+ Quad t=heap.peek();
+ final int site=t.site;
+ final int centerIndex=t.column;
+
+ int maxNearbySite=site;
+
+
+ int approxHits=0;
+
+ {//Inner loop
+ final int minsite=site-MAX_INDEL, maxsite=site+MAX_INDEL2;
+ for(int column=0, chances=numHits-approxHitsCutoff; column=0; column++){
+ final int x=values[column];
+ assert(x==triples[column].site);
+ if(x>=minsite && x<=maxsite){
+ maxNearbySite=(x>maxNearbySite ? x : maxNearbySite);
+ approxHits++;
+ }else{chances--;}
+ }
+ }
+ hist_hits[Tools.min(HIT_HIST_LEN, approxHits)]++;
+
+ assert(centerIndex>=0) : centerIndex;
+ assert(approxHits>=1 || approxHitsCutoff>1) : approxHits+", "+approxHitsCutoff+", "+numHits+", "+t.column;
+ if(approxHits>=approxHitsCutoff){
+
+ int score;
+ int qscore=(filter_by_qscore ? quickScore(values, keyScores, centerIndex, offsets, sizes, true, approxHits, numHits) : qcutoff);
+ if(ADD_SCORE_Z){
+ int scoreZ=scoreZ2(values, centerIndex, offsets, approxHits, numHits);
+ qscore+=scoreZ;
+ }
+
+ int mapStart=site, mapStop=maxNearbySite;
+
+ assert(USE_EXTENDED_SCORE);
+
+ boolean locArrayValid=false;
+ if(qscore-1){
+ if(xmax){max=x;}
+ }
+ }
+
+ if(score>=maxScore){
+ assert(min==max && min>-1) : "\n"+score+", "+maxScore+", "+min+", "+max+
+ ", "+(max-min)+", "+bases.length+"\n"+Arrays.toString(locArray)+"\n";
+ }
+
+ // assert(min>-1 && max>-1) : Arrays.toString(locArray); //TODO: How did this assertion trigger?
+ if(min<0 || max<0){
+ if(!Shared.anomaly){
+ Shared.anomaly=true;
+ System.err.println("\nAnomaly in "+getClass().getName()+".slowWalk:\n"+
+ "chrom="+chrom+", mapStart="+mapStart+", mapStop="+mapStop+", centerIndex="+centerIndex+", strand="+strand+"\n"+
+ "score="+score+", maxScore="+maxScore+", qscore="+qscore+", filter_by_qscore="+filter_by_qscore+"\n"+
+ "numHits="+approxHits+", approxHits="+approxHits+"\n"+
+ "min="+min+", max="+max+", (max-min)="+(max-min)+"\n"+
+ "bases.length="+bases.length+"\n"+Arrays.toString(locArray)+"\n"+
+ "locArray:\t"+Arrays.toString(locArray)+"\n"+
+ "values:\t"+Arrays.toString(values)+"\n"+
+ "bases:\t"+new String(bases));
+ System.err.println();
+ assert(false);
+ }
+ score=-99999;
+ }
+
+ //mapStart and mapStop are indices
+ mapStart=toNumber(min, chrom);
+ mapStop=toNumber(max, chrom);
+
+ if(score>=maxScore){
+ assert(mapStop-mapStart==0) : "\n"+score+", "+maxScore+", "+min+", "+max+
+ ", "+(max-min)+", "+(mapStop-mapStart)+", "+bases.length+"\n"+Arrays.toString(locArray)+"\n";
+ }
+ }
+
+ if(score==maxScore){
+ qcutoff=Tools.max(qcutoff, (int)(maxQuickScore*DYNAMIC_QSCORE_THRESH_PERFECT));
+ approxHitsCutoff=calcApproxHitsCutoff(numKeys, maxHits, MIN_APPROX_HITS_TO_KEEP, true);
+ }
+
+ if(score>=cutoff){
+ qcutoff=Tools.max(qcutoff, (int)(qscore*DYNAMIC_QSCORE_THRESH));
+ bestqscore=Tools.max(qscore, bestqscore);
+ }
+ }
+
+ if(score>=cutoff){
+
+ if(score>currentTopScore){
+// System.err.println("New top score!");
+
+ if(DYNAMICALLY_TRIM_LOW_SCORES){
+
+ maxHits=Tools.max(approxHits, maxHits);
+ approxHitsCutoff=calcApproxHitsCutoff(numKeys, maxHits, approxHitsCutoff, currentTopScore>=maxScore);
+
+ cutoff=Tools.max(cutoff, (int)(score*DYNAMIC_SCORE_THRESH));
+ if(score>=maxScore){
+ assert(USE_EXTENDED_SCORE);
+ cutoff=Tools.max(cutoff, (int)(score*0.95f));
+ }
+ }
+
+ currentTopScore=score;
+
+// System.out.println("New top score: "+currentTopScore+" \t("+cutoff+")");
+ }
+
+ final int chrom=numberToChrom(mapStart, baseChrom);
+ final int site2=numberToSite(mapStart);
+ final int site3=numberToSite(mapStop)+bases.length-1;
+
+ assert(site2!=site3) : site2+", "+site3+", "+mapStart+", "+mapStop;
+
+ assert(NUM_CHROM_BITS==0 || site2 "+site2);
+// System.err.println(mapStop+" -> "+site3);
+
+ assert(gapArray[0]>=site2 && gapArray[0]-site2 "+site2+"\n"+
+ mapStop+" -> "+site3+"\n\n"+
+ Arrays.toString(gapArray)+"\n\n"+
+// Arrays.toString(clone)+"\n\n"+
+ Arrays.toString(locArray)+"\n"+
+ "numHits="+numHits+", "+
+ "heap.size="+heap.size()+", "+
+ "numHits="+numHits+", "+
+ "approxHits="+approxHits+"\n";
+ gapArray[0]=Tools.min(gapArray[0], site2);
+ gapArray[gapArray.length-1]=Tools.max(gapArray[gapArray.length-1], site3);
+ }
+ if(verbose){System.err.println("@ site "+site2+", made gap array: "+Arrays.toString(gapArray));}
+// assert(false) : Arrays.toString(locArray);
+ }
+
+
+ //This block is optional, but tries to eliminate multiple identical alignments
+
+ SiteScore ss=null;
+ final boolean perfect1=USE_EXTENDED_SCORE && score==maxScore && fullyDefined;
+ final boolean inbounds=(site2>=0 && site3=offsets[col]){
+ a2=a-offsets[col];
+
+ assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
+ "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
+ ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
+ }else{
+ int ch=numberToChrom(a, baseChrom);
+ int st=numberToSite(a);
+ int st2=Tools.max(st-offsets[col], 0);
+ a2=toNumber(st2, ch);
+
+ assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
+ "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
+ ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
+ }
+
+ assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
+ "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
+ ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
+
+ t2.site=a2;
+ values[col]=a2;
+ heap.add(t2);
+ }else if(heap.size() camelWalk3(int[] starts, int[] stops, final byte[] bases,
+ final byte[] baseScores, int[] keyScores, int[] offsets,
+ final int baseChrom_, final byte strand, final boolean obeyLimits, ArrayList ssl,
+ int[] bestScores, final boolean allBasesCovered, final int maxScore, final boolean fullyDefined){
+ assert(USE_EXTENDED_SCORE);
+
+ final int numKeys=offsets.length; //Before shrink
+
+ //This can be done before or after shrinking, but the results will change depending on MIN_SCORE_MULT and etc.
+ final int maxQuickScore=maxQuickScore(offsets, keyScores);
+
+ if(SHRINK_BEFORE_WALK){
+ int[][] r=shrink(starts, stops, offsets, keyScores, offsets.length);
+ if(r!=null){
+ starts=r[0];
+ stops=r[1];
+ offsets=r[2];
+ keyScores=r[4];
+ }
+ }
+
+ final int numHits=offsets.length; //After shrink
+
+
+ assert(numHits==offsets.length);
+ assert(numHits==keyScores.length);
+
+ usedKeys+=numHits;
+ usedKeyIterations++;
+
+ final boolean filter_by_qscore=(FILTER_BY_QSCORE && numKeys>=5);
+
+ assert(!(!SHRINK_BEFORE_WALK && ADD_SCORE_Z));
+
+
+// final int minScore=(obeyLimits ? (int)(MIN_SCORE_MULT*maxScore) : (int)(MIN_SCORE_MULT*0.85f*maxScore));
+ final int minScore=(obeyLimits ? (int)(MIN_SCORE_MULT*maxScore) : (int)(MIN_SCORE_MULT*1.25f*maxScore));
+ final int minQuickScore=(int)(MIN_QSCORE_MULT*maxQuickScore);
+
+ final int baseChrom=baseChrom(baseChrom_);
+
+ heap.clear();
+ active.clear();
+
+ final Quad[] triples=tripleStorage;
+
+ final int[] values=valueArray;
+ final int[] sizes=sizeArray;
+ final int[] locArray=(USE_EXTENDED_SCORE ? getLocArray(bases.length) : null);
+ final Block b=index[baseChrom];
+
+ if(ssl==null){ssl=new ArrayList(8);}
+
+ int currentTopScore=bestScores[0];
+ int cutoff=Tools.max(minScore, (int)(currentTopScore*DYNAMIC_SCORE_THRESH));
+
+ int qcutoff=Tools.max(bestScores[2], minQuickScore);
+ int bestqscore=bestScores[3];
+ int maxHits=bestScores[1];
+ int perfectsFound=bestScores[5];
+ assert((currentTopScore>=maxScore) == (perfectsFound>0)) : currentTopScore+", "+maxScore+", "+perfectsFound+", "+maxHits+", "+numHits;
+ int approxHitsCutoff=calcApproxHitsCutoff(numKeys, maxHits, MIN_APPROX_HITS_TO_KEEP, currentTopScore>=maxScore);
+ if(approxHitsCutoff>numHits){return ssl;}
+
+ final boolean shortCircuit=(allBasesCovered && numKeys==numHits && filter_by_qscore);
+
+ if(currentTopScore>=maxScore){
+ assert(currentTopScore==maxScore);
+ qcutoff=Tools.max(qcutoff, (int)(maxQuickScore*DYNAMIC_QSCORE_THRESH_PERFECT));
+ }
+
+
+ for(int i=0; i0);
+
+ int a=sites[start];
+ int a2;
+ if((a&SITE_MASK)>=offsets[i]){
+ a2=a-offsets[i];
+ }else{
+ int ch=numberToChrom(a, baseChrom);
+ int st=numberToSite(a);
+ int st2=Tools.max(st-offsets[i], 0);
+ a2=toNumber(st2, ch);
+ }
+ assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom));
+
+ Quad t=triples[i];
+ assert(t!=null) : "Should be using tripleStorage";
+ assert(i==t.column);
+ t.row=start;
+ t.site=a2;
+ t.list=sites;
+ values[i]=a2;
+
+ heap.add(t);
+ }
+
+ assert(numHits>0);
+ assert(heap.size()==numHits);
+
+ /* Tracks largest element allowed in 'active' */
+
+// System.err.println("\nEntering SS loop:");
+// System.err.println("maxScore="+maxScore+"\tminScore="+minScore+"\tcurrentTopScore="+currentTopScore+"\n" +
+// "cutoff="+cutoff+"\tmaxHits="+maxHits+"\tapproxHitsCutoff="+approxHitsCutoff);
+// System.err.println("maxQuickScore="+maxQuickScore+"\tminQuickScore="+minQuickScore+"\tqcutoff="+qcutoff);
+
+// int iter=0;
+ SiteScore prevSS=null;
+ int maxNearbySite=0;
+ int site=0;
+ int horizon=0;
+ assert(active.isEmpty());
+ while(!heap.isEmpty() || !active.isEmpty()){
+// iter++;
+
+ do{
+ while(!active.isEmpty() && active.peek().site==site){ //Remove all identical elements, and add subsequent elements
+ final Quad t2=active.poll();
+ final int row=t2.row+1, col=t2.column;
+
+ //This is called the "increment" operation. Very messy and slow due to rare cases at beginning of a chrom.
+ if(row=offsets[col]){
+ a2=a-offsets[col];
+
+ assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
+ "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
+ ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
+ }else{
+ int ch=numberToChrom(a, baseChrom);
+ int st=numberToSite(a);
+ int st2=Tools.max(st-offsets[col], 0);
+ a2=toNumber(st2, ch);
+
+ assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
+ "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
+ ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
+ }
+
+ assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
+ "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
+ ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
+
+ t2.site=a2;
+ values[col]=a2;
+ if(a2<=horizon){
+ active.add(t2);
+ maxNearbySite=Tools.max(t2.site, maxNearbySite);
+ }else{heap.add(t2);}
+ }else if((heap.size()+active.size())=0; column++){
+// final int x=values[column];
+// assert(x==triples[column].site);
+// if(x>=minsite && x<=maxsite){
+// maxNearbySite=(x>maxNearbySite ? x : maxNearbySite);
+// approxHits++;
+// }else{chances--;}
+//// if(verbose){
+//// System.err.println("column="+column+", numHits="+numHits+", approxHits="+approxHits+
+//// ", approxHitsCutoff="+approxHitsCutoff+", chances="+chances);
+//// }
+// }
+// //Invalid assertion due to loop early exit
+//// assert(approxHits>0) : "\niter="+iter+", maxHits="+maxHits+", numHits="+numHits+", approxHitsCutoff="+approxHitsCutoff+
+//// "\nheap.size()="+heap.size()+", minsite="+minsite+", maxsite="+maxsite+", values[center]="+values[centerIndex]+", t="+t;
+// }
+// assert(approxHits<=active.size()) : "approxHits="+approxHits+", active.size()="+active.size()+", maxNearbySite="+maxNearbySite+"\nvalues="+Arrays.toString(values);
+
+ hist_hits[Tools.min(HIT_HIST_LEN, approxHits)]++;
+
+ assert(centerIndex>=0) : centerIndex;
+ assert(approxHits>=1 || approxHitsCutoff>1) : approxHits+", "+approxHitsCutoff+", "+numHits+", "+t.column;
+ if(approxHits>=approxHitsCutoff){
+
+// if(verbose){System.err.println("A");}
+
+ int score;
+ int qscore=(filter_by_qscore ? quickScore(values, keyScores, centerIndex, offsets, sizes, true, approxHits, numHits) : qcutoff);
+ if(ADD_SCORE_Z){
+ int scoreZ=scoreZ2(values, centerIndex, offsets, approxHits, numHits);
+ qscore+=scoreZ;
+ }
+
+ int mapStart=site, mapStop=maxNearbySite;
+ assert(mapStart<=mapStop);
+
+ assert(USE_EXTENDED_SCORE);
+
+ boolean locArrayValid=false;
+ if(qscore-1){
+ if(xmax){max=x;}
+ }
+ }
+
+ if(score>=maxScore){
+ assert(min==max && min>-1) : "\n"+score+", "+maxScore+", "+min+", "+max+
+ ", "+(max-min)+", "+bases.length+"\n"+Arrays.toString(locArray)+"\n";
+ }
+
+ // assert(min>-1 && max>-1) : Arrays.toString(locArray); //TODO: How did this assertion trigger?
+ if(min<0 || max<0){
+ if(!Shared.anomaly){
+ Shared.anomaly=true;
+ System.err.println("\nAnomaly in "+getClass().getName()+".slowWalk:\n"+
+ "chrom="+chrom+", mapStart="+mapStart+", mapStop="+mapStop+", centerIndex="+centerIndex+", strand="+strand+"\n"+
+ "score="+score+", maxScore="+maxScore+", qscore="+qscore+", filter_by_qscore="+filter_by_qscore+"\n"+
+ "numHits="+approxHits+", approxHits="+approxHits+"\n"+
+ "min="+min+", max="+max+", (max-min)="+(max-min)+"\n"+
+ "bases.length="+bases.length+"\n"+Arrays.toString(locArray)+"\n"+
+ "locArray:\t"+Arrays.toString(locArray)+"\n"+
+ "values:\t"+Arrays.toString(values)+"\n"+
+ "bases:\t"+new String(bases));
+ System.err.println();
+ assert(false);
+ }
+ score=-99999;
+ }
+
+ //mapStart and mapStop are indices
+ mapStart=toNumber(min, chrom);
+ mapStop=toNumber(max, chrom);
+ assert(mapStart<=mapStop);
+
+ if(score>=maxScore){
+ assert(mapStop-mapStart==0) : "\n"+score+", "+maxScore+", "+min+", "+max+
+ ", "+(max-min)+", "+(mapStop-mapStart)+", "+bases.length+"\n"+Arrays.toString(locArray)+"\n";
+ }
+ }
+// if(verbose){System.err.println("F");}
+
+ if(score==maxScore){
+ qcutoff=Tools.max(qcutoff, (int)(maxQuickScore*DYNAMIC_QSCORE_THRESH_PERFECT));
+ approxHitsCutoff=calcApproxHitsCutoff(numKeys, maxHits, MIN_APPROX_HITS_TO_KEEP, true);
+ }
+
+ if(score>=cutoff){
+ qcutoff=Tools.max(qcutoff, (int)(qscore*DYNAMIC_QSCORE_THRESH));
+ bestqscore=Tools.max(qscore, bestqscore);
+ }
+ }
+// if(verbose){System.err.println("G");}
+
+ if(score>=cutoff){
+// if(verbose){System.err.println("H");}
+
+ if(score>currentTopScore){
+// if(verbose){System.err.println("I");}
+// System.err.println("New top score!");
+
+ if(DYNAMICALLY_TRIM_LOW_SCORES){
+
+ maxHits=Tools.max(approxHits, maxHits);
+ approxHitsCutoff=calcApproxHitsCutoff(numKeys, maxHits, approxHitsCutoff, currentTopScore>=maxScore);
+
+ cutoff=Tools.max(cutoff, (int)(score*DYNAMIC_SCORE_THRESH));
+ if(score>=maxScore){
+ assert(USE_EXTENDED_SCORE);
+ cutoff=Tools.max(cutoff, (int)(score*0.95f));
+ }
+ }
+
+ currentTopScore=score;
+
+// System.err.println("New top score: "+currentTopScore+" \t("+cutoff+")");
+ }
+// if(verbose){System.err.println("J");}
+
+ final int chrom=numberToChrom(mapStart, baseChrom);
+ final int site2=numberToSite(mapStart);
+ final int site3=numberToSite(mapStop)+bases.length-1;
+
+ assert(NUM_CHROM_BITS==0 || site2 "+site2);
+// System.err.println(mapStop+" -> "+site3);
+
+ assert(gapArray[0]>=site2 && gapArray[0]-site2 "+site2+"\n"+
+ mapStop+" -> "+site3+"\n\n"+
+ Arrays.toString(gapArray)+"\n\n"+
+// Arrays.toString(clone)+"\n\n"+
+ Arrays.toString(locArray)+"\n"+
+ "numHits="+numHits+", "+
+ "heap.size="+heap.size()+", "+
+ "numHits="+numHits+", "+
+ "approxHits="+approxHits+"\n";
+ gapArray[0]=Tools.min(gapArray[0], site2);
+ gapArray[gapArray.length-1]=Tools.max(gapArray[gapArray.length-1], site3);
+ }
+ if(verbose){System.err.println("@ site "+site2+", made gap array: "+Arrays.toString(gapArray));}
+// assert(false) : Arrays.toString(locArray);
+ }
+
+
+ //This block is optional, but tries to eliminate multiple identical alignments
+
+ SiteScore ss=null;
+ final boolean perfect1=USE_EXTENDED_SCORE && score==maxScore && fullyDefined;
+ final boolean inbounds=(site2>=0 && site3=offsets[col]){
+// a2=a-offsets[col];
+//
+// assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
+// "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
+// ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
+// }else{
+// int ch=numberToChrom(a, baseChrom);
+// int st=numberToSite(a);
+// int st2=Tools.max(st-offsets[col], 0);
+// a2=toNumber(st2, ch);
+//
+// assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
+// "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
+// ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
+// }
+//
+// assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
+// "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
+// ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
+//
+// t2.site=a2;
+// values[col]=a2;
+// if()
+// heap.add(t2);
+// }else if((heap.size()+active.size())=prevMaxHits);
+
+ final int baseChrom=baseChrom(baseChrom_);
+ final Block b=index[baseChrom];
+ final int[] sizes=sizeArray;
+
+ heap.clear();
+ for(int i=0; i0);
+
+ int a=sites[start];
+ int a2;
+ if((a&SITE_MASK)>=offsets[i]){
+ a2=a-offsets[i];
+ }else{
+ int ch=numberToChrom(a, baseChrom);
+ int st=numberToSite(a);
+ int st2=Tools.max(st-offsets[i], 0);
+ a2=toNumber(st2, ch);
+ }
+ assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom));
+
+ Quad t=triples[i];
+ assert(t!=null) : "Should be using tripleStorage";
+ assert(i==t.column);
+ t.row=start;
+ t.site=a2;
+ t.list=sites;
+ values[i]=a2;
+
+ heap.add(t);
+ }
+
+ final int maxQuickScore=maxQuickScore(offsets, keyScores);
+
+ int topQscore=-999999999;
+
+ int maxHits=0;
+// int approxHitsCutoff=MIN_APPROX_HITS_TO_KEEP;
+
+
+ int approxHitsCutoff;
+ final int indelCutoff;
+ if(perfectOnly){
+ approxHitsCutoff=numHits;
+ indelCutoff=0;
+ }else{
+ approxHitsCutoff=Tools.max(prevMaxHits, Tools.min(MIN_APPROX_HITS_TO_KEEP, numHits-1)); //Faster, same accuracy
+ indelCutoff=MAX_INDEL2;
+ }
+
+
+ while(!heap.isEmpty()){
+ Quad t=heap.peek();
+ final int site=t.site;
+ final int centerIndex=t.column;
+
+ int maxNearbySite=site;
+
+
+ int approxHits=0;
+ {//Inner loop
+ final int minsite=site-Tools.min(MAX_INDEL, indelCutoff), maxsite=site+MAX_INDEL2;
+ for(int column=0, chances=numHits-approxHitsCutoff; column=0; column++){
+ final int x=values[column];
+ assert(x==triples[column].site);
+ if(x>=minsite && x<=maxsite){
+ maxNearbySite=(x>maxNearbySite ? x : maxNearbySite);
+ approxHits++;
+ }else{chances--;}
+ }
+ }
+ hist_hits[Tools.min(HIT_HIST_LEN, approxHits)]++;
+
+ assert(centerIndex>=0) : centerIndex;
+ assert(approxHits>=1 || approxHitsCutoff>1) : approxHits+", "+approxHitsCutoff+", "+numHits+", "+t.column;
+ if(approxHits>=approxHitsCutoff){
+
+ int qscore=quickScore(values, keyScores, centerIndex, offsets, sizes, true, approxHits, numHits);
+
+ if(ADD_SCORE_Z){
+ int scoreZ=scoreZ2(values, centerIndex, offsets, approxHits, numHits);
+ qscore+=scoreZ;
+ }
+
+ if(qscore>topQscore){
+
+// maxHits=Tools.max(approxHits, maxHits);
+// approxHitsCutoff=Tools.max(approxHitsCutoff, maxHits); //Best setting for pre-scan
+
+ maxHits=Tools.max(approxHits, maxHits);
+ approxHitsCutoff=Tools.max(approxHitsCutoff, approxHits-1); //Best setting for pre-scan
+
+ topQscore=qscore;
+
+ if(qscore>=maxQuickScore){
+ assert(qscore==maxQuickScore);
+ assert(approxHits==numHits);
+ if(earlyExit){
+ return new int[] {topQscore, maxHits};
+ }
+ }
+ }
+ }
+
+ while(heap.peek().site==site){ //Remove all identical elements, and add subsequent elements
+ final Quad t2=heap.poll();
+ final int row=t2.row+1, col=t2.column;
+ if(row=offsets[col]){
+ a2=a-offsets[col];
+
+ assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
+ "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", site="+site+
+ ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
+ }else{
+ int ch=numberToChrom(a, baseChrom);
+ int st=numberToSite(a);
+ int st2=Tools.max(st-offsets[col], 0);
+ a2=toNumber(st2, ch);
+
+ assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
+ "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", site="+site+
+ ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
+ }
+
+ assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
+ "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", site="+site+
+ ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
+
+ t2.site=a2;
+ values[col]=a2;
+ heap.add(t2);
+ }else if(earlyExit && (perfectOnly || heap.size()b ? a-b : b-a;
+ }
+
+
+ /**
+  * Computes the maximum-score value for a read under the active scoring mode.
+  * USE_AFFINE_SCORE is consulted first, then USE_EXTENDED_SCORE; when neither is set
+  * the result is maxQuickScore(offsets, keyScores).
+  * @param offsets Key offsets; only used by the maxQuickScore fallback
+  * @param baseScores Per-base scores; only read when useQuality is true
+  * @param keyScores Per-key scores; only used by the maxQuickScore fallback
+  * @param readlen Read length
+  * @param useQuality Whether baseScores should contribute to the result
+  * @return The maximum score for the selected mode
+  */
+ final int maxScore(int[] offsets, byte[] baseScores, int[] keyScores, int readlen, boolean useQuality){
+
+     if(USE_AFFINE_SCORE){
+         //The quality-aware variant apparently MUST be used if quality is used later on for slow align.
+         if(useQuality){return msa.maxQuality(baseScores);}
+         return msa.maxQuality(readlen);
+     }
+
+     if(USE_EXTENDED_SCORE){
+         //Every base is worth BASE_HIT_SCORE plus one fifth of it (integer division).
+         final int flat=readlen*(BASE_HIT_SCORE+BASE_HIT_SCORE/5);
+         //Same remark as above: the quality term apparently MUST be included when quality is used for slow align.
+         if(useQuality){return flat+Tools.sum(baseScores);}
+         return flat;
+     }
+
+     return maxQuickScore(offsets, keyScores);
+ }
+
+
+ /**
+  * Computes the largest value the quick score can reach for a set of keys.
+  * The result is the sum of the key scores, plus maxScoreZ(offsets) when ADD_SCORE_Z is set,
+  * plus Y_SCORE_MULT times the distance between the first and last offsets.
+  * @param offsets Key offsets; must contain at least one element
+  * @param keyScores Per-key scores
+  * @return Upper bound used for quick scoring
+  */
+ public final int maxQuickScore(int[] offsets, int[] keyScores){
+
+     int keyTotal=Tools.intSum(keyScores);
+
+     //Span term: distance from the first offset to the last one.
+     final int lastIndex=offsets.length-1;
+     final int span=offsets[lastIndex]-offsets[0];
+     final int spanTerm=Y_SCORE_MULT*span;
+
+     if(ADD_SCORE_Z){
+         keyTotal+=maxScoreZ(offsets);
+     }
+
+     return keyTotal+spanTerm;
+ }
+
+
+ /**
+  * Quick score for the candidate site anchored at the key in column centerIndex.
+  * Also increments the hist_hits_score bucket for numApproxHits (capped at HIT_HIST_LEN).
+  *
+  * The score is the center key's score, plus the scores gathered by aligning leftward and
+  * rightward of it, plus a Y term scaled by Y_SCORE_MULT, plus an optional list-size bonus.
+  *
+  * @param locs Current site of each key column
+  * @param keyScores Per-key scores
+  * @param centerIndex Column of the key that defines the candidate site
+  * @param offsets Key offsets, passed through to scoreY
+  * @param sizes Per-column sizes, used for the list-size bonus
+  * @param penalizeIndels Passed through to scoreLeft and scoreRight
+  * @param numApproxHits Number of keys that approximately hit this site
+  * @param numHits Total number of key columns, passed through to scoreRight
+  * @return The quick score; just the center key's score when numApproxHits is 1
+  */
+ private final int quickScore(final int[] locs, final int[] keyScores, final int centerIndex, final int offsets[],
+         int[] sizes, final boolean penalizeIndels, final int numApproxHits, final int numHits){
+
+     final int bucket=Tools.min(HIT_HIST_LEN, numApproxHits);
+     hist_hits_score[bucket]++;
+
+     final int centerScore=keyScores[centerIndex];
+     if(numApproxHits==1){
+         return centerScore;
+     }
+
+     //Start from the key that exactly hits the center, then extend left and right of it.
+     int total=centerScore;
+     total+=scoreLeft(locs, keyScores, centerIndex, sizes, penalizeIndels);
+     total+=scoreRight(locs, keyScores, centerIndex, sizes, penalizeIndels, numHits);
+
+     //Disambiguating term: given otherwise identical match patterns (for example, a small indel
+     //will generate two valid site candidates), this favors the lower site.
+     total-=centerIndex;
+
+     final int yTerm=Y_SCORE_MULT*scoreY(locs, centerIndex, offsets);
+
+     if(ADD_LIST_SIZE_BONUS){
+         total+=calcListSizeBonus(sizes[centerIndex]);
+     }
+
+     return total+yTerm;
+ }
+
+
+// /** Generates a term that increases score with how many bases in the read match the ref. */
+// private static final int scoreZ(int[] locs, int centerIndex, int offsets[]){
+// final int center=locs[centerIndex];
+//
+// final int[] refLoc=new int[offsets[offsets.length-1]+CHUNKSIZE];
+//
+// final int maxLoc=center+MAX_INDEL2;
+// final int minLoc=Tools.max(0, center-MAX_INDEL);
+//
+// int score=0;
+//
+// for(int i=0; i=minLoc && loc<=maxLoc){
+//// assert(loc>=center) : "loc="+loc+"\ni="+i+"\ncenterIndex="+centerIndex+
+//// "\nmaxLoc="+maxLoc+"\nlocs:\t"+Arrays.toString(locs)+"\noffsets:\t"+Arrays.toString(offsets);
+//
+// int offset=offsets[i];
+// int max=CHUNKSIZE+offset;
+//
+// for(int j=offset; jloc){
+// refLoc[j]=loc;
+// score-=2;
+// }else if(old==loc){
+// score-=1;
+// //do nothing, perhaps, or add 1?
+// }else{
+// score-=2;
+// assert(old=minVal && value<=maxVal){
+ final int refbase=numberToSite(value);
+// if(verbose){System.err.println("refbase="+refbase);}
+ assert(refbase>=minLoc && refbase<=maxLoc);
+
+ // System.err.println("Reverse: Trying key "+refbase+" @ "+offsets[i]);
+ // System.err.println("Passed!");
+ keynum++;
+ final int callbase=offsets[i];
+
+ int misses=0;
+ for(int cloc=callbase+KEYLEN-1, rloc=refbase+cloc; cloc>=0 && rloc>=0 && rloc=minVal && value<=maxVal){
+ final int refbase=numberToSite(value);
+// if(verbose){System.err.println("refbase="+refbase);}
+ assert(refbase>=minLoc && refbase<=maxLoc);
+ final int callbase=offsets[i];
+
+ int misses=0;
+ for(int cloc=callbase+KEYLEN, rloc=refbase+cloc; cloc0 && old>=0){break;} //Already filled with something that has no errors
+ byte c=bases[cloc];
+ byte r=ref[rloc];
+
+ if(c==r){
+ if(old<0 || refbase==centerLoc){ //If the cell is empty or this key corresponds to center
+ locArray[cloc]=refbase;
+ }
+ }else{
+ misses++;
+ if(old>=0){break;} //Already filled with something that has no errors
+ }
+ }
+ }
+ }
+
+ //Try to subsume out-of-order locs where higher numbers come before lower numbers. Made things worse.
+// for(int i=1; i-1){
+// final byte c=bases[i];
+//// final int rloc1=loc+i;
+// final int rloc2=last+i;
+// byte r2=ref[rloc2];
+// if(r2==c){
+// locArray[i]=last;
+// }
+// }
+// }
+
+// //Change 'N' to -2. A bit slow.
+// {
+// int firstMatch=0;
+// while(firstMatch=0; i--){
+// final byte c=bases[i];
+// if(c=='N'){locArray[i]=-2;}
+// else{
+// assert(locArray[i]==-1);
+// final int rloc=last+i;
+// byte r=ref[rloc];
+// if(r=='N'){locArray[i]=-2;}
+// }
+// }
+// for(int i=firstMatch; i0){
+// final int rloc=last+i;
+// byte r=ref[rloc];
+// if(r=='N'){locArray[i]=-2;}
+// }
+// }
+// }
+
+ //Change 'N' to -2, but only for nocalls, not norefs. Much faster.
+ {
+ final byte nb=(byte)'N';
+ for(int i=0; i0){
+// if(last<1){last=loc;}
+// }else{
+// if(last>0){
+// final byte c=bases[i];
+// final int rloc2=last+i;
+// byte r2=ref[rloc2];
+// if(c=='N'){
+//
+// }else if(c==r2){
+// locArray[i]=last;
+// }
+// }
+// last=-1;
+// }
+// }
+// }
+// {
+// int last=locArray[locArray.length-1];
+// for(int i=locArray.length-2; i>=0; i--){
+// final int loc=locArray[i];
+// if(loc>0){
+// if(last<1){last=loc;}
+// }else{
+// if(last>0){
+// final byte c=bases[i];
+// final int rloc2=last+i;
+// byte r2=ref[rloc2];
+// if(c=='N'){
+//
+// }else if(c==r2){
+// locArray[i]=last;
+// }
+// }
+// last=-1;
+// }
+// }
+// }
+
+// for(int i=locArray.length-2; i>=0; i--){
+// final int loc=locArray[i];
+// final int last=locArray[i+1];
+// if(loc>last && last>-1){
+// final byte c=bases[i];
+//// final int rloc1=loc+i;
+// final int rloc2=last+i;
+// byte r2=ref[rloc2];
+// if(r2==c){
+// locArray[i]=last;
+// }
+// }
+// }
+
+ if(verbose){
+// System.err.println("locArray:\t"+Arrays.toString(locArray));
+
+ int centerOffset=offsets[centerIndex];
+ int lim=centerOffset+KEYLEN;
+ for(int i=centerOffset; i=0){
+ score+=BASE_HIT_SCORE+baseScores[i];
+ if(loc==centerLoc){score+=centerBonus;}
+ if(loc!=lastLoc && lastLoc>=0){
+ int dif=absdif(loc, lastLoc);
+ int penalty=Tools.min(INDEL_PENALTY+INDEL_PENALTY_MULT*dif, MAX_PENALTY_FOR_MISALIGNED_HIT);
+ score-=penalty;
+ }
+ lastLoc=loc;
+ }
+ }
+
+// System.err.println("Extended score: "+score);
+// System.err.println(Arrays.toString(locArray));
+
+
+ return score;
+ }
+
+
+ /**
+  * NOTE! This destroys the locArray, so use a copy if needed.
+  *
+  * Produces an array describing the gaps found in locArray, or null when no gap is counted.
+  * The result has 2+2*gaps entries: the first is locArray[0], the last is the final element
+  * of locArray, and each counted gap contributes the pair (locArray[i-1], locArray[i]).
+  * A gap is counted wherever dif exceeds minGap.
+  *
+  * NOTE(review): both loop headers and the definition of dif are missing from this copy of
+  * the file (the text between the angle brackets was lost), so how locArray is rewritten and
+  * how doSort is used cannot be confirmed from here - restore from the upstream source.
+  *
+  * @param locArray Per-position locations; modified in place
+  * @param minLoc Value substituted for locArray[0] when it is negative
+  * @param minGap A difference must be strictly greater than this to count as a gap
+  * @return The gap array, or null if fewer than one gap was counted
+  */
+ private static final int[] makeGapArray(int[] locArray, int minLoc, int minGap){
+ int gaps=0;
+ boolean doSort=false;
+
+ if(locArray[0]<0){locArray[0]=minLoc;} //A negative first entry is replaced by minLoc
+ for(int i=1; i=0); //NOTE(review): damaged line; loop header and body up to the assertion on dif were lost
+ if(dif>minGap){
+ gaps++;
+ }
+ }
+ if(gaps<1){return null;} //Nothing exceeded minGap
+ int[] out=new int[2+gaps*2]; //Two end points plus one (left, right) pair per gap
+ out[0]=locArray[0];
+ out[out.length-1]=locArray[locArray.length-1];
+
+ for(int i=1, j=1; i=0); //NOTE(review): damaged line; j indexes the next free pair in out
+ if(dif>minGap){
+ out[j]=locArray[i-1];
+ out[j+1]=locArray[i];
+ j+=2;
+ }
+ }
+ return out;
+ }
+
+
+ /** Generates a term that increases score with how many bases in the read match the ref. */
+ private final int scoreZ2(int[] locs, int centerIndex, int offsets[], int numApproxHits, int numHits){
+
+ if(numApproxHits==1){return SCOREZ_1KEY;}
+
+ final int center=locs[centerIndex];
+
+ final int maxLoc=center+MAX_INDEL2;
+ final int minLoc=Tools.max(0, center-MAX_INDEL);
+
+ int score=0;
+
+ int a0=-1, b0=-1;
+
+ for(int i=0; i=minLoc && loc<=maxLoc){
+// assert(loc>=center) : "loc="+loc+"\ni="+i+"\ncenterIndex="+centerIndex+
+// "\nmaxLoc="+maxLoc+"\nlocs:\t"+Arrays.toString(locs)+"\noffsets:\t"+Arrays.toString(offsets);
+ int a=offsets[i];
+
+ if(b0=minLoc && loc<=maxLoc){
+ int pos=offsets[i];
+// if(true){
+// System.err.println("\ni="+i+", pos="+pos+", array=["+array.length+"], limit="+(pos+CHUNKSIZE-1));
+// }
+ for(int j=pos; j=0){
+ prev=loc;
+ loc=locs[i];
+
+ int offset=absdif(loc, prev);
+
+ if(offset<=MAX_INDEL){
+ score+=keyScores[i];
+ if(ADD_LIST_SIZE_BONUS){score+=calcListSizeBonus(sizes[i]);}
+
+// if(i==locs.length-1){score+=HIT_SCORE/2;} //Adds a bonus for matching the first or last key
+
+ if(penalizeIndels && offset!=0){
+ int penalty=Tools.min(INDEL_PENALTY+INDEL_PENALTY_MULT*offset, MAX_PENALTY_FOR_MISALIGNED_HIT);
+ score-=penalty;
+// score-=(INDEL_PENALTY+Tools.min(INDEL_PENALTY_MULT*offset, 1+HIT_SCORE/4));
+ }
+ }else{
+ loc=prev;
+ }
+ }
+
+ }
+ return score;
+
+ }
+
+ private final int scoreLeft(int[] locs, int[] keyScores, int centerIndex, int[] sizes, boolean penalizeIndels){
+ //Scores the keys left of centerIndex (indices centerIndex-1 down to 0), chaining each accepted location to the last accepted one.
+ callsToScore++; //Call counter
+ //Returns the summed key scores (plus optional list-size bonuses) minus any indel penalties; the center key itself is not scored here.
+ int score=0;
+ //loc is the most recently accepted location (initially the center key's); prev remembers the one before it.
+ int prev, loc=locs[centerIndex];
+
+ for(int i=centerIndex-1; i>=0; i--){
+ //Keys with a negative location are skipped.
+ if(locs[i]>=0){
+ prev=loc;
+ loc=locs[i];
+ //absdif is defined elsewhere in this class; presumably the absolute difference of the two locations.
+ int offset=absdif(loc, prev);
+
+ if(offset<=MAX_INDEL){
+ score+=keyScores[i];
+ if(ADD_LIST_SIZE_BONUS){score+=calcListSizeBonus(sizes[i]);}
+
+// if(i==0){score+=HIT_SCORE/2;} //Adds a bonus for matching the first or last key
+ if(penalizeIndels && offset!=0){
+ int penalty=Tools.min(INDEL_PENALTY+INDEL_PENALTY_MULT*offset, MAX_PENALTY_FOR_MISALIGNED_HIT); //Grows with the shift, capped at MAX_PENALTY_FOR_MISALIGNED_HIT
+ score-=penalty;
+ }
+ }else{
+ loc=prev; //Shift exceeds MAX_INDEL: ignore this key and keep chaining from the previously accepted location
+ }
+ }
+
+ }
+ return score;
+
+ }
+
+ /** Encode a (location, chrom) pair to an index */
+ private static final int toNumber(int site, int chrom){
+ int out=(chrom&CHROM_MASK_LOW);
+ out=out<=0 && f<1);
+ FRACTION_GENOME_TO_EXCLUDE=f;
+ MIN_INDEX_TO_DROP_LONG_HIT_LIST=(int)(1000*(1-3.5*FRACTION_GENOME_TO_EXCLUDE)); //default 810
+ MAX_AVERAGE_LIST_TO_SEARCH=(int)(1000*(1-2.3*FRACTION_GENOME_TO_EXCLUDE)); //lower is faster, default 840
+ MAX_AVERAGE_LIST_TO_SEARCH2=(int)(1000*(1-1.4*FRACTION_GENOME_TO_EXCLUDE)); //default 910
+ MAX_SINGLE_LIST_TO_SEARCH=(int)(1000*(1-1.0*FRACTION_GENOME_TO_EXCLUDE)); //default 935
+ MAX_SHORTEST_LIST_TO_SEARCH=(int)(1000*(1-2.8*FRACTION_GENOME_TO_EXCLUDE)); //Default 860
+ }
+
+
+ /** Range: 0 to 1 (but 0 will break everything). Lower is faster and less accurate. The comments here record earlier defaults of .75 and .8; the value is currently .85. */
+ static final float HIT_FRACTION_TO_RETAIN=0.85f; //default: .8
+ /** Range: 0 to 1000. Lower should be faster and less accurate. Derived from FRACTION_GENOME_TO_EXCLUDE and recomputed with the same formula when that fraction is reassigned. */
+ static int MIN_INDEX_TO_DROP_LONG_HIT_LIST=(int)(1000*(1-3.5*FRACTION_GENOME_TO_EXCLUDE)); //default 810
+ /** Range: 2 to infinity. Lower should be faster and less accurate. */
+ static final int MIN_HIT_LISTS_TO_RETAIN=6;
+ //The four limits below are likewise derived from FRACTION_GENOME_TO_EXCLUDE and recomputed when it is reassigned.
+ static int MAX_AVERAGE_LIST_TO_SEARCH=(int)(1000*(1-2.3*FRACTION_GENOME_TO_EXCLUDE)); //lower is faster, default 840
+ //lower is faster
+ static int MAX_AVERAGE_LIST_TO_SEARCH2=(int)(1000*(1-1.4*FRACTION_GENOME_TO_EXCLUDE)); //default 910
+ //lower is faster
+ static int MAX_SINGLE_LIST_TO_SEARCH=(int)(1000*(1-1.0*FRACTION_GENOME_TO_EXCLUDE)); //default 935
+ //lower is faster
+ static int MAX_SHORTEST_LIST_TO_SEARCH=(int)(1000*(1-2.8*FRACTION_GENOME_TO_EXCLUDE)); //Default 860
+
+ /** To increase accuracy on small genomes, override greedy list dismissal when the list is at most this long. */
+ public static final int SMALL_GENOME_LIST=20;
+ //The two trimming strategies are mutually exclusive; this is checked at class load when assertions are enabled.
+ static{assert(!(TRIM_BY_GREEDY && TRIM_BY_TOTAL_SITE_COUNT)) : "Pick one.";}
+
+ static final int CLUMPY_MAX_DIST=5; //Keys repeating over intervals of this or less are clumpy.
+
+ /** Minimum length of list before clumpiness is considered. Documented as an index in the length histogram, from 0 to 1000. NOTE(review): 2000 lies outside that range; confirm whether this is now compared against a raw list length. */
+ static final int CLUMPY_MIN_LENGTH_INDEX=2000;
+ static final float CLUMPY_FRACTION=0.75f; //0 to 1; higher is slower but more stringent. 0.5 means the median distance is clumpy.
+
+ static final int MAX_SUBSUMPTION_LENGTH=MAX_INDEL2;
+
+ /** approxHitsCutoff=maxHits-MAX_HITS_REDUCTION1 when slowWalk3 is first entered */
+ public static final int MAX_HITS_REDUCTION1=0;
+
+ /** approxHitsCutoff=maxHits-MAX_HITS_REDUCTION2 dynamically when best score is exceeded */
+ public static int MAX_HITS_REDUCTION2=2; //default 1; higher is more accurate (more mapping and less FP) but slower
+
+ /** approxHitsCutoff=maxHits-MAX_HITS_REDUCTION_PERFECT when perfect score is found */
+ public static final int MAX_HITS_REDUCTION_PERFECT=0;
+
+ public static int MAXIMUM_MAX_HITS_REDUCTION=3; //NOTE(review): undocumented; presumably an upper bound on the hit reduction - confirm against calcApproxHitsCutoff upstream
+ public static int HIT_REDUCTION_DIV=5; //NOTE(review): undocumented; presumably a divisor used when deriving the hit reduction - confirm upstream
+
+ private static final int calcApproxHitsCutoff(final int keys, final int hits, int currentCutoff, final boolean perfect){ //***$
+ assert(keys>=hits) : keys+", "+hits;
+ assert(hits>=0);
+ //Computes the approximate-hits cutoff from the key count, the best hit count so far, and the caller's current cutoff.
+ int mahtk=MIN_APPROX_HITS_TO_KEEP;
+ if(SEMIPERFECTMODE || PERFECTMODE){
+ if(keys==1){return 1;}
+ else if(MIN_APPROX_HITS_TO_KEEP=0); //NOTE(review): this line appears truncated in the patch text (the rest of this branch and the computation of 'reduction' are missing); restore from upstream
+ int r=hits-reduction;
+ //The result never drops below the floor (mahtk) or the cutoff passed in by the caller.
+ r=Tools.max(mahtk, currentCutoff, r);
+
+ if(perfect){
+ r=Tools.max(r, keys-MAX_HITS_REDUCTION_PERFECT); //With a perfect match, require at least keys-MAX_HITS_REDUCTION_PERFECT hits
+ }
+ return r;
+ }
+
+ public static final boolean USE_SLOWALK3=true && USE_EXTENDED_SCORE; //Only enabled together with extended scoring
+ public static boolean PRESCAN_QSCORE=true && USE_EXTENDED_SCORE; //Decrease quality and increase speed
+ public static final boolean FILTER_BY_QSCORE=true; //Slightly lower quality, but very fast.
+ public static final float MIN_SCORE_MULT=(USE_AFFINE_SCORE ? 0.15f : USE_EXTENDED_SCORE ? .3f : 0.10f); //Fraction of max score to use as cutoff. Default 0.15, max is 1; lower is more accurate
+ public static final float MIN_QSCORE_MULT=0.025f; //Fraction of max quick score to use as cutoff; max is 1; lower is more accurate. NOTE(review): the previous comment's "Default 0.15" was copied from MIN_SCORE_MULT and does not match this value
+ public static final float MIN_QSCORE_MULT2=0.1f; //NOTE(review): undocumented; presumably a second quick-score cutoff fraction - confirm
+ static final float DYNAMIC_SCORE_THRESH=(USE_AFFINE_SCORE ? 0.84f : USE_EXTENDED_SCORE ? .84f : 0.6f); //Default .85f; lower is more accurate
+ static final float DYNAMIC_QSCORE_THRESH=0.6f; //default .58f
+ static final float DYNAMIC_QSCORE_THRESH_PERFECT=0.8f; //***$
+ static final float PRESCAN_QSCORE_THRESH=DYNAMIC_QSCORE_THRESH*.95f; //default 1.0f; lower is more accurate and 0 essentially sets PRESCAN_QSCORE=false
+ static{ //Sanity-check the cutoff fractions at class load (only evaluated when assertions are enabled)
+ assert(MIN_SCORE_MULT>=0 && MIN_SCORE_MULT<1);
+ assert(DYNAMIC_SCORE_THRESH>=0 && DYNAMIC_SCORE_THRESH<1);
+ }
+
+
+}
diff --git a/current/align2/BBIndex5.java b/current/align2/BBIndex5.java
new file mode 100755
index 0000000..eb861e7
--- /dev/null
+++ b/current/align2/BBIndex5.java
@@ -0,0 +1,2647 @@
+package align2;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import stream.Read;
+import stream.SiteScore;
+
+import dna.AminoAcid;
+import dna.Data;
+import dna.Gene;
+
+
+/**
+ * Based on Index11f
+ * Index stored in single array per block.
+ * Supports 32-bit unsigned index.
+ *
+ * @author Brian Bushnell
+ * @date Jan 3, 2013
+ *
+ */
+public final class BBIndex5 extends AbstractIndex {
+
+
+ public static void main(String[] args){
+
+ int k=13;
+
+ for(int i=0; iData.numChroms){maxChrom=Data.numChroms;}
+ assert(minChrom<=maxChrom);
+ Data.sysout.println("Loading index for chrom "+minChrom+"-"+maxChrom+", genome "+Data.GENOME_BUILD);
+ index=IndexMaker5.makeIndex(Data.GENOME_BUILD, minChrom, maxChrom,
+ k, NUM_CHROM_BITS, MAX_ALLOWED_CHROM_INDEX, CHROM_MASK_LOW, CHROM_MASK_HIGH, SITE_MASK, SHIFT_LENGTH, COLORSPACE, writeToDisk, diskInvalid, index);
+
+ }
+
+ /** Calculates statistics of the index, such as list lengths, finds clumpy keys, and from the resulting length histogram sets MAX_USABLE_LENGTH, MAX_USABLE_LENGTH2 and Solver.POINTS_PER_SITE. */
+ public static final synchronized void analyzeIndex(int minChrom, int maxChrom, boolean cs, float fractionToExclude, int k){
+ //Must only run once: lengthHistogram and COUNTS are both expected to be unset.
+ assert(lengthHistogram==null);
+ assert(COUNTS==null);
+ //One counter per possible key: 4^k entries.
+ int KEYSPACE=1<<(2*k);
+ COUNTS=new int[KEYSPACE];
+
+ maxChrom=maxChrom(maxChrom);
+
+ for(int key=0; keyCLUMPY_MIN_LENGTH_INDEX && clumps>(CLUMPY_FRACTION*len)){ //NOTE(review): this line appears truncated in the patch text (loop header, counting code and the start of the clumpiness test are missing); restore from upstream
+ COUNTS[key]=0; //Clumpy key: zero the counts for the key and for rkey
+ COUNTS[rkey]=0;
+ for(int chrom=minChrom; chrom<=maxChrom; chrom=((chrom&CHROM_MASK_HIGH)+CHROMS_PER_BLOCK)){ //Steps one block at a time
+ Block b=index[chrom];
+ final int[] sites=b.sites;
+ sites[b.starts[key]]=-1; //Overwrite the first entry of each list with -1
+ sites[b.starts[rkey]]=-1;
+ }
+ }
+
+// System.err.println("COUNTS["+key+"] = "+COUNTS[key]+", COUNTS["+rkey+"] = "+COUNTS[rkey]);
+ }
+ }
+ //Build the list-length histogram from the per-key counts.
+ lengthHistogram=Tools.makeLengthHistogram3(COUNTS, 1000, verbose2);
+
+ if(verbose2){System.err.println("lengthHistogram: "+Arrays.toString(lengthHistogram));}
+
+ if(REMOVE_FREQUENT_GENOME_FRACTION){
+ //Pick histogram positions from fractionToExclude and use the lengths there as caps, with floors of 2x and 6x SMALL_GENOME_LIST.
+ int lengthLimitIndex=(int)((1-fractionToExclude)*(lengthHistogram.length-1));
+ int lengthLimitIndex2=(int)((1-fractionToExclude*DOUBLE_SEARCH_THRESH_MULT)*(lengthHistogram.length-1));
+
+ MAX_USABLE_LENGTH=Tools.max(2*SMALL_GENOME_LIST, lengthHistogram[lengthLimitIndex]);
+ MAX_USABLE_LENGTH2=Tools.max(6*SMALL_GENOME_LIST, lengthHistogram[lengthLimitIndex2]);
+
+ if(verbose2){System.err.println("MAX_USABLE_LENGTH: "+MAX_USABLE_LENGTH+"\nMAX_USABLE_LENGTH2: "+MAX_USABLE_LENGTH2);}
+ }
+ //Scale the per-site points by the list length at MAX_AVERAGE_LIST_TO_SEARCH; a result of 0 is replaced by -1 and the value is asserted to be negative.
+ Solver.POINTS_PER_SITE=(int)Math.floor((Solver.BASE_POINTS_PER_SITE*4000f)/Tools.max(2*SMALL_GENOME_LIST, lengthHistogram[MAX_AVERAGE_LIST_TO_SEARCH]));
+ if(Solver.POINTS_PER_SITE==0){Solver.POINTS_PER_SITE=-1;}
+ if(verbose2){System.err.println("POINTS_PER_SITE: "+Solver.POINTS_PER_SITE);}
+ assert(Solver.POINTS_PER_SITE<0) : Solver.POINTS_PER_SITE;
+ }
+
+
+ /** Returns the filename for the block holding this chrom, as produced by IndexMaker5.fname from minChrom(chrom), maxChrom(chrom), k, NUM_CHROM_BITS and COLORSPACE. */
+ public static final String fname(int chrom, int k){
+ return IndexMaker5.fname(minChrom(chrom), maxChrom(chrom), k, NUM_CHROM_BITS, COLORSPACE);
+ }
+
+ /** Ensure key offsets are strictly ascending. */
+ private static boolean checkOffsets(int[] offsets){
+ for(int i=1; i0){
+ if(x=shortest);
+ if(initialHitCountlimit3){
+ for(int i=0; ilimit || sum/finalHitCount>limit2; i--){
+ Pointer p=ptrs[i];
+ sum-=hits[p.key].length;
+ hits[p.key]=null;
+ finalHitCount--;
+ }
+
+ return finalHitCount;
+ }
+
+ /** Remove least useful keys to accelerate search */
+ private final int trimExcessHitListsByGreedy(int[] offsets, int[] keyScores, int maxHitLists, int[] keys){
+
+ float[] keyWeights=getKeyWeightArray(keyScores.length);
+ for(int i=0; ilimitS){hits[i]=null;}
+// }
+
+ final int[] lengths=getGenericArray(keys.length);
+
+ for(int i=0; i0){
+ if(x=shortest);
+ if(initialHitCountlimit3){
+ for(int i=0; i=MIN_APPROX_HITS_TO_KEEP && (sum>limit || sum/initialHitCount>limit2 || hitsCount>maxHitLists/* || worstValue<0*/)){
+ final int[] lists=getGreedyListArray(hitsCount);
+ for(int i=0, j=0; j0){
+ lists[j]=i;
+ j++;
+ }
+ }
+
+ Solver.findWorstGreedy(offsets, lengths, keyWeights, KEYLEN, lists, greedyReturn);
+ int worstIndex=greedyReturn[0];
+ int worst=lists[worstIndex];
+ worstValue=greedyReturn[1];
+ sum-=lengths[worst];
+
+// if(worstValue>0 && (hitsCount<=maxHitLists || lengths[worst]0 || lengths[worst]=0){
+ final int len=count(key);
+ if(len>0 && len0){
+ starts[i]=b.starts[key];
+ stops[i]=starts[i]+len2;
+ numHits++;
+ }
+ }
+ }
+ }
+ return numHits;
+ }
+
+
+ private final int countHits(final int[] keys, final int maxLen, boolean clearBadKeys){
+ int numHits=0;
+ for(int i=0; i=0){
+ final int len=count(key);
+ if(len>0 && len findAdvanced(byte[] basesP, byte[] basesM, byte[] qual, byte[] baseScoresP, int[] keyScoresP, int[] offsets, long id){
+ assert(minChrom<=maxChrom && minChrom>=0);
+ ArrayList result=find(basesP, basesM, qual, baseScoresP, keyScoresP, offsets, true, id);
+ if(DOUBLE_SEARCH_NO_HIT && (result==null || result.isEmpty())){result=find(basesP, basesM, qual, baseScoresP, keyScoresP, offsets, false, id);}
+
+ return result;
+ }
+
+
+ public final ArrayList find(byte[] basesP, byte[] basesM, byte[] qual, byte[] baseScoresP, int[] keyScoresP, int[] offsetsP, boolean obeyLimits, long id){
+ //Makes keys for the read, counts usable keys, optionally prescans the blocks, then searches blocks on the plus and minus strands and returns the collected sites.
+ assert(checkOffsets(offsetsP)) : Arrays.toString(offsetsP);
+ final int[] keysOriginal=KeyRing.makeKeys(basesP, offsetsP, KEYLEN, COLORSPACE);
+ int[] keysP=Arrays.copyOf(keysOriginal, keysOriginal.length); //Working copy; entries may be set to -1 below
+ //Statistics counters
+ initialKeys+=offsetsP.length;
+ initialKeyIterations++;
+ //Hit-list length limit: MAX_USABLE_LENGTH when obeying limits, otherwise MAX_USABLE_LENGTH2.
+ final int maxLen=(obeyLimits ? MAX_USABLE_LENGTH : MAX_USABLE_LENGTH2);
+
+ int numHits=0;
+ numHits=countHits(keysP, maxLen, true);
+ if(numHits>0){ //TODO: Change these to higher numbers
+ int trigger=(3*keysP.length)/4;
+ if(numHits<4 && numHitsMIN_APPROX_HITS_TO_KEEP){ //NOTE(review): this line appears truncated in the patch text (the text between a '<' and a later '>' is missing); restore from upstream
+ int cutoffIndex=((int) (HIT_FRACTION_TO_RETAIN*(keysP.length)-0.01f))+(keysP.length-numHits);
+ //Keep at least MIN_HIT_LISTS_TO_RETAIN lists beyond the keys that have no hits.
+ int zeroes=keysP.length-numHits;
+ int altMinIndex=(zeroes+(MIN_HIT_LISTS_TO_RETAIN-1));
+ cutoffIndex=Tools.max(cutoffIndex, altMinIndex);
+
+ assert(cutoffIndex>0) : cutoffIndex+"\n"+numHits;
+
+ if(cutoffIndex<(keysP.length-1)){
+ int[] lens=getGenericArray(keysP.length);
+ for(int i=0; icutoff){ //NOTE(review): truncated in the patch text; the code that fills lens and derives 'cutoff' is missing
+ keysP[i]=-1; //Drop this key
+ removed++;
+ numHits--;
+ }
+ }
+ }
+ }
+// assert(checkOffsets(offsets)) : Arrays.toString(offsets);
+
+ final ArrayList result=new ArrayList(8);
+ if(numHits=5); //NOTE(review): truncated in the patch text; the declarations of keysM, offsetsM, keyScoresM, baseScoresM, bestScores, maxQuickScore and prescan_qscore used below are missing
+
+ int[][] prescanResults=null;
+ int[] precounts=null;
+ int[] prescores=null;
+
+ int hitsCutoff=0;
+ int qscoreCutoff=(int)(MIN_QSCORE_MULT*maxQuickScore);
+ //allBasesCovered: the first key starts at offset 0, the last key ends at the last base, and (per the partly truncated loop below) neighboring keys leave no gap.
+ boolean allBasesCovered=true;
+ {
+ if(offsetsP[0]!=0){allBasesCovered=false;}
+ else if(offsetsP[offsetsP.length-1]!=(basesP.length-KEYLEN)){allBasesCovered=false;}
+ else{
+ for(int i=1; ioffsetsP[i-1]+KEYLEN){ //NOTE(review): truncated in the patch text
+ allBasesCovered=false;
+ break;
+ }
+ }
+ }
+ }
+
+ //TODO I don't understand this logic
+ final boolean pretendAllBasesAreCovered=(allBasesCovered ||
+ keysP.length>=keysOriginal.length-4 ||
+ (keysP.length>=9 && (offsetsP[offsetsP.length-1]-offsetsP[0]+KEYLEN)>Tools.max(40, (int)(basesP.length*.75f))));
+
+// System.err.println(allBasesCovered+"\t"+Arrays.toString(offsetsP));
+// assert(allBasesCovered);
+
+ if(prescan_qscore){
+ prescanResults=prescanAllBlocks(bestScores,
+ keysP, keyScoresP, offsetsP,
+ keysM, keyScoresM, offsetsM,
+ pretendAllBasesAreCovered);
+
+ if(prescanResults!=null){
+ precounts=prescanResults[0];
+ prescores=prescanResults[1];
+ }
+
+ if(bestScores[1]=maxQuickScore && pretendAllBasesAreCovered){ //NOTE(review): truncated in the patch text
+ assert(bestScores[3]==maxQuickScore);
+ assert(bestScores[1]==numHits);
+
+ hitsCutoff=calcApproxHitsCutoff(keysP.length, bestScores[1], MIN_APPROX_HITS_TO_KEEP, true);
+ qscoreCutoff=Tools.max(qscoreCutoff, (int)(bestScores[3]*DYNAMIC_QSCORE_THRESH_PERFECT));
+ }else{
+ hitsCutoff=calcApproxHitsCutoff(keysP.length, bestScores[1], MIN_APPROX_HITS_TO_KEEP, false);
+ qscoreCutoff=Tools.max(qscoreCutoff, (int)(bestScores[3]*PRESCAN_QSCORE_THRESH));
+ }
+ }
+
+ final int maxScore=maxScore(offsetsP, baseScoresP, keyScoresP, basesP.length, true);
+ final boolean fullyDefined=AminoAcid.isFullyDefined(basesP);
+ assert(bestScores[2]<=0) : Arrays.toString(bestScores);
+ //cycle indexes the prescan arrays: two entries per block, plus strand first and then minus strand. A block/strand is searched when there is no prescan data or its prescan count or score reaches the cutoff.
+ int cycle=0;
+ for(int chrom=minChrom; chrom<=maxChrom; chrom=((chrom&CHROM_MASK_HIGH)+CHROMS_PER_BLOCK)){
+ if(precounts==null || precounts[cycle]>=hitsCutoff || prescores[cycle]>=qscoreCutoff){
+ find(keysP, basesP, baseScoresP, keyScoresP, chrom, Gene.PLUS,
+ offsetsP, obeyLimits, result, bestScores, allBasesCovered, maxScore, fullyDefined);
+ }
+ if(QUIT_AFTER_TWO_PERFECTS){
+ if(bestScores[5]>=2){break;} //if(bestScores[5]>=3 || (bestScores[5]>=2 && chrom<24)){break;} //for human
+ }
+ cycle++;
+ if(precounts==null || precounts[cycle]>=hitsCutoff || prescores[cycle]>=qscoreCutoff){
+ find(keysM, basesM, baseScoresM, keyScoresM, chrom, Gene.MINUS,
+ offsetsM, obeyLimits, result, bestScores, allBasesCovered, maxScore, fullyDefined);
+ }
+ if(QUIT_AFTER_TWO_PERFECTS){
+ if(bestScores[5]>=2){break;} //if(bestScores[5]>=3 || (bestScores[5]>=2 && chrom<24)){break;} //for human
+ }
+ cycle++;
+ }
+
+ assert(Read.CHECKSITES(result, basesP, basesM, id)); //TODO: Comment out once checked
+
+ return result;
+ }
+
+ /** Search blocks rapidly to find max hits, and perfect sites. May indicate some blocks can be skipped. Returns the shared prescanReturn array ([0]=per-cycle hit counts, [1]=per-cycle quick scores) and raises bestScores[1] and bestScores[3] to the best values seen. */
+ private final int[][] prescanAllBlocks(int[] bestScores,
+ int[] keysP, int[] keyScoresP, int[] offsetsP,
+ int[] keysM, int[] keyScoresM, int[] offsetsM,
+ final boolean allBasesCovered){
+ //pm[0] holds the plus-strand arrays, pm[1] the minus-strand arrays.
+ int[][][] pm=new int[][][] {{keysP, keyScoresP, offsetsP}, {keysM, keyScoresM, offsetsM}};
+
+ int bestqscore=0;
+ int maxHits=0;
+ int minHitsToScore=MIN_APPROX_HITS_TO_KEEP;
+
+ final int maxQuickScore=maxQuickScore(offsetsP, keyScoresP);
+ //The result arrays are reused instance buffers, pre-filled with the largest possible count and quick score.
+ final int[] counts=precountArray;
+ final int[] scores=prescoreArray;
+ final int[][] ret=prescanReturn;
+ Arrays.fill(counts, keysP.length);
+ Arrays.fill(scores, maxQuickScore);
+ ret[0]=counts;
+ ret[1]=scores;
+ //cycle advances once per (block, strand) pair: plus strand first, then minus strand.
+ int cycle=0;
+ for(int chrom=minChrom; chrom<=maxChrom; chrom=((chrom&CHROM_MASK_HIGH)+CHROMS_PER_BLOCK)){
+ final int baseChrom=baseChrom(chrom);
+ for(int pmi=0; pmi<2; pmi++, cycle++){
+
+ int[] keys=pm[pmi][0];
+ int[] keyScores=pm[pmi][1];
+ int[] offsets=pm[pmi][2];
+// int[][] hits=getHitArray(offsets.length);
+
+ int[] starts=startArray;
+ int[] stops=stopArray;
+ final int numHits=getHits(keys, chrom, Integer.MAX_VALUE, starts, stops);
+
+ if(numHits=maxQuickScore && allBasesCovered); //NOTE(review): this line appears truncated in the patch text (the branch structure and the call that produces 'temp' are missing); restore from upstream
+ //temp[0] is stored as this cycle's quick score and temp[1] as its hit count.
+ scores[cycle]=temp[0];
+ counts[cycle]=temp[1];
+
+ bestqscore=Tools.max(temp[0], bestqscore);
+ maxHits=Tools.max(maxHits, temp[1]);
+ if(bestqscore>=maxQuickScore && allBasesCovered){
+ assert(bestqscore==maxQuickScore);
+ assert(maxHits==keysP.length) :
+ "\nTemp: \t"+Arrays.toString(temp)+", cycle="+cycle+"\n" +
+ "Scores: \t"+Arrays.toString(scores)+
+ "Counts: \t"+Arrays.toString(counts)+
+ "bestqscore: \t"+bestqscore+
+ "maxHits: \t"+maxHits+
+ "maxQuickScore: \t"+maxQuickScore+
+ "numHits: \t"+numHits+
+ "minHitsToScore: \t"+minHitsToScore+
+ "keys.length: \t"+keys.length;
+
+ minHitsToScore=Tools.max(minHitsToScore, maxHits);
+
+ {
+ //This early exit is optional. Does not seem to impact speed much either way.
+ bestScores[1]=Tools.max(bestScores[1], maxHits);
+ bestScores[3]=Tools.max(bestScores[3], bestqscore);
+ return ret;
+ }
+ }
+ }
+ }
+ }
+
+ bestScores[1]=Tools.max(bestScores[1], maxHits);
+ bestScores[3]=Tools.max(bestScores[3], bestqscore);
+
+ if(!RETAIN_BEST_QCUTOFF){bestScores[2]=-9999;}
+
+ return ret;
+ }
+
+
+ /** Search a single block and strand */
+ public final ArrayList find(int[] keys, final byte[] bases, final byte[] baseScores, int[] keyScores,
+ final int chrom, final byte strand,
+ int[] offsets, final boolean obeyLimits, ArrayList ssl, int[] bestScores,
+ final boolean allBasesCovered, final int maxScore, final boolean fullyDefined){
+
+ assert(checkOffsets(offsets)) : Arrays.toString(offsets);
+
+ int[] starts=startArray;
+ int[] stops=stopArray;
+
+ int numHits=getHits(keys, chrom, Integer.MAX_VALUE, starts, stops);
+ if(numHits=0){numHits++;}
+ }
+
+ if(numHits==offsets.length){
+ return null;
+ }else{
+ int[][] r=shrinkReturn3;
+ int[] starts2=startArray;
+ int[] stops2=stopArray;
+ int[] offsets2=getOffsetArray(numHits);
+ int[] keyScores2=new int[numHits];
+
+ for(int i=0, j=0; i=0){
+ starts2[j]=starts[i];
+ stops2[j]=stops[i];
+ offsets2[j]=offsets[i];
+ keyScores2[j]=keyScores[i];
+ j++;
+ }
+ }
+ r[0]=starts2;
+ r[1]=stops2;
+ r[2]=offsets2;
+ r[4]=keyScores2;
+ return r;
+ }
+ }
+
+ /** Removes "-1" keys. Returns null when nothing needs removing; otherwise returns the shared shrinkReturn2 array holding the compacted {offsets, keys, keyScores}. */
+ private final int[][] shrink2(int[] offsets, int[] keys, int[] keyScores){
+
+ //First pass: count the entries that will be kept.
+ int numHits=0;
+ for(int i=0; i=0){numHits++;} //NOTE(review): this line appears truncated in the patch text (the text between a '<' and a later '>' is missing); restore from upstream
+ }
+
+
+ assert(checkOffsets(offsets)) : Arrays.toString(offsets);
+ if(numHits==keys.length){
+ return null; //Every key is kept, so the caller can keep using its own arrays
+ }else{
+ int[][] r=shrinkReturn2;
+ int[] offsets2=getOffsetArray(numHits);
+ assert(offsets2!=offsets);
+ assert(offsets2.length=0){ //NOTE(review): truncated in the patch text; the allocation of keys2 and keyScores2 and the copy-loop header are missing
+ offsets2[j]=offsets[i];
+ keys2[j]=keys[i];
+ keyScores2[j]=keyScores[i];
+ j++;
+ }
+ }
+ assert(checkOffsets(offsets2)) : "\nnumHits="+numHits+"\n"+Arrays.toString(offsets)+" -> \n"+Arrays.toString(offsets2)+"\n"+
+ "\n"+Arrays.toString(keys)+" -> \n"+Arrays.toString(keys2)+"\n";
+ r[0]=offsets2;
+ r[1]=keys2;
+ r[2]=keyScores2;
+ return r;
+ }
+ }
+
+
+ /** Walks the hit lists of one block and strand, using a heap to track the next column to increment, and scores candidate sites that have enough nearby key hits. Returns ssl (created here if null). */
+ private final ArrayList slowWalk2(int[] starts, int[] stops, final byte[] bases,
+ final byte[] baseScores, int[] keyScores, int[] offsets,
+ final int baseChrom_, final byte strand, final boolean obeyLimits, ArrayList ssl, final boolean fullyDefined){
+
+ if(SHRINK_BEFORE_WALK){
+ int[][] r=shrink(starts, stops, offsets, keyScores, offsets.length);
+ if(r!=null){ //null means shrink had nothing to remove
+ starts=r[0];
+ stops=r[1];
+ offsets=r[2];
+ keyScores=r[4];
+ }
+ }
+
+ final int numHits=offsets.length; //After shrink
+
+
+ assert(numHits==offsets.length);
+ assert(numHits==keyScores.length);
+
+//// System.out.println("After SHRINK_BEFORE_WALK: numHits = "+hits.length);
+// Block b=index[baseChrom_];
+// int[][] hits=b.getHitLists(starts, stops);
+// if(SHRINK_BEFORE_WALK){
+// Object[] r=shrink(hits, offsets, keyScores);
+// if(r!=null){
+// hits=(int[][])r[0];
+// offsets=(int[])r[1];
+// keyScores=(int[])r[3];
+// }
+// }
+//
+// final int numHits=hits.length; //After shrink
+
+
+ assert(numHits==offsets.length);
+ assert(numHits==keyScores.length);
+
+ assert(!(!SHRINK_BEFORE_WALK && ADD_SCORE_Z));
+
+ //This can be done before or after shrinking, but the results will change depending on MIN_SCORE_MULT and etc.
+ final int maxScore=maxScore(offsets, baseScores, keyScores, bases.length, true);
+// final int maxQuickScore=(!USE_EXTENDED_SCORE ? maxScore : maxQuickScore(offsets));
+// System.err.println("maxScore = "+maxScore);
+
+// final int minScore=(obeyLimits ? (int)(MIN_SCORE_MULT*maxScore) : (int)(MIN_SCORE_MULT*0.85f*maxScore));
+ final int minScore=(obeyLimits ? (int)(MIN_SCORE_MULT*maxScore) : (int)(MIN_SCORE_MULT*1.25f*maxScore)); //The minimum score is 1.25x higher when obeyLimits is false
+// final int minQuickScore=(!USE_EXTENDED_SCORE ? minScore : (int)(maxQuickScore*0.15f));
+// final int minScore=(int)(MIN_SCORE_MULT*maxScore);
+// System.err.println("minScore = "+minScore);
+
+ final int baseChrom=baseChrom(baseChrom_);
+
+
+// final PriorityQueue heap=new PriorityQueue(numHits);
+ heap.clear();
+// final Quad64[] triples=new Quad64[numHits];
+ final Quad64[] triples=tripleStorage;
+ //Working arrays are reused instance buffers.
+ final Block b=index[baseChrom];
+ final int[] values=valueArray;
+ final int[] sizes=sizeArray;
+ final int[] locArray=(USE_EXTENDED_SCORE ? getLocArray(bases.length) : null);
+ //Seed the heap with one entry per key column.
+ for(int i=0; i0); //NOTE(review): this line appears truncated in the patch text (the loop header and the setup of 'start' and 'sites' are missing); restore from upstream
+
+ int a=sites[start];
+ int a2;
+ if((a&SITE_MASK)>=offsets[i]){ //Shift the hit back by the key's offset in the read
+ a2=a-offsets[i];
+ }else{ //The shift would go below position 0: clamp the site to 0 and re-encode with the same chrom
+ int ch=numberToChrom(a, baseChrom);
+ int st=numberToSite(a);
+ int st2=Tools.max(st-offsets[i], 0);
+ a2=toNumber(st2, ch);
+ }
+ assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom));
+
+ Quad64 t=triples[i];
+ assert(t!=null) : "Should be using tripleStorage";
+ assert(i==t.column);
+ t.row=start;
+ t.site=((long)a2)&0xFFFFFFFFL; //Stored as an unsigned 32-bit value in a long
+ t.list=sites;
+ values[i]=a2;
+
+ heap.add(t);
+ }
+
+ if(ssl==null){ssl=new ArrayList(8);}
+
+ int currentTopScore=-999999999;
+
+ int cutoff=minScore;
+
+ int maxHits=0;
+ int approxHitsCutoff=MIN_APPROX_HITS_TO_KEEP;
+
+// System.out.println("\nEntering SS loop:");
+// System.out.println("maxScore="+maxScore+"\tminScore="+minScore+"\tcurrentTopScore="+currentTopScore+"\n" +
+// "cutoff="+cutoff+"\tmaxHits="+maxHits+"\tapproxHitsCutoff="+approxHitsCutoff);
+// System.out.println();
+
+ SiteScore prevSS=null;
+ while(!heap.isEmpty()){
+ Quad64 t=heap.peek();
+ final int site=(int)t.site; //*** TODO
+ final int centerIndex=t.column;
+
+ int maxNearbySite=site;
+
+ //Count the columns whose current value lies within [site-MAX_INDEL, site+MAX_INDEL2].
+ int approxHits=0;
+
+ {//Inner loop
+ final int minsite=subUnsigned(site, MAX_INDEL);
+ final int maxsite=addUnsigned(site, MAX_INDEL2);
+ for(int column=0, chances=numHits-approxHitsCutoff; column=0; column++){ //NOTE(review): loop condition truncated in the patch text
+ final int x=values[column];
+ assert(x==(int)triples[column].site);
+ if(x>=minsite && x<=maxsite){
+ maxNearbySite=(x>maxNearbySite ? x : maxNearbySite);
+ approxHits++;
+ }else{chances--;}
+ }
+ }
+
+ assert(centerIndex>=0) : centerIndex;
+ assert(approxHits>=1 || approxHitsCutoff>1) : approxHits+", "+approxHitsCutoff+", "+numHits+", "+t.column;
+ if(approxHits>=approxHitsCutoff){
+
+ int score;
+
+ int mapStart=site, mapStop=maxNearbySite;
+
+ if(USE_EXTENDED_SCORE){
+ final int chrom=numberToChrom(site, baseChrom);
+ score=extendScore(bases, baseScores, offsets, values, chrom, centerIndex, locArray, numHits, approxHits);
+ if(true/*USE_AFFINE_SCORE*/){
+ //Correct begin and end positions if they changed.
+ int min=Integer.MAX_VALUE;
+ int max=Integer.MIN_VALUE;
+ for(int i=0; i-1){ //NOTE(review): truncated in the patch text
+ if(xmax){max=x;} //NOTE(review): truncated in the patch text
+ }
+ }
+
+// assert(min>-1 && max>-1) : Arrays.toString(locArray); //TODO: How did this assertion trigger?
+ if(min<0 || max<0){
+ //Note: This error can trigger if minChrom and maxChrom do not align to block boundaries
+ if(!Shared.anomaly){
+ Shared.anomaly=true;
+ System.err.println("Anomaly in "+getClass().getName()+".slowWalk: "+
+ chrom+", "+mapStart+", "+mapStop+", "+centerIndex+", "+
+ Arrays.toString(locArray)+"\n"+
+ Arrays.toString(values)+"\n"+
+ new String(bases)+"\nstrand="+strand+"\n");
+ System.err.println();
+ }
+ score=-99999;
+ }
+
+
+ mapStart=toNumber(min, chrom);
+ mapStop=toNumber(max, chrom);
+
+
+
+// System.err.println("site="+site+", maxNearbySite="+maxNearbySite+", min="+min+", max="+max+
+// ", mapStart="+mapStart+", mapStop="+mapStop);
+
+// if(chrom==17 && absdif(min, 30354420)<2000){
+// System.err.println("\n*****\n");
+// System.err.println("site="+site+" ("+numberToSite(site)+"), maxNearbySite="+maxNearbySite+
+// " ("+numberToSite(maxNearbySite)+"), min="+min+", max="+max+
+// ", mapStart="+mapStart+", mapStop="+mapStop);
+// System.err.println();
+// System.err.println(Arrays.toString(locArray));
+// System.err.println();
+// System.err.println("chrom="+chrom);
+// System.err.println("score="+score);
+// }
+ }
+ }else{
+ score=quickScore(values, keyScores, centerIndex, offsets, sizes, true, approxHits, numHits);
+ if(ADD_SCORE_Z){
+ int scoreZ=scoreZ2(values, centerIndex, offsets, approxHits, numHits);
+ score+=scoreZ;
+ }
+ }
+
+
+// score=score(values, centerIndex, offsets, hits);
+// if(ADD_SCORE_Z){
+// int scoreZ=scoreZ2(values, centerIndex, offsets);
+// score+=scoreZ;
+// }
+//
+// if(USE_EXTENDED_SCORE){
+// if(score>minQuickScore){
+//// System.out.println(score+" > "+minQuickScore);
+// score=extendScore(read, offsets, values, numberToChrom(site, baseChrom), centerIndex, locArray);
+// }else{
+//// System.out.print(".");
+// score=-1;
+// }
+// }
+
+
+// System.err.println("maxScore = "+maxScore);
+// System.err.println("hits = "+approxHits+" / "+approxHitsCutoff);
+// System.err.println("score = "+score+" / "+cutoff);
+
+ if(score>=cutoff){
+
+// System.err.println("Passed!");
+
+// System.out.println("approxHits="+approxHits+" / "+approxHitsCutoff);
+// System.out.println("score="+score+" / "+cutoff);
+// System.out.println("strand="+Gene.strandCodes[strand]);
+// System.out.println("center="+values[centerIndex]);
+// System.out.println("values="+Arrays.toString(values));
+// extendScore(read, offsets, values, numberToChrom(site, baseChrom), centerIndex);
+// System.out.println();
+
+ if(score>currentTopScore){
+// System.err.println("New top score!");
+
+ if(DYNAMICALLY_TRIM_LOW_SCORES){
+ //A new best score tightens both the hit-count cutoff and the score cutoff for later sites.
+ maxHits=Tools.max(approxHits, maxHits);
+// approxHitsCutoff=Tools.max(approxHitsCutoff, maxHits);
+ approxHitsCutoff=Tools.max(approxHitsCutoff, maxHits-1); //More sensitive, but slower
+
+ cutoff=Tools.max(cutoff, (int)(score*DYNAMIC_SCORE_THRESH));
+
+ // cutoff=Tools.max(cutoff, minScore+(int)((score-minScore)*DYNAMIC_SCORE_THRESH));
+ if(USE_EXTENDED_SCORE && score>=maxScore){
+ cutoff=Tools.max(cutoff, (int)(score*0.95f));
+ }
+ }
+
+ currentTopScore=score;
+
+// System.out.println("New top score: "+currentTopScore+" \t("+cutoff+")");
+ }
+
+// final int chrom=numberToChrom(site, baseChrom);
+// final int site2=numberToSite(site);
+// final int site3=numberToSite(maxNearbySite)+read.length;
+
+ final int chrom=numberToChrom(mapStart, baseChrom);
+ final int site2=numberToSite(mapStart);
+ final int site3=numberToSite(mapStop)+bases.length-1;
+
+ assert(NUM_CHROM_BITS==0 || site2=offsets[col]){ //NOTE(review): truncated in the patch text; the code that records the site in ssl and begins advancing the heap columns is missing
+ a2=a-offsets[col];
+
+ assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
+ "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
+ ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
+ }else{
+ int ch=numberToChrom(a, baseChrom);
+ int st=numberToSite(a);
+ int st2=Tools.max(st-offsets[col], 0);
+ a2=toNumber(st2, ch);
+
+ assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
+ "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
+ ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
+ }
+
+ assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
+ "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
+ ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
+
+ t2.site=((long)a2)&0xFFFFFFFFL;
+ values[col]=a2;
+ heap.add(t2);
+ }
+ if(heap.isEmpty()){break;}
+ }
+
+ }
+
+ return ssl;
+ }
+
+ /** This uses a heap to track next column to increment */
+ private final ArrayList slowWalk3(int[] starts, int[] stops, final byte[] bases,
+ final byte[] baseScores, int[] keyScores, int[] offsets,
+ final int baseChrom_, final byte strand, final boolean obeyLimits, ArrayList ssl,
+ int[] bestScores, final boolean allBasesCovered, final int maxScore, final boolean fullyDefined){
+ assert(USE_EXTENDED_SCORE);
+
+ final int numKeys=offsets.length; //Before shrink
+
+ //This can be done before or after shrinking, but the results will change depending on MIN_SCORE_MULT and etc.
+ final int maxQuickScore=maxQuickScore(offsets, keyScores);
+
+ if(SHRINK_BEFORE_WALK){
+ int[][] r=shrink(starts, stops, offsets, keyScores, offsets.length);
+ if(r!=null){
+ starts=r[0];
+ stops=r[1];
+ offsets=r[2];
+ keyScores=r[4];
+ }
+ }
+
+ final int numHits=offsets.length; //After shrink
+
+
+ assert(numHits==offsets.length);
+ assert(numHits==keyScores.length);
+
+ usedKeys+=numHits;
+ usedKeyIterations++;
+
+ final boolean filter_by_qscore=(FILTER_BY_QSCORE && numKeys>=5);
+
+ assert(!(!SHRINK_BEFORE_WALK && ADD_SCORE_Z));
+
+
+// final int minScore=(obeyLimits ? (int)(MIN_SCORE_MULT*maxScore) : (int)(MIN_SCORE_MULT*0.85f*maxScore));
+ final int minScore=(obeyLimits ? (int)(MIN_SCORE_MULT*maxScore) : (int)(MIN_SCORE_MULT*1.25f*maxScore));
+ final int minQuickScore=(int)(MIN_QSCORE_MULT*maxQuickScore);
+
+ final int baseChrom=baseChrom(baseChrom_);
+
+ heap.clear();
+
+ final Quad64[] triples=tripleStorage;
+
+ final int[] values=valueArray;
+ final int[] sizes=sizeArray;
+ final int[] locArray=(USE_EXTENDED_SCORE ? getLocArray(bases.length) : null);
+ final Block b=index[baseChrom];
+
+ if(ssl==null){ssl=new ArrayList(8);}
+
+ int currentTopScore=bestScores[0];
+ int cutoff=Tools.max(minScore, (int)(currentTopScore*DYNAMIC_SCORE_THRESH));
+
+ int qcutoff=Tools.max(bestScores[2], minQuickScore);
+ int bestqscore=bestScores[3];
+ int maxHits=bestScores[1];
+ int perfectsFound=bestScores[5];
+ assert((currentTopScore>=maxScore) == (perfectsFound>0)) : currentTopScore+", "+maxScore+", "+perfectsFound+", "+maxHits+", "+numHits;
+ int approxHitsCutoff=calcApproxHitsCutoff(numKeys, maxHits, MIN_APPROX_HITS_TO_KEEP, currentTopScore>=maxScore);
+ if(approxHitsCutoff>numHits){return ssl;}
+
+ final boolean shortCircuit=(allBasesCovered && numKeys==numHits && filter_by_qscore);
+
+
+ assert(USE_EXTENDED_SCORE);
+
+ if(currentTopScore>=maxScore){
+ assert(currentTopScore==maxScore);
+ qcutoff=Tools.max(qcutoff, (int)(maxQuickScore*DYNAMIC_QSCORE_THRESH_PERFECT));
+ }
+
+
+ for(int i=0; i0);
+
+ int a=sites[start];
+ int a2;
+ if((a&SITE_MASK)>=offsets[i]){
+ a2=a-offsets[i];
+ }else{
+ int ch=numberToChrom(a, baseChrom);
+ int st=numberToSite(a);
+ int st2=Tools.max(st-offsets[i], 0);
+ a2=toNumber(st2, ch);
+ }
+ assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom));
+
+ Quad64 t=triples[i];
+ assert(t!=null) : "Should be using tripleStorage";
+ assert(i==t.column);
+ t.row=start;
+ t.site=((long)a2)&0xFFFFFFFFL;
+ t.list=sites;
+ values[i]=a2;
+
+ heap.add(t);
+ }
+
+// System.out.println("\nEntering SS loop:");
+// System.out.println("maxScore="+maxScore+"\tminScore="+minScore+"\tcurrentTopScore="+currentTopScore+"\n" +
+// "cutoff="+cutoff+"\tmaxHits="+maxHits+"\tapproxHitsCutoff="+approxHitsCutoff);
+// System.out.println("maxQuickScore="+maxQuickScore+"\tminQuickScore="+minQuickScore+"\tqcutoff="+qcutoff);
+
+
+ SiteScore prevSS=null;
+ while(!heap.isEmpty()){
+ Quad64 t=heap.peek();
+ final int site=(int)t.site;
+ final int centerIndex=t.column;
+
+ int maxNearbySite=site;
+
+
+ int approxHits=0;
+
+ {//Inner loop
+ final int minsite=subUnsigned(site, MAX_INDEL);
+ final int maxsite=addUnsigned(site, MAX_INDEL2);
+ for(int column=0, chances=numHits-approxHitsCutoff; column=0; column++){
+ final int x=values[column];
+ assert(x==(int)triples[column].site);
+ if(x>=minsite && x<=maxsite){
+ maxNearbySite=(x>maxNearbySite ? x : maxNearbySite);
+ approxHits++;
+ }else{chances--;}
+ }
+ }
+
+ assert(centerIndex>=0) : centerIndex;
+ assert(approxHits>=1 || approxHitsCutoff>1) : approxHits+", "+approxHitsCutoff+", "+numHits+", "+t.column;
+ if(approxHits>=approxHitsCutoff){
+
+ int score;
+ int qscore=(filter_by_qscore ? quickScore(values, keyScores, centerIndex, offsets, sizes, true, approxHits, numHits) : qcutoff);
+ if(ADD_SCORE_Z){
+ int scoreZ=scoreZ2(values, centerIndex, offsets, approxHits, numHits);
+ qscore+=scoreZ;
+ }
+
+ int mapStart=site, mapStop=maxNearbySite;
+
+ assert(USE_EXTENDED_SCORE);
+
+ boolean locArrayValid=false;
+ if(qscore-1){
+ if(xmax){max=x;}
+ }
+ }
+
+ if(score>=maxScore){
+ assert(min==max && min>-1) : "\n"+score+", "+maxScore+", "+min+", "+max+
+ ", "+(max-min)+", "+bases.length+"\n"+Arrays.toString(locArray)+"\n";
+ }
+
+ // assert(min>-1 && max>-1) : Arrays.toString(locArray); //TODO: How did this assertion trigger?
+ if(min<0 || max<0){
+ if(!Shared.anomaly){
+ Shared.anomaly=true;
+ System.err.println("Anomaly in "+getClass().getName()+".slowWalk: "+
+ chrom+", "+mapStart+", "+mapStop+", "+centerIndex+", "+
+ Arrays.toString(locArray)+"\n"+
+ Arrays.toString(values)+"\n"+
+ new String(bases)+"\nstrand="+strand+"\n");
+ System.err.println();
+ }
+ score=-99999;
+ }
+
+ //mapStart and mapStop are indices
+ mapStart=toNumber(min, chrom);
+ mapStop=toNumber(max, chrom);
+
+ if(score>=maxScore){
+ assert(mapStop-mapStart==0) : "\n"+score+", "+maxScore+", "+min+", "+max+
+ ", "+(max-min)+", "+(mapStop-mapStart)+", "+bases.length+"\n"+Arrays.toString(locArray)+"\n";
+ }
+ }
+
+ if(score==maxScore){
+ qcutoff=Tools.max(qcutoff, (int)(maxQuickScore*DYNAMIC_QSCORE_THRESH_PERFECT));
+ approxHitsCutoff=calcApproxHitsCutoff(numKeys, maxHits, MIN_APPROX_HITS_TO_KEEP, true);
+ }
+
+ if(score>=cutoff){
+ qcutoff=Tools.max(qcutoff, (int)(qscore*DYNAMIC_QSCORE_THRESH));
+ bestqscore=Tools.max(qscore, bestqscore);
+ }
+ }
+
+// System.err.println("maxScore = "+maxScore);
+// System.err.println("hits = "+approxHits+" / "+approxHitsCutoff);
+// System.err.println("score = "+score+" / "+cutoff);
+
+ if(score>=cutoff){
+
+// System.err.println("Passed!");
+
+// System.out.println("approxHits="+approxHits+" / "+approxHitsCutoff);
+// System.out.println("score="+score+" / "+cutoff);
+// System.out.println("strand="+Gene.strandCodes[strand]);
+// System.out.println("center="+values[centerIndex]);
+// System.out.println("values="+Arrays.toString(values));
+// extendScore(read, offsets, values, numberToChrom(site, baseChrom), centerIndex);
+// System.out.println();
+
+ if(score>currentTopScore){
+// System.err.println("New top score!");
+
+ if(DYNAMICALLY_TRIM_LOW_SCORES){
+
+ maxHits=Tools.max(approxHits, maxHits);
+ approxHitsCutoff=calcApproxHitsCutoff(numKeys, maxHits, approxHitsCutoff, currentTopScore>=maxScore);
+
+ cutoff=Tools.max(cutoff, (int)(score*DYNAMIC_SCORE_THRESH));
+ if(score>=maxScore){
+ assert(USE_EXTENDED_SCORE);
+ cutoff=Tools.max(cutoff, (int)(score*0.95f));
+ }
+ }
+
+ currentTopScore=score;
+
+// System.out.println("New top score: "+currentTopScore+" \t("+cutoff+")");
+ }
+
+ final int chrom=numberToChrom(mapStart, baseChrom);
+ final int site2=numberToSite(mapStart);
+ final int site3=numberToSite(mapStop)+bases.length-1;
+
+ assert(NUM_CHROM_BITS==0 || site2 "+site2);
+// System.err.println(mapStop+" -> "+site3);
+
+ assert(gapArray[0]>=site2 && gapArray[0]-site2 "+site2+"\n"+
+ mapStop+" -> "+site3+"\n\n"+
+ Arrays.toString(gapArray)+"\n\n"+
+// Arrays.toString(clone)+"\n\n"+
+ Arrays.toString(locArray)+"\n"+
+ "numHits="+numHits+", "+
+ "heap.size="+heap.size()+", "+
+ "numHits="+numHits+", "+
+ "approxHits="+approxHits+"\n";
+ gapArray[0]=Tools.min(gapArray[0], site2);
+ gapArray[gapArray.length-1]=Tools.max(gapArray[gapArray.length-1], site3);
+ }
+ if(verbose){System.err.println("@ site "+site2+", made gap array: "+Arrays.toString(gapArray));}
+// assert(false) : Arrays.toString(locArray);
+ }
+
+
+ //This block is optional, but tries to eliminate multiple identical alignments
+
+ SiteScore ss=null;
+ final boolean perfect1=USE_EXTENDED_SCORE && score==maxScore && fullyDefined;
+ final boolean inbounds=(site2>=0 && site3=offsets[col]){
+ a2=a-offsets[col];
+
+ assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
+ "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
+ ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
+ }else{
+ int ch=numberToChrom(a, baseChrom);
+ int st=numberToSite(a);
+ int st2=Tools.max(st-offsets[col], 0);
+ a2=toNumber(st2, ch);
+
+ assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
+ "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
+ ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
+ }
+
+ assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
+ "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
+ ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
+
+ t2.site=((long)a2)&0xFFFFFFFFL;
+ values[col]=a2;
+ heap.add(t2);
+ }else if(heap.size()=prevMaxHits);
+
+ final int baseChrom=baseChrom(baseChrom_);
+ final Block b=index[baseChrom];
+ final int[] sizes=sizeArray;
+
+ heap.clear();
+ for(int i=0; i0);
+
+ int a=sites[start];
+ int a2;
+ if((a&SITE_MASK)>=offsets[i]){
+ a2=a-offsets[i];
+ }else{
+ int ch=numberToChrom(a, baseChrom);
+ int st=numberToSite(a);
+ int st2=Tools.max(st-offsets[i], 0);
+ a2=toNumber(st2, ch);
+ }
+ assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom));
+
+ Quad64 t=triples[i];
+ assert(t!=null) : "Should be using tripleStorage";
+ assert(i==t.column);
+ t.row=start;
+ t.site=((long)a2)&0xFFFFFFFFL;
+ t.list=sites;
+ values[i]=a2;
+
+ heap.add(t);
+ }
+
+ final int maxQuickScore=maxQuickScore(offsets, keyScores);
+
+ int topQscore=-999999999;
+
+ int maxHits=0;
+// int approxHitsCutoff=MIN_APPROX_HITS_TO_KEEP;
+
+
+ int approxHitsCutoff;
+ final int indelCutoff;
+ if(perfectOnly){
+ approxHitsCutoff=numHits;
+ indelCutoff=0;
+ }else{
+ approxHitsCutoff=Tools.max(prevMaxHits, Tools.min(MIN_APPROX_HITS_TO_KEEP, numHits-1)); //Faster, same accuracy
+ indelCutoff=MAX_INDEL2;
+ }
+
+
+ while(!heap.isEmpty()){
+ Quad64 t=heap.peek();
+ final int site=(int)t.site; //*** TODO
+ final int centerIndex=t.column;
+
+ int maxNearbySite=site;
+
+
+ int approxHits=0;
+
+ {//Inner loop
+ final int minsite=subUnsigned(site, MAX_INDEL);
+ final int maxsite=addUnsigned(site, MAX_INDEL2);
+ for(int column=0, chances=numHits-approxHitsCutoff; column=0; column++){
+ final int x=values[column];
+ assert(x==(int)triples[column].site);
+ if(x>=minsite && x<=maxsite){
+ maxNearbySite=(x>maxNearbySite ? x : maxNearbySite);
+ approxHits++;
+ }else{chances--;}
+ }
+ }
+
+ assert(centerIndex>=0) : centerIndex;
+ assert(approxHits>=1 || approxHitsCutoff>1) : approxHits+", "+approxHitsCutoff+", "+numHits+", "+t.column;
+ if(approxHits>=approxHitsCutoff){
+
+ int qscore=quickScore(values, keyScores, centerIndex, offsets, sizes, true, approxHits, numHits);
+
+ if(ADD_SCORE_Z){
+ int scoreZ=scoreZ2(values, centerIndex, offsets, approxHits, numHits);
+ qscore+=scoreZ;
+ }
+
+ if(qscore>topQscore){
+
+// maxHits=Tools.max(approxHits, maxHits);
+// approxHitsCutoff=Tools.max(approxHitsCutoff, maxHits); //Best setting for pre-scan
+
+ maxHits=Tools.max(approxHits, maxHits);
+ approxHitsCutoff=Tools.max(approxHitsCutoff, approxHits-1); //Best setting for pre-scan
+
+ topQscore=qscore;
+
+ if(qscore>=maxQuickScore){
+ assert(qscore==maxQuickScore);
+ assert(approxHits==numHits);
+ if(earlyExit){
+ return new int[] {topQscore, maxHits};
+ }
+ }
+ }
+ }
+
+ while(site==(int)heap.peek().site){ //Remove all identical elements, and add subsequent elements
+ final Quad64 t2=heap.poll();
+ final int row=t2.row+1, col=t2.column;
+ if(row=offsets[col]){
+ a2=a-offsets[col];
+
+ assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
+ "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", site="+site+
+ ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
+ }else{
+ int ch=numberToChrom(a, baseChrom);
+ int st=numberToSite(a);
+ int st2=Tools.max(st-offsets[col], 0);
+ a2=toNumber(st2, ch);
+
+ assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
+ "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", site="+site+
+ ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
+ }
+
+ assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
+ "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", site="+site+
+ ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
+
+ t2.site=((long)a2)&0xFFFFFFFFL;
+ values[col]=a2;
+ heap.add(t2);
+ }else if(earlyExit && (perfectOnly || heap.size()b ? a-b : b-a;
+ return (a<0 == b<0) ? a>b ? a-b : b-a : Integer.MAX_VALUE;
+ }
+
+
+ /** Score ceiling for a read under the active mode: msa.maxQuality(...) when affine, readlen*(BASE_HIT_SCORE+BASE_HIT_SCORE/5) (plus the summed base scores if useQuality) when extended, else maxQuickScore. */
+ final int maxScore(int[] offsets, byte[] baseScores, int[] keyScores, int readlen, boolean useQuality){
+     if(USE_AFFINE_SCORE){
+         //The quality-aware form apparently MUST be used if quality is used later on for slow align.
+         return useQuality ? msa.maxQuality(baseScores) : msa.maxQuality(readlen);
+     }
+     if(USE_EXTENDED_SCORE){
+         final int withoutQuality=readlen*(BASE_HIT_SCORE+BASE_HIT_SCORE/5);
+         return useQuality ? withoutQuality+Tools.sum(baseScores) : withoutQuality;
+     }
+     //Neither affine nor extended scoring is enabled: use the key-based quick score ceiling.
+     return maxQuickScore(offsets, keyScores);
+ }
+
+
+ /**
+  * Quick-score ceiling for a set of keys: the sum of all key scores, plus Y_SCORE_MULT times the
+  * distance between the last and first key offsets, plus maxScoreZ(offsets) when ADD_SCORE_Z is set.
+  * @param offsets key offsets within the read; must contain at least one element
+  * @param keyScores score of each key
+  * @return the sum of the terms above
+  */
+ public final int maxQuickScore(int[] offsets, int[] keyScores){
+     int total=Tools.intSum(keyScores);
+     final int lastKey=offsets.length-1;
+     final int spanTerm=Y_SCORE_MULT*(offsets[lastKey]-offsets[0]);
+     if(ADD_SCORE_Z){total+=maxScoreZ(offsets);}
+     //No list-size bonus and no both-ends bonus is included in this ceiling.
+     return total+spanTerm;
+ }
+
+
+ /**
+  * Scores a candidate site from its key hits, anchored on the key at centerIndex.
+  * With exactly one approximate hit the result is simply that key's score. Otherwise it is the
+  * center key's score plus scoreLeft and scoreRight (aligning leftward and rightward of the center),
+  * minus centerIndex, plus Y_SCORE_MULT*scoreY, plus the center list-size bonus if ADD_LIST_SIZE_BONUS.
+  * Subtracting centerIndex is a disambiguating term: given otherwise identical match patterns
+  * (for example, a small indel will generate two valid site candidates), the lower site is chosen.
+  */
+ private final int quickScore(final int[] values, final int[] keyScores, final int centerIndex, final int offsets[],
+         int[] sizes, final boolean penalizeIndels, final int numApproxHits, final int numHits){
+     final int centerScore=keyScores[centerIndex];
+     if(numApproxHits==1){return centerScore;}
+
+     final int leftScore=scoreLeft(values, keyScores, centerIndex, sizes, penalizeIndels);
+     final int rightScore=scoreRight(values, keyScores, centerIndex, sizes, penalizeIndels, numHits);
+     int total=centerScore+leftScore+rightScore-centerIndex;
+
+     final int yTerm=Y_SCORE_MULT*scoreY(values, centerIndex, offsets);
+     if(ADD_LIST_SIZE_BONUS){total+=calcListSizeBonus(sizes[centerIndex]);}
+     return total+yTerm;
+ }
+
+
+// /** Generates a term that increases score with how many bases in the read match the ref. */
+// private static final int scoreZ(int[] locs, int centerIndex, int offsets[]){
+// final int center=locs[centerIndex];
+//
+// final int[] refLoc=new int[offsets[offsets.length-1]+CHUNKSIZE];
+//
+// final int maxLoc=center+MAX_INDEL2;
+// final int minLoc=Tools.max(0, center-MAX_INDEL);
+//
+// int score=0;
+//
+// for(int i=0; i=minLoc && loc<=maxLoc){
+//// assert(loc>=center) : "loc="+loc+"\ni="+i+"\ncenterIndex="+centerIndex+
+//// "\nmaxLoc="+maxLoc+"\nlocs:\t"+Arrays.toString(locs)+"\noffsets:\t"+Arrays.toString(offsets);
+//
+// int offset=offsets[i];
+// int max=CHUNKSIZE+offset;
+//
+// for(int j=offset; jloc){
+// refLoc[j]=loc;
+// score-=2;
+// }else if(old==loc){
+// score-=1;
+// //do nothing, perhaps, or add 1?
+// }else{
+// score-=2;
+// assert(old=minVal && value<=maxVal){
+ final int refbase=numberToSite(value);
+ assert(refbase>=minLoc && refbase<=maxLoc);
+
+ // System.out.println("Reverse: Trying key "+refbase+" @ "+offsets[i]);
+ // System.out.println("Passed!");
+ keynum++;
+ final int callbase=offsets[i];
+
+ int misses=0;
+ for(int cloc=callbase+KEYLEN-1, rloc=refbase+cloc; cloc>=0 && rloc>=0 && rloc=0);
+ if(dif>minGap){
+ gaps++;
+ }
+ }
+ if(gaps<1){return null;}
+ int[] out=new int[2+gaps*2];
+ out[0]=locArray[0];
+ out[out.length-1]=locArray[locArray.length-1];
+
+ for(int i=1, j=1; i=0);
+ if(dif>minGap){
+ out[j]=locArray[i-1];
+ out[j+1]=locArray[i];
+ j+=2;
+ }
+ }
+ return out;
+ }
+
+
+ /** Generates a term that increases score with how many bases in the read match the ref. */
+ private final int scoreZ2(int[] values, int centerIndex, int offsets[], int numApproxHits, int numHits){
+
+ if(numApproxHits==1){return SCOREZ_1KEY;}
+
+ final int center=values[centerIndex];
+
+ final int maxLoc=center+MAX_INDEL2;
+ final int minLoc=Tools.max(0, center-MAX_INDEL);
+
+
+
+// final int minVal=subUnsigned(centerVal, MAX_INDEL);
+// final int maxVal=addUnsigned(centerVal, MAX_INDEL2);
+
+ int score=0;
+
+ int a0=-1, b0=-1;
+
+ for(int i=0; i=minLoc && loc<=maxLoc){
+// assert(loc>=center) : "loc="+loc+"\ni="+i+"\ncenterIndex="+centerIndex+
+// "\nmaxLoc="+maxLoc+"\nlocs:\t"+Arrays.toString(locs)+"\noffsets:\t"+Arrays.toString(offsets);
+ int a=offsets[i];
+
+ if(b0=minLoc && loc<=maxLoc){
+ int pos=offsets[i];
+// if(true){
+// System.err.println("\ni="+i+", pos="+pos+", array=["+array.length+"], limit="+(pos+CHUNKSIZE-1));
+// }
+ for(int j=pos; j=0; i--){
+
+ if(values[i]!=-1){
+ prev=loc;
+ loc=values[i];
+
+ int offset=absdif(loc, prev);
+
+ if(offset<=MAX_INDEL){
+ score+=keyScores[i];
+ if(ADD_LIST_SIZE_BONUS){score+=calcListSizeBonus(sizes[i]);}
+
+// if(i==0){score+=HIT_SCORE/2;} //Adds a bonus for matching the first or last key
+ if(penalizeIndels && offset!=0){
+ int penalty=Tools.min(INDEL_PENALTY+INDEL_PENALTY_MULT*offset, MAX_PENALTY_FOR_MISALIGNED_HIT);
+ score-=penalty;
+ }
+ }else{
+ loc=prev;
+ }
+ }
+
+ }
+ return score;
+
+ }
+
+ /** Encode a (location, chrom) pair to an index */
+ private static final int toNumber(int site, int chrom){
+ int out=(chrom&CHROM_MASK_LOW);
+ out=out<=0); //max is 3 for human; perhaps more for other organisms
+// assert((1<<(NUM_CHROM_BITS))>=CHROMSPERBLOCK) : (1<<(NUM_CHROM_BITS))+" < "+CHROMSPERBLOCK;
+ assert((1<<(NUM_CHROM_BITS))==CHROMS_PER_BLOCK) : (1<<(NUM_CHROM_BITS))+" < "+CHROMS_PER_BLOCK;
+ assert(Integer.bitCount(CHROMS_PER_BLOCK)==1);
+ assert(Integer.numberOfLeadingZeros(SITE_MASK)==(NUM_CHROM_BITS)) : Integer.toHexString(SITE_MASK);
+ }
+
+ private final int cycles;
+ //Scoring scale and indel window. MAX_INDEL and MAX_INDEL2 are mutable statics: setPerfectMode() and setSemiperfectMode() set both to 0.
+ public static final int BASE_HIT_SCORE=100; //In extended-score mode maxScore() uses readlen*(BASE_HIT_SCORE+BASE_HIT_SCORE/5)
+ public static final int ALIGN_COLUMNS=3000;
+ public static int MAX_INDEL=16000; //Max indel length, min 0, default 400; longer is more accurate. NOTE(review): "default 400" does not match the initializer 16000 - confirm which is intended.
+ public static int MAX_INDEL2=2*MAX_INDEL; //Evaluated once during class initialization; it is not recomputed if MAX_INDEL is reassigned later
+ //Per-instance penalty terms: with penalizeIndels set and a nonzero key offset, the key-walking score subtracts min(INDEL_PENALTY+INDEL_PENALTY_MULT*offset, MAX_PENALTY_FOR_MISALIGNED_HIT).
+ private final float INV_BASE_KEY_HIT_SCORE;
+ private final int INDEL_PENALTY; //default (HIT_SCORE/2)-1
+ private final int INDEL_PENALTY_MULT; //default 20; penalty for indel length
+ private final int MAX_PENALTY_FOR_MISALIGNED_HIT;
+ private final int SCOREZ_1KEY; //Returned by scoreZ2() when there is exactly one approximate hit
+ //Constant switches and multipliers for the quick-score terms.
+ public static final boolean GENERATE_KEY_SCORES_FROM_QUALITY=true; //True: Much faster and more accurate.
+ public static final boolean GENERATE_BASE_SCORES_FROM_QUALITY=true; //True: Faster, and at least as accurate.
+ public static final boolean ADD_SCORE_Z=true; //Increases quality, decreases speed. When set, scoreZ2() is added to the quick score and maxScoreZ() to maxQuickScore().
+ public static final int Z_SCORE_MULT=20;
+ public static final int Y_SCORE_MULT=10; //Multiplies scoreY() in quickScore() and the first-to-last key offset span in maxQuickScore()
+
+
+ /**
+  * Return only sites that match completely or with partial no-reference.
+  * Must not be combined with perfect mode (asserted). Besides raising SEMIPERFECTMODE, this
+  * turns off PRESCAN_QSCORE and sets both MAX_INDEL and MAX_INDEL2 to 0.
+  */
+ public static void setSemiperfectMode() {
+     assert(!PERFECTMODE);
+
+     //The indel window used when grouping nearby key hits shrinks to zero in this mode.
+     MAX_INDEL2=0;
+     MAX_INDEL=0;
+     PRESCAN_QSCORE=false;
+     SEMIPERFECTMODE=true;
+ }
+
+ /**
+  * Return only sites that match completely.
+  * Must not be combined with semiperfect mode (asserted). Besides raising PERFECTMODE, this
+  * turns off PRESCAN_QSCORE and sets both MAX_INDEL and MAX_INDEL2 to 0.
+  */
+ public static void setPerfectMode() {
+     assert(!SEMIPERFECTMODE);
+
+     //The indel window used when grouping nearby key hits shrinks to zero in this mode.
+     MAX_INDEL2=0;
+     MAX_INDEL=0;
+     PRESCAN_QSCORE=false;
+     PERFECTMODE=true;
+ }
+
+ static float FRACTION_GENOME_TO_EXCLUDE=0.03f; //Default .03; lower is slower and more accurate. For perfect reads and small genomes, lower is FASTER.
+ /** Sets FRACTION_GENOME_TO_EXCLUDE to f (asserted to lie in [0,1)) and re-derives the five list limits that are initialized from it, each as (int)(1000*(1-m*f)) for its own multiplier m. */
+ public static final void setFractionToExclude(float f){
+     assert(f>=0 && f<1);
+     FRACTION_GENOME_TO_EXCLUDE=f;
+     MAX_SINGLE_LIST_TO_SEARCH=(int)(1000*(1-1.0*f));
+     MAX_AVERAGE_LIST_TO_SEARCH2=(int)(1000*(1-1.4*f));
+     MAX_AVERAGE_LIST_TO_SEARCH=(int)(1000*(1-2.3*f)); //lower is faster
+     MAX_SHORTEST_LIST_TO_SEARCH=(int)(1000*(1-2.8*f));
+     MIN_INDEX_TO_DROP_LONG_HIT_LIST=(int)(1000*(1-3.5*f));
+ }
+
+
+ /** Default .75. Range: 0 to 1 (but 0 will break everything). Lower is faster and less accurate. NOTE(review): the stated defaults (.75 here, .8 below) both differ from the actual value 0.85f - confirm. */
+ static final float HIT_FRACTION_TO_RETAIN=0.85f; //default: .8
+ /** Range: 0 to 1000. Lower should be faster and less accurate. */
+ static int MIN_INDEX_TO_DROP_LONG_HIT_LIST=(int)(1000*(1-3.5*FRACTION_GENOME_TO_EXCLUDE)); //default 810
+ /** Range: 2 to infinity. Lower should be faster and less accurate. */
+ static final int MIN_HIT_LISTS_TO_RETAIN=6;
+ //The four limits below and MIN_INDEX_TO_DROP_LONG_HIT_LIST above are recomputed by setFractionToExclude() with these same formulas. NOTE(review): the "default NNN" figures do not match what the formulas yield for FRACTION_GENOME_TO_EXCLUDE=0.03f (e.g. the 3.5 multiplier gives 895, not 810) - confirm.
+ static int MAX_AVERAGE_LIST_TO_SEARCH=(int)(1000*(1-2.3*FRACTION_GENOME_TO_EXCLUDE)); //lower is faster, default 840
+ //lower is faster
+ static int MAX_AVERAGE_LIST_TO_SEARCH2=(int)(1000*(1-1.4*FRACTION_GENOME_TO_EXCLUDE)); //default 910
+ //lower is faster
+ static int MAX_SINGLE_LIST_TO_SEARCH=(int)(1000*(1-1.0*FRACTION_GENOME_TO_EXCLUDE)); //default 935
+ //lower is faster
+ static int MAX_SHORTEST_LIST_TO_SEARCH=(int)(1000*(1-2.8*FRACTION_GENOME_TO_EXCLUDE)); //Default 860
+
+ /** To increase accuracy on small genomes, override greedy list dismissal when the list is at most this long. */
+ public static final int SMALL_GENOME_LIST=20;
+
+ static{assert(!(TRIM_BY_GREEDY && TRIM_BY_TOTAL_SITE_COUNT)) : "Pick one.";} //The two trimming options must not both be enabled
+
+ static final int CLUMPY_MAX_DIST=5; //Keys repeating over intervals of this or less are clumpy.
+
+ /** Minimum length of list before clumpiness is considered. This is an index in the length histogram, from 0 to 1000. NOTE(review): the value 2000 lies outside the stated 0 to 1000 range - confirm whether this is really a list length rather than a histogram index. */
+ static final int CLUMPY_MIN_LENGTH_INDEX=2000;
+ static final float CLUMPY_FRACTION=0.75f; //0 to 1; higher is slower but more stringent. 0.5 means the median distance is clumpy.
+ //Captured from MAX_INDEL2 once during class initialization; later writes to MAX_INDEL2 (setPerfectMode/setSemiperfectMode set it to 0) do not change this constant.
+ static final int MAX_SUBSUMPTION_LENGTH=MAX_INDEL2;
+
+ /** approxHitsCutoff=maxHits-MAX_HITS_REDUCTION1 when slowWalk3 is first entered */
+ public static final int MAX_HITS_REDUCTION1=0;
+
+ /** approxHitsCutoff=maxHits-MAX_HITS_REDUCTION2 dynamically when best score is exceeded */
+ public static int MAX_HITS_REDUCTION2=2; //default 1; higher is more accurate (more mapping and less FP) but slower
+
+ /** approxHitsCutoff=maxHits-MAX_HITS_REDUCTION_PERFECT when perfect score is found */
+ public static final int MAX_HITS_REDUCTION_PERFECT=0;
+
+ public static int MAXIMUM_MAX_HITS_REDUCTION=3;
+ public static int HIT_REDUCTION_DIV=5;
+ /** Computes the minimum number of approximate key hits a site must have to be kept. The result is never below mahtk, currentCutoff, or hits-reduction; when perfect is true it is also at least keys-MAX_HITS_REDUCTION_PERFECT. In SEMIPERFECTMODE or PERFECTMODE a single-key read always yields 1. */
+ private static final int calcApproxHitsCutoff(final int keys, final int hits, int currentCutoff, final boolean perfect){ //***$
+ assert(keys>=hits) : keys+", "+hits;
+ assert(hits>=0);
+ //mahtk is the floor applied to the result; it starts at MIN_APPROX_HITS_TO_KEEP. NOTE(review): the rest of the perfect/semiperfect branch and the computation of "reduction" are truncated in this copy of the patch - restore from upstream.
+ int mahtk=MIN_APPROX_HITS_TO_KEEP;
+ if(SEMIPERFECTMODE || PERFECTMODE){
+ if(keys==1){return 1;}
+ else if(MIN_APPROX_HITS_TO_KEEP=0);
+ int r=hits-reduction;
+ //Never drop below the floor or the caller's current cutoff.
+ r=Tools.max(mahtk, currentCutoff, r);
+ //Callers pass perfect=true once a top score equal to the maximum has been seen.
+ if(perfect){
+ r=Tools.max(r, keys-MAX_HITS_REDUCTION_PERFECT);
+ }
+ return r;
+ }
+ //Search-strategy switches and score/qscore threshold fractions.
+ public static final boolean USE_SLOWALK3=true && USE_EXTENDED_SCORE;
+ public static boolean PRESCAN_QSCORE=true && USE_EXTENDED_SCORE; //Decrease quality and increase speed. Set to false by setPerfectMode() and setSemiperfectMode().
+ public static final boolean FILTER_BY_QSCORE=true; //Slightly lower quality, but very fast.
+ public static final float MIN_SCORE_MULT=(USE_AFFINE_SCORE ? 0.15f : USE_EXTENDED_SCORE ? .3f : 0.10f); //Fraction of max score to use as cutoff. Default 0.15, max is 1; lower is more accurate
+ public static final float MIN_QSCORE_MULT=0.025f; //Fraction of max score to use as cutoff. Default 0.15, max is 1; lower is more accurate. NOTE(review): this text repeats MIN_SCORE_MULT's comment and "Default 0.15" does not match 0.025f - confirm.
+ public static final float MIN_QSCORE_MULT2=0.1f;
+ static final float DYNAMIC_SCORE_THRESH=(USE_AFFINE_SCORE ? 0.84f : USE_EXTENDED_SCORE ? .84f : 0.6f); //Default .85f; lower is more accurate. The walk raises its score cutoff to at least topScore*DYNAMIC_SCORE_THRESH.
+ static final float DYNAMIC_QSCORE_THRESH=0.6f; //default .58f. For a site whose score passes the cutoff, qcutoff is raised to at least qscore*DYNAMIC_QSCORE_THRESH.
+ static final float DYNAMIC_QSCORE_THRESH_PERFECT=0.8f; //***$ Once the top score equals maxScore, qcutoff is raised to at least maxQuickScore*DYNAMIC_QSCORE_THRESH_PERFECT.
+ static final float PRESCAN_QSCORE_THRESH=DYNAMIC_QSCORE_THRESH*.95f; //default 1.0f; lower is more accurate and 0 essentially sets PRESCAN_QSCORE=false
+ static{ //Sanity checks: both cutoff fractions must lie in [0,1)
+ assert(MIN_SCORE_MULT>=0 && MIN_SCORE_MULT<1);
+ assert(DYNAMIC_SCORE_THRESH>=0 && DYNAMIC_SCORE_THRESH<1);
+ }
+
+
+}
diff --git a/current/align2/BBIndexAcc.java b/current/align2/BBIndexAcc.java
new file mode 100755
index 0000000..00511cf
--- /dev/null
+++ b/current/align2/BBIndexAcc.java
@@ -0,0 +1,2809 @@
+package align2;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+
+import stream.Read;
+import stream.SiteScore;
+
+import dna.AminoAcid;
+import dna.Data;
+import dna.Gene;
+
+
+/**
+ * Based on Index11a
+ *
+ *
+ *
+ *
+ * @author Brian Bushnell
+ * @date Jul 11, 2012
+ *
+ */
+public final class BBIndexAcc extends AbstractIndex {
+
+
+ public static void main(String[] args){
+
+ int k=13;
+
+ for(int i=0; iData.numChroms){maxChrom=Data.numChroms;}
+ assert(minChrom<=maxChrom);
+ Data.sysout.println("Loading index for chrom "+minChrom+"-"+maxChrom+", genome "+Data.GENOME_BUILD);
+ index=IndexMaker4.makeIndex(Data.GENOME_BUILD, minChrom, maxChrom,
+ k, NUM_CHROM_BITS, MAX_ALLOWED_CHROM_INDEX, CHROM_MASK_LOW, CHROM_MASK_HIGH, SITE_MASK, SHIFT_LENGTH, COLORSPACE, writeToDisk, diskInvalid, index);
+ }
+
+ /** Calculate statistics of index, such as list lengths, and find clumpy keys. Requires cs to be false and expects COUNTS and lengthHistogram to be unset on entry (all asserted). Assigns the static COUNTS and lengthHistogram, MAX_USABLE_LENGTH and MAX_USABLE_LENGTH2 (only when REMOVE_FREQUENT_GENOME_FRACTION is set), and Solver.POINTS_PER_SITE. */
+ public static final synchronized void analyzeIndex(int minChrom, int maxChrom, boolean cs, float fractionToExclude, int k){
+ assert(!cs) : "Re-enable old reverse complement mode.";
+ assert(lengthHistogram==null);
+ assert(COUNTS==null);
+ //COUNTS gets 1<<(2*k) entries and is indexed by key value.
+ int KEYSPACE=1<<(2*k);
+ COUNTS=new int[KEYSPACE];
+ maxChrom=maxChrom(maxChrom);
+ //cmap accumulates clump totals, stored under the smaller of a key and its reverse complement.
+ HashMap cmap=new HashMap();
+ //Each iteration reads one Block from index; chrom then advances to (chrom&CHROM_MASK_HIGH)+CHROMS_PER_BLOCK.
+ for(int chrom=minChrom; chrom<=maxChrom; chrom=((chrom&CHROM_MASK_HIGH)+CHROMS_PER_BLOCK)){
+ Block b=index[chrom];
+ final int[] sites=b.sites;
+ final int[] starts=b.starts;
+ //NOTE(review): the loop header below is truncated in this copy of the patch. The visible part counts a clump whenever dif is positive and at most CLUMPY_MAX_DIST.
+ for(int key=0; key0 && dif<=CLUMPY_MAX_DIST){
+ clumps++;
+ }
+ }
+ if(clumps>0){
+ final int x=Tools.min(key, AminoAcid.reverseComplementBinaryFast(key, k));
+ final Integer ko=x;
+ LongM lm=cmap.get(ko);
+ if(lm==null){
+ lm=new LongM(0);
+ cmap.put(ko, lm);
+ }
+ lm.increment(clumps);
+ }
+ }
+ }
+ }
+ //NOTE(review): this loop header is also truncated here. For keys that pass the visible clumpiness test, COUNTS is zeroed for both the key and its reverse complement.
+ for(int key=0; keyCLUMPY_MIN_LENGTH_INDEX && clumps>CLUMPY_FRACTION*len)/* || (len>8*CLUMPY_MIN_LENGTH_INDEX && clumps>.75f*CLUMPY_FRACTION*len)*/){
+ int rkey=AminoAcid.reverseComplementBinaryFast(key, k);
+ assert(key<=rkey);
+ assert(key==KeyRing.reverseComplementKey(rkey, k, cs));
+ COUNTS[key]=0;
+ COUNTS[rkey]=0;
+ }
+ }
+ }
+ //Build the length histogram from COUNTS (1000 is the size argument passed to makeLengthHistogram3).
+ lengthHistogram=Tools.makeLengthHistogram3(COUNTS, 1000, verbose2);
+
+ if(verbose2){System.err.println("lengthHistogram: "+Arrays.toString(lengthHistogram));}
+
+ if(REMOVE_FREQUENT_GENOME_FRACTION){
+ //Histogram positions for the two limits: (1-fractionToExclude) of the way through, and the same with fractionToExclude scaled by DOUBLE_SEARCH_THRESH_MULT.
+ int lengthLimitIndex=(int)((1-fractionToExclude)*(lengthHistogram.length-1));
+ int lengthLimitIndex2=(int)((1-fractionToExclude*DOUBLE_SEARCH_THRESH_MULT)*(lengthHistogram.length-1));
+ //Each limit is floored at a multiple of SMALL_GENOME_LIST (2x and 6x).
+ MAX_USABLE_LENGTH=Tools.max(2*SMALL_GENOME_LIST, lengthHistogram[lengthLimitIndex]);
+ MAX_USABLE_LENGTH2=Tools.max(6*SMALL_GENOME_LIST, lengthHistogram[lengthLimitIndex2]);
+
+ if(verbose2){System.err.println("MAX_USABLE_LENGTH: "+MAX_USABLE_LENGTH+"\nMAX_USABLE_LENGTH2: "+MAX_USABLE_LENGTH2);}
+ }
+ //POINTS_PER_SITE is expected to end up negative: a computed 0 is replaced by -1 and the final assert checks the sign.
+ Solver.POINTS_PER_SITE=(int)Math.floor((Solver.BASE_POINTS_PER_SITE*4000f)/Tools.max(2*SMALL_GENOME_LIST, lengthHistogram[MAX_AVERAGE_LIST_TO_SEARCH]));
+ if(Solver.POINTS_PER_SITE==0){Solver.POINTS_PER_SITE=-1;}
+ if(verbose2){System.err.println("POINTS_PER_SITE: "+Solver.POINTS_PER_SITE);}
+ assert(Solver.POINTS_PER_SITE<0) : Solver.POINTS_PER_SITE;
+ }
+
+// /** Calculate statistics of index, such as list lengths, and find clumpy keys */
+// public static final synchronized void analyzeIndex(int minChrom, int maxChrom, boolean cs, float fractionToExclude, int k){
+//
+// assert(lengthHistogram==null);
+// assert(COUNTS==null);
+//
+// int KEYSPACE=1<<(2*k);
+// COUNTS=new int[KEYSPACE];
+//
+// maxChrom=maxChrom(maxChrom);
+//
+// for(int key=0; key0 && dif<=CLUMPY_MAX_DIST){
+// clumps++;
+// }
+// }
+//
+// for(int i=start2+1; i0 && dif<=CLUMPY_MAX_DIST){
+// clumps++;
+// }
+// }
+// }
+//
+// }
+//
+// COUNTS[key]=(int)Tools.min(Integer.MAX_VALUE, COUNTS[key]+len);
+// if(key!=rkey){COUNTS[rkey]=(int)Tools.min(Integer.MAX_VALUE, COUNTS[rkey]+len);}
+// assert(COUNTS[key]==COUNTS[rkey]) : key+", "+rkey;
+//
+// if(REMOVE_CLUMPY && len>CLUMPY_MIN_LENGTH_INDEX && clumps>(CLUMPY_FRACTION*len)){
+// COUNTS[key]=0;
+// COUNTS[rkey]=0;
+// for(int chrom=minChrom; chrom<=maxChrom; chrom=((chrom&CHROM_MASK_HIGH)+CHROMS_PER_BLOCK)){
+// Block b=index[chrom];
+// final int[] sites=b.sites;
+// sites[b.starts[key]]=-1;
+// sites[b.starts[rkey]]=-1;
+// }
+// }
+//
+//// System.err.println("COUNTS["+key+"] = "+COUNTS[key]+", COUNTS["+rkey+"] = "+COUNTS[rkey]);
+// }
+// }
+//
+// lengthHistogram=Tools.makeLengthHistogram3(COUNTS, 1000, verbose2);
+//
+// if(verbose2){System.err.println("lengthHistogram: "+Arrays.toString(lengthHistogram));}
+//
+// if(REMOVE_FREQUENT_GENOME_FRACTION){
+//
+// int lengthLimitIndex=(int)((1-fractionToExclude)*(lengthHistogram.length-1));
+// int lengthLimitIndex2=(int)((1-fractionToExclude*DOUBLE_SEARCH_THRESH_MULT)*(lengthHistogram.length-1));
+//
+// MAX_USABLE_LENGTH=Tools.max(2*SMALL_GENOME_LIST, lengthHistogram[lengthLimitIndex]);
+// MAX_USABLE_LENGTH2=Tools.max(6*SMALL_GENOME_LIST, lengthHistogram[lengthLimitIndex2]);
+//
+// if(verbose2){System.err.println("MAX_USABLE_LENGTH: "+MAX_USABLE_LENGTH+"\nMAX_USABLE_LENGTH2: "+MAX_USABLE_LENGTH2);}
+// }
+//
+// Solver.POINTS_PER_SITE=(int)Math.floor((Solver.BASE_POINTS_PER_SITE*4000f)/Tools.max(2*SMALL_GENOME_LIST, lengthHistogram[MAX_AVERAGE_LIST_TO_SEARCH]));
+// if(Solver.POINTS_PER_SITE==0){Solver.POINTS_PER_SITE=-1;}
+// if(verbose2){System.err.println("POINTS_PER_SITE: "+Solver.POINTS_PER_SITE);}
+// assert(Solver.POINTS_PER_SITE<0) : Solver.POINTS_PER_SITE;
+// }
+
+ /** Returns the filename for the block holding this chrom; the block's bounds are taken from minChrom(chrom) and maxChrom(chrom). */
+ public static final String fname(int chrom, int k){
+     final int blockMin=minChrom(chrom), blockMax=maxChrom(chrom);
+     return IndexMaker4.fname(blockMin, blockMax, k, NUM_CHROM_BITS, COLORSPACE);
+ }
+
+ /** Ensure key offsets are strictly ascending. */
+ private static boolean checkOffsets(int[] offsets){
+ for(int i=1; i0){
+ if(x=shortest);
+ if(initialHitCountlimit3){
+ for(int i=0; ilimit || sum/finalHitCount>limit2; i--){
+ Pointer p=ptrs[i];
+ sum-=hits[p.key].length;
+ hits[p.key]=null;
+ finalHitCount--;
+ }
+
+ return finalHitCount;
+ }
+
+ /** Remove least useful keys to accelerate search */
+ private final int trimExcessHitListsByGreedy(int[] offsets, int[] keyScores, int maxHitLists, int[] keys){
+
+ float[] keyWeights=getKeyWeightArray(keyScores.length);
+ for(int i=0; ilimitS){hits[i]=null;}
+// }
+
+ final int[] lengths=getGenericArray(keys.length);
+
+ for(int i=0; i0){
+ if(x=shortest);
+ if(initialHitCountlimit3){
+ for(int i=0; i=MIN_APPROX_HITS_TO_KEEP && (sum>limit || sum/initialHitCount>limit2 || hitsCount>maxHitLists/* || worstValue<0*/)){
+ final int[] lists=getGreedyListArray(hitsCount);
+ for(int i=0, j=0; j0){
+ lists[j]=i;
+ j++;
+ }
+ }
+
+ Solver.findWorstGreedy(offsets, lengths, keyWeights, KEYLEN, lists, greedyReturn);
+ int worstIndex=greedyReturn[0];
+ int worst=lists[worstIndex];
+ worstValue=greedyReturn[1];
+ sum-=lengths[worst];
+
+// if(worstValue>0 && (hitsCount<=maxHitLists || lengths[worst]0 || lengths[worst]=0){
+ final int len=count(key);
+ if(len>0 && len0){
+ starts[i]=b.starts[key];
+ stops[i]=starts[i]+len2;
+ numHits++;
+ }
+ }
+ }
+ }
+ return numHits;
+ }
+
+
+ private final int countHits(final int[] keys, final int maxLen, boolean clearBadKeys){
+ int numHits=0;
+ for(int i=0; i=0){
+ final int len=count(key);
+ if(len>0 && len findAdvanced(byte[] basesP, byte[] basesM, byte[] qual, byte[] baseScoresP, int[] keyScoresP, int[] offsets, long id){
+ assert(minChrom<=maxChrom && minChrom>=0);
+ ArrayList result=find(basesP, basesM, qual, baseScoresP, keyScoresP, offsets, true, id);
+ if(DOUBLE_SEARCH_NO_HIT && (result==null || result.isEmpty())){result=find(basesP, basesM, qual, baseScoresP, keyScoresP, offsets, false, id);}
+
+ return result;
+ }
+
+
+ public final ArrayList find(byte[] basesP, byte[] basesM, byte[] qual, byte[] baseScoresP, int[] keyScoresP, int[] offsetsP, boolean obeyLimits, long id){
+
+ assert(checkOffsets(offsetsP)) : Arrays.toString(offsetsP);
+ final int[] keysOriginal=KeyRing.makeKeys(basesP, offsetsP, KEYLEN, COLORSPACE);
+ int[] keysP=Arrays.copyOf(keysOriginal, keysOriginal.length);
+
+ initialKeys+=offsetsP.length;
+ initialKeyIterations++;
+
+ final int maxLen=(obeyLimits ? MAX_USABLE_LENGTH : MAX_USABLE_LENGTH2);
+
+ int numHits=0;
+ numHits=countHits(keysP, maxLen, true);
+ if(numHits>0){ //TODO: Change these to higher numbers
+ int trigger=(3*keysP.length)/4;
+ if(numHits<6 && numHitsMIN_APPROX_HITS_TO_KEEP){
+ int cutoffIndex=((int) (HIT_FRACTION_TO_RETAIN*(keysP.length)-0.01f))+(keysP.length-numHits);
+
+ int zeroes=keysP.length-numHits;
+ int altMinIndex=(zeroes+(MIN_HIT_LISTS_TO_RETAIN-1));
+ cutoffIndex=Tools.max(cutoffIndex, altMinIndex);
+
+ assert(cutoffIndex>0) : cutoffIndex+"\n"+numHits;
+
+ if(cutoffIndex<(keysP.length-1)){
+ int[] lens=getGenericArray(keysP.length);
+ for(int i=0; i