Skip to content

Commit

Permalink
minimap2 support
Browse files Browse the repository at this point in the history
  • Loading branch information
richardmleggett committed Jun 18, 2018
1 parent 521b6c3 commit b509ed0
Show file tree
Hide file tree
Showing 7 changed files with 154 additions and 50 deletions.
Binary file modified dist/NanoOK.jar
Binary file not shown.
51 changes: 35 additions & 16 deletions src/nanook/CIGARString.java
Original file line number Diff line number Diff line change
Expand Up @@ -134,35 +134,50 @@ public boolean processString() {
int insCount = 0;
int matchCount = 0;
boolean processed = true;
boolean debug = false;

//System.out.println("Query filename: "+queryFilename);
//System.out.println("CIGAR: "+cigarString);
//System.out.println(" Hit: "+hitSeq.length()+" "+hitSeq);
//System.out.println("Query: "+querySeq.length()+" "+querySeq);
//if (queryFilename.startsWith("N79596_Lambda8kbp_LCv4_test_3559_1_ch60_file49_strand_BaseCalled_Complement.fasta.sam")) {
//if (queryFilename.startsWith("N79596_Lambda8kbp_LCv4_test_3559_1_ch96_file14_strand_BaseCalled_2D.fasta.sam")) {
// System.out.println("\n\nDEBUGGING THIS...");
// debug = true;
//}

if (debug) {
System.out.println("Query filename: "+queryFilename);
System.out.println("CIGAR: "+cigarString);
System.out.println(" Hit: "+hitSeq.length()+" "+hitSeq);
System.out.println("Query: "+querySeq.length()+" "+querySeq);
}

try {
hitAlnSize = 0;
queryAlnSize = 0;
hitAlnSize = 0;
while ((i<cigarString.length()) && (continueParsing)) {
//for (int i=0; i<cigarString.length(); i++) {
//System.out.println("hitPtr="+hitPtr+" queryPtr="+queryPtr);
//System.out.println("Query: " + queryString.toString());
//System.out.println(" Hit: " + hitString.toString());
if (debug) {
System.out.println("hitPtr="+hitPtr+" queryPtr="+queryPtr);
//System.out.println("Query: " + queryString.toString());
//System.out.println(" Hit: " + hitString.toString());
}
char c = cigarString.charAt(i);

if (Character.isDigit(c)) {
value = value + c;
} else {
int n = Integer.parseInt(value);
totalCount += n;
//System.out.println(n + " " + c);
if (debug) {
System.out.println(n + " " + c);
}
switch(c) {
case 'M':
case '=':
case 'X':
//System.out.println(hitString.length() + " " + hitPtr);
//System.out.println("Hit up: " + hitSeq.substring(hitPtr));
if (debug) {
System.out.println(hitString.length() + " " + hitPtr);
//System.out.println("Hit up: " + hitSeq.substring(hitPtr));
}
queryString.append(querySeq.substring(queryPtr, queryPtr + n));
hitString.append(hitSeq.substring(hitPtr, hitPtr + n));
queryPtr += n;
Expand All @@ -173,7 +188,7 @@ public boolean processString() {
matchCount+=n;
break;
case 'I':
if (n > 100) {
if (n > 200) {
// DEBUG MODE TURNS OFF THIS
System.out.println("");
System.out.println("Error: large I ("+n+") - read "+queryID+" ignored");
Expand All @@ -191,7 +206,7 @@ public boolean processString() {
insCount+=n;
break;
case 'D':
if (n > 100) {
if (n > 500) {
System.out.println("Error: large D ("+n+") - read "+queryID+" ignored");
processed = false;
continueParsing = false;
Expand Down Expand Up @@ -248,13 +263,17 @@ public boolean processString() {
}
value="";
tagCtr++;
//System.out.println("qseq="+querySeq.length()+" matchCount="+matchCount+" insCount="+insCount+" delCount="+delCount+" totalCount="+totalCount);
//System.out.println("Query: "+queryString.toString());
//System.out.println(" Hit: "+hitString.toString());
if (debug) {
//System.out.println("qseq="+querySeq.length()+" matchCount="+matchCount+" insCount="+insCount+" delCount="+delCount+" totalCount="+totalCount);
//System.out.println("Query: "+queryString.toString());
//System.out.println(" Hit: "+hitString.toString());
}
}

i++;
//System.out.println("i="+i+" and length="+cigarString.length());
if (debug) {
System.out.println("i="+i+" and length="+cigarString.length());
}
}
} catch (Exception e) {
e.printStackTrace();
Expand Down
2 changes: 1 addition & 1 deletion src/nanook/GraphMapParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import java.io.File;

/**
* Parser for BWA files
* Parser for GraphMap files
* @author Richard Leggett
*/
public class GraphMapParser extends SAMParser implements AlignmentFileParser {
Expand Down
70 changes: 70 additions & 0 deletions src/nanook/Minimap2Parser.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* Program: NanoOK
* Author: Richard M. Leggett
*
* Copyright 2015 The Genome Analysis Centre (TGAC)
*/

package nanook;

import java.io.File;

/**
 * Parser for Minimap2 files.
 * <p>
 * Supplies the minimap2 command line used to align a query against a
 * reference and checks that a minimap2 index (.mmi) exists for the
 * reference. SAM handling is inherited from {@link SAMParser}.
 *
 * @author Richard Leggett
 */
public class Minimap2Parser extends SAMParser implements AlignmentFileParser {
    /** Extra minimap2 arguments placed before "-a"; may be replaced via {@link #setAlignmentParams(String)}. */
    private String alignmentParams = "-x map-ont";
    private final NanoOKOptions options;

    /**
     * @param o NanoOK options; also consulted for the read format
     * @param r references passed through to the SAM parser
     */
    public Minimap2Parser(NanoOKOptions o, References r) {
        super(o, r);
        options = o;
    }

    /**
     * @return the identifier of this aligner, "minimap2"
     */
    public String getProgramID() {
        return "minimap2";
    }

    /**
     * @return the read format configured in the options
     */
    public int getReadFormat() {
        return options.getReadFormat();
    }

    /**
     * Replaces the extra arguments passed to minimap2.
     *
     * @param p aligner arguments; null or empty means no extra arguments
     */
    public void setAlignmentParams(String p) {
        alignmentParams = p;
    }

    /**
     * @return true, i.e. this aligner's output is reported as going to stdout
     */
    public boolean outputsToStdout() {
        return true;
    }

    /**
     * Builds the minimap2 command line for one query file.
     *
     * @param query path of the query (reads) file
     * @param output not used when building the command (see {@link #outputsToStdout()})
     * @param reference reference path; ".mmi" is appended to locate the index
     * @return the command line to execute
     */
    public String getRunCommand(String query, String output, String reference) {
        StringBuilder command = new StringBuilder("minimap2 ");

        // Compare by content, not by reference: an empty string created at
        // runtime is not the same object as the "" literal.
        if (alignmentParams != null && !alignmentParams.isEmpty()) {
            command.append(alignmentParams).append(' ');
        }

        command.append("-a ").append(reference).append(".mmi ").append(query);
        return command.toString();
    }

    /**
     * Checks that the minimap2 index for the reference exists and exits
     * the program with status 1 if it does not.
     *
     * @param referenceFile reference path to which ".fasta.mmi" is appended
     */
    public void checkForIndex(String referenceFile) {
        // NOTE(review): this always looks for "<referenceFile>.fasta.mmi",
        // whereas getRunCommand uses "<reference>.mmi". If callers strip the
        // extension from a ".fa" reference the two may disagree - confirm
        // against the callers.
        File f = new File(referenceFile + ".fasta.mmi");

        if (!f.exists()) {
            System.out.println("");
            System.out.println("Error:");
            System.out.println("Can't find file " + f.getPath());
            System.out.println("Have you indexed the reference with minimap2 -d ref.fasta.mmi ref.fasta?");
            System.exit(1);
        }
    }
}
2 changes: 1 addition & 1 deletion src/nanook/NanoOK.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
* @author Richard Leggett
*/
public class NanoOK {
public final static String VERSION_STRING = "v1.31";
public final static String VERSION_STRING = "v1.32";
public final static long SERIAL_VERSION = 3L;

/**
Expand Down
15 changes: 13 additions & 2 deletions src/nanook/NanoOKOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ public void parseArgs(String[] args) {
System.out.println(" -force to force NanoOK to ignore warnings");
System.out.println(" -timeout to set the number of seconds before giving up waiting for new reads (default 2)");
System.out.println("");
System.out.println("Valid aligners: last, bwa, blasr, marginalign, graphmap");
System.out.println("Valid aligners: last, bwa, blasr, marginalign, graphmap, minimap2");
System.out.println("");
System.exit(0);
}
Expand Down Expand Up @@ -410,10 +410,13 @@ public void parseArgs(String[] args) {
System.out.println("Error: You must specify a reference");
System.exit(1);
}

if (!referenceFile.endsWith(".fa") && !referenceFile.endsWith(".fasta")) {
System.out.println("Error: reference must specify a .fa or .fasta file");
System.exit(1);
}

checkParser();
}

if (runMode == MODE_PROCESS) {
Expand Down Expand Up @@ -985,6 +988,9 @@ public AlignmentFileParser getParser() {
case "graphmap":
parser = new GraphMapParser(this, references);
break;
case "minimap2":
parser = new Minimap2Parser(this, references);
break;
default:
System.out.println("Aligner unknown!");
System.out.println("");
Expand All @@ -995,10 +1001,15 @@ public AlignmentFileParser getParser() {
if (alignerParams != "") {
parser.setAlignmentParams(alignerParams);
}

return parser;
}

/**
 * Obtains the parser for the selected aligner and calls its
 * checkForIndex with the reference filename truncated at its last '.'.
 */
public void checkParser() {
    AlignmentFileParser alignerParser = getParser();
    String referencePath = getReferenceFile();
    int extensionStart = referencePath.lastIndexOf('.');
    String referenceBase = referencePath.substring(0, extensionStart);
    alignerParser.checkForIndex(referenceBase);
}

public boolean doKmerCounting() {
return doKmerCounting;
}
Expand Down
64 changes: 34 additions & 30 deletions src/nanook/SAMParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -112,43 +112,47 @@ private Alignment processAlignmentLine(String alignmentFile, String s, String ou
}

if (mapped) {
ReferenceSequence readReference = references.getReferenceById(hitName);
if (readReference != null) {
int readLength = overallStats.getReadLength(alignmentFile, queryName);
if (readLength != -1) {
CIGARString cs = new CIGARString(cigar, seq, leafName, queryName, hitStart, options.getReferenceFile(), readReference, alignmentFile);
if (cs.processString()) {
//System.out.println("hitName "+hitName);
al = new Alignment(mapQuality,
queryName,
readLength,
cs.getQueryStart(),
cs.getQueryAlnSize(),
cs.getQueryString(),
hitName,
readReference.getSize(),
hitStart,
cs.getHitAlnSize(),
cs.getHitString(),
false);
if (!seq.startsWith("*")) {
ReferenceSequence readReference = references.getReferenceById(hitName);
if (readReference != null) {
int readLength = overallStats.getReadLength(alignmentFile, queryName);
if (readLength != -1) {
CIGARString cs = new CIGARString(cigar, seq, leafName, queryName, hitStart, options.getReferenceFile(), readReference, alignmentFile);
if (cs.processString()) {
//System.out.println("hitName "+hitName);
al = new Alignment(mapQuality,
queryName,
readLength,
cs.getQueryStart(),
cs.getQueryAlnSize(),
cs.getQueryString(),
hitName,
readReference.getSize(),
hitStart,
cs.getHitAlnSize(),
cs.getHitString(),
false);

// Check for reverse complement
if ((flags & 0x10) == 0x10) {
al.setQueryStrand("-");
}

al.writeMafFile(outputFilename);

// Check for reverse complement
if ((flags & 0x10) == 0x10) {
al.setQueryStrand("-");
}

al.writeMafFile(outputFilename);

} else {
System.out.println("Error: can't find read length for ["+queryName+"]");
System.exit(1);
}

} else {
System.out.println("Error: can't find read length for ["+queryName+"]");
System.exit(1);
System.out.println("");
System.out.println("Error: Couldn't find reference "+hitName);
}
} else {
System.out.println("");
System.out.println("Error: Couldn't find reference "+hitName);
}
} else {
System.out.println("Not mapped: "+leafName);
}

return al;
Expand Down

0 comments on commit b509ed0

Please sign in to comment.