diff --git a/pom.xml b/pom.xml
index 3cf738669..80ab82d68 100644
--- a/pom.xml
+++ b/pom.xml
@@ -4,7 +4,7 @@
au.edu.wehi
gridss
jar
- 1.5.0
+ 1.5.1
gridss
https://github.com/PapenfussLab/gridss
diff --git a/src/main/java/au/edu/wehi/idsv/model/FastEmpiricalReferenceLikelihoodModel.java b/src/main/java/au/edu/wehi/idsv/model/FastEmpiricalReferenceLikelihoodModel.java
index e4aa414d4..81234c7ce 100644
--- a/src/main/java/au/edu/wehi/idsv/model/FastEmpiricalReferenceLikelihoodModel.java
+++ b/src/main/java/au/edu/wehi/idsv/model/FastEmpiricalReferenceLikelihoodModel.java
@@ -47,7 +47,7 @@ public double scoreReadPair(IdsvSamFileMetrics metrics, int fragmentSize, int ma
public double scoreUnmappedMate(IdsvSamFileMetrics metrics, int mapq) {
IdsvMetrics im = metrics.getIdsvMetrics();
// completely unmapped read pairs are excluded for consistency with sc and dp calculation
- double score = MathUtil.prToPhred((double)im.READ_PAIRS_ONE_MAPPED / (double)(im.READ_PAIRS - im.READ_PAIRS_ZERO_MAPPED));
+ double score = MathUtil.prToPhred((double)im.READ_PAIRS_ONE_MAPPED / (double)(im.MAPPED_READS));
score = Math.min(score, mapq);
return score;
}
diff --git a/src/main/java/gridss/ComputeCoverage.java b/src/main/java/gridss/ComputeCoverage.java
index ee13c9ee9..000267c84 100644
--- a/src/main/java/gridss/ComputeCoverage.java
+++ b/src/main/java/gridss/ComputeCoverage.java
@@ -93,6 +93,7 @@ private IntervalCoverageAccumulator initIntervalCoverageAccumulator() {
}
@Override
protected void acceptRead(SAMRecord record, ReferenceSequence refSeq) {
+ if (record.getDuplicateReadFlag() && !INCLUDE_DUPLICATES) return;
ReadGcSummary gc = new ReadGcSummary(record, refSeq, UNPAIRED_FRAGMENT_SIZE, getReadPairConcordanceCalculator());
if (ica_gc != null) {
ica_gc.add(record, gc, gcAdjust.adjustmentMultiplier((int)gc.gcPercentage));
diff --git a/src/main/java/gridss/ExtractSVReads.java b/src/main/java/gridss/ExtractSVReads.java
index 770172a17..9d6eb19a8 100644
--- a/src/main/java/gridss/ExtractSVReads.java
+++ b/src/main/java/gridss/ExtractSVReads.java
@@ -191,6 +191,7 @@ public boolean[] shouldExtract(List records, ReferenceLookup lookup)
extract[i] = !hasConsistentReadAlignment[SAMRecordUtil.getSegmentIndex(r)] && !readfilter.filterOut(r);
// supp records should use the primary alignment when considering concordance
extract[i] |= !hasConsistentReadPair && !pairfilter.filterOut(primaryAlignmentForSupplementary(r));
+ extract[i] &= (!r.getDuplicateReadFlag() || INCLUDE_DUPLICATES);
}
return extract;
}
diff --git a/src/main/java/gridss/analysis/CollectCigarMetrics.java b/src/main/java/gridss/analysis/CollectCigarMetrics.java
index 8ed573cf3..7f642361c 100644
--- a/src/main/java/gridss/analysis/CollectCigarMetrics.java
+++ b/src/main/java/gridss/analysis/CollectCigarMetrics.java
@@ -57,6 +57,9 @@ public class CollectCigarMetrics extends SinglePassSamProgram {
@Argument(shortName="Z", doc="If set to true include a zero length operator for each operator not included in the alignment CIGAR.")
public boolean INCLUDE_OMITTED_OPERATORS = true;
+ @Argument(doc="If true, also include reads marked as duplicates.")
+ public boolean INCLUDE_DUPLICATES = false;
+
private EnumMap> cigar;
/** Required main method. */
@@ -79,6 +82,7 @@ protected void acceptRead(final SAMRecord rec, final ReferenceSequence ref) {
// Skip unwanted records
if (rec.getReadUnmappedFlag()) return;
if (rec.getCigar() == null) return;
+ if (rec.getDuplicateReadFlag() && !INCLUDE_DUPLICATES) return;
List list = rec.getCigar().getCigarElements();
if (list == null || list.size() == 0) return;
for (CigarElement ce : list) {
diff --git a/src/main/java/gridss/analysis/CollectFragmentGCMetrics.java b/src/main/java/gridss/analysis/CollectFragmentGCMetrics.java
index e6ff0207e..2f3e13ade 100644
--- a/src/main/java/gridss/analysis/CollectFragmentGCMetrics.java
+++ b/src/main/java/gridss/analysis/CollectFragmentGCMetrics.java
@@ -78,11 +78,8 @@ public class CollectFragmentGCMetrics extends GcSinglePassSamProgram {
}
@Override protected void acceptRead(final SAMRecord record, final ReferenceSequence ref) {
- if (record.getDuplicateReadFlag() && IGNORE_DUPLICATES) {
- // ignore duplicates
- } else {
- multiCollector.acceptRecord(record, ref);
- }
+ if (record.getDuplicateReadFlag() && !INCLUDE_DUPLICATES) return;
+ multiCollector.acceptRecord(record, ref);
}
@Override protected void finish() {
diff --git a/src/main/java/gridss/analysis/CollectIdsvMetrics.java b/src/main/java/gridss/analysis/CollectIdsvMetrics.java
index ef489153b..14d619ba1 100644
--- a/src/main/java/gridss/analysis/CollectIdsvMetrics.java
+++ b/src/main/java/gridss/analysis/CollectIdsvMetrics.java
@@ -1,31 +1,8 @@
-/*
- * The MIT License
- *
- * Copyright (c) 2009 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
package gridss.analysis;
import java.io.File;
+import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import au.edu.wehi.idsv.sam.SAMRecordUtil;
@@ -48,6 +25,9 @@
public class CollectIdsvMetrics extends SinglePassSamProgram {
public static final String METRICS_SUFFIX = ".idsv_metrics";
+ @Argument(doc="If true, also include reads marked as duplicates.")
+ public boolean INCLUDE_DUPLICATES = false;
+
private IdsvMetrics idsv;
/** Required main method. */
@@ -63,6 +43,7 @@ public void setup(final SAMFileHeader header, final File samFile) {
@Override
public void acceptRead(final SAMRecord record, final ReferenceSequence ref) {
+ if (record.getDuplicateReadFlag() && !INCLUDE_DUPLICATES) return;
idsv.MAX_READ_LENGTH = Math.max(idsv.MAX_READ_LENGTH, record.getReadLength());
if (!record.getReadUnmappedFlag()) {
idsv.MAX_READ_MAPPED_LENGTH = Math.max(idsv.MAX_READ_MAPPED_LENGTH, record.getAlignmentEnd() - record.getAlignmentStart() + 1);
diff --git a/src/main/java/gridss/analysis/CollectMapqMetrics.java b/src/main/java/gridss/analysis/CollectMapqMetrics.java
index 2a7a18a58..8cfbb1087 100644
--- a/src/main/java/gridss/analysis/CollectMapqMetrics.java
+++ b/src/main/java/gridss/analysis/CollectMapqMetrics.java
@@ -1,27 +1,3 @@
-/*
- * The MIT License
- *
- * Copyright (c) 2016 Daniel Cameron
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
package gridss.analysis;
import java.io.File;
@@ -59,6 +35,9 @@ public class CollectMapqMetrics extends SinglePassSamProgram {
public static final String METRICS_SUFFIX = ".mapq_metrics";
public static final String HISTOGRAM_SUFFIX = ".mapq_histogram.pdf";
private static final String Histogram_R_SCRIPT = "gridss/analysis/mapqHistogram.R";
+
+ @Argument(doc="If true, also include reads marked as duplicates.")
+ public boolean INCLUDE_DUPLICATES = false;
@Argument(shortName="H", doc="File to write insert size Histogram chart to.")
public File Histogram_FILE = null;
@@ -98,6 +77,7 @@ protected String[] customCommandLineValidation() {
}
@Override protected void acceptRead(final SAMRecord record, final ReferenceSequence ref) {
+ if (record.getDuplicateReadFlag() && !INCLUDE_DUPLICATES) return;
multiCollector.acceptRecord(record, ref);
}
diff --git a/src/main/java/gridss/analysis/CollectStructuralVariantReadMetrics.java b/src/main/java/gridss/analysis/CollectStructuralVariantReadMetrics.java
index cfedf6a21..34b0566e9 100644
--- a/src/main/java/gridss/analysis/CollectStructuralVariantReadMetrics.java
+++ b/src/main/java/gridss/analysis/CollectStructuralVariantReadMetrics.java
@@ -27,6 +27,7 @@
)
public class CollectStructuralVariantReadMetrics extends ProcessStructuralVariantReadsCommandLineProgram {
public static final String METRICS_SUFFIX = ".sv_metrics";
+
//private static final Log log = Log.getInstance(CollectStructuralVariantReadMetrics.class);
public static void main(String[] argv) {
System.exit(new CollectStructuralVariantReadMetrics().instanceMain(argv));
@@ -50,6 +51,13 @@ public void setup(SAMFileHeader header, File samFile) {
}
@Override
public void acceptFragment(List records, ReferenceLookup lookup) {
+ if (!INCLUDE_DUPLICATES) {
+ for (int i = records.size() - 1; i >= 0; i--) {
+ if (records.get(i).getDuplicateReadFlag()) {
+ records.remove(i);
+ }
+ }
+ }
boolean hasConsistentReadPair = ExtractSVReads.hasReadPairingConsistentWithReference(getReadPairConcordanceCalculator(), records);
boolean[] hasConsistentReadAlignment = ExtractSVReads.hasReadAlignmentConsistentWithReference(records);
boolean hasOeaAnchor = false;
diff --git a/src/main/java/gridss/analysis/CollectTagMetrics.java b/src/main/java/gridss/analysis/CollectTagMetrics.java
index d22316354..7b028b667 100644
--- a/src/main/java/gridss/analysis/CollectTagMetrics.java
+++ b/src/main/java/gridss/analysis/CollectTagMetrics.java
@@ -29,6 +29,7 @@
import java.util.HashMap;
import java.util.Map;
+import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import htsjdk.samtools.SAMFileHeader;
@@ -49,6 +50,9 @@
public class CollectTagMetrics extends SinglePassSamProgram {
public static final String METRICS_SUFFIX = ".tag_metrics";
+ @Argument(doc="If true, also include reads marked as duplicates.")
+ public boolean INCLUDE_DUPLICATES = false;
+
private Map tags = new HashMap<>();
/** Required main method. */
@@ -64,6 +68,7 @@ protected void setup(final SAMFileHeader header, final File samFile) {
@Override
protected void acceptRead(final SAMRecord rec, final ReferenceSequence ref) {
+ if (rec.getDuplicateReadFlag() && !INCLUDE_DUPLICATES) return;
for (SAMTagAndValue attr : rec.getAttributes()) {
String tag = attr.tag;
TagSummaryMetrics metric = tags.get(tag);
diff --git a/src/main/java/gridss/cmdline/GcSinglePassSamProgram.java b/src/main/java/gridss/cmdline/GcSinglePassSamProgram.java
index 02cc0308b..19460f149 100644
--- a/src/main/java/gridss/cmdline/GcSinglePassSamProgram.java
+++ b/src/main/java/gridss/cmdline/GcSinglePassSamProgram.java
@@ -86,8 +86,8 @@ public ReadPairConcordanceCalculator getReadPairConcordanceCalculator() {
}
// --------- end chunk from ProcessStructuralVariantReadsCommandLineProgram ---------
// --------- start chunk from ReferenceCommandLineProgram ---------
- @Argument(doc = "Ignore reads marked as duplicates.", optional = true)
- public boolean IGNORE_DUPLICATES = true;
+ @Argument(doc="If true, also include reads marked as duplicates.")
+ public boolean INCLUDE_DUPLICATES = false;
private ReferenceLookup reference;
public ReferenceLookup getReference() {
IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
diff --git a/src/main/java/gridss/cmdline/ProcessStructuralVariantReadsCommandLineProgram.java b/src/main/java/gridss/cmdline/ProcessStructuralVariantReadsCommandLineProgram.java
index ec45294cf..e2e3a8019 100644
--- a/src/main/java/gridss/cmdline/ProcessStructuralVariantReadsCommandLineProgram.java
+++ b/src/main/java/gridss/cmdline/ProcessStructuralVariantReadsCommandLineProgram.java
@@ -44,6 +44,8 @@ public abstract class ProcessStructuralVariantReadsCommandLineProgram extends By
public File INSERT_SIZE_METRICS = null;
@Argument(doc="Include unmapped reads", optional=true)
public boolean UNMAPPED_READS = true;
+ @Argument(doc="If true, also include reads marked as duplicates.")
+ public boolean INCLUDE_DUPLICATES = false;
@Override
protected String[] customCommandLineValidation() {
if (READ_PAIR_CONCORDANCE_METHOD == ReadPairConcordanceMethod.PERCENTAGE && INSERT_SIZE_METRICS == null) {