Skip to content

Commit

Permalink
tools: In VariantMerger, add secondary alternate when overlapping wit…
Browse files Browse the repository at this point in the history
…h a symbolic NO_VARIATION. #162
  • Loading branch information
j-coll committed Aug 31, 2018
1 parent 9786188 commit a6725d1
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 46 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
*/
package org.opencb.biodata.tools.variant.merge;

import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.vcf.*;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
Expand Down Expand Up @@ -474,8 +475,7 @@ private void mergeVariants(Variant current, List<Pair<Variant, List<AlternateCoo
StudyEntry currentStudy = getStudy(current);

// Build ALT index
List<AlternateCoordinate> altList = buildAltsList(current, varToAlts.stream()
.map(Pair::getRight).collect(Collectors.toList()));
List<AlternateCoordinate> altList = buildAltsList(current, varToAlts);
// Map<AlternateCoordinate, Integer> altIdx = index(altList);

// Update SecALt list
Expand Down Expand Up @@ -766,42 +766,22 @@ private List<Integer> collapseGT(List<String> gtsStr) {
return IntStream.range(0, gtsStr.size() - 1).boxed().collect(Collectors.toList());
}

/**
* @deprecated Use {@link VariantAlternateRearranger#rearrangeGenotype}
*/
@Deprecated
private String updateGT(String gt, Map<AlternateCoordinate, Integer> curr, Map<Integer, AlternateCoordinate> other) {
Genotype gto = new Genotype(gt);
int[] idx = gto.getAllelesIdx();
int len = idx.length;
IntStream.range(0, len).boxed().filter(i -> idx[i] > 0 && idx[i] <= other.size())
.forEach(i -> {
Integer allele = curr.get(other.get(idx[i]));
if (this.collapseDeletions && Objects.isNull(allele)) {
allele = 0; // change to '0' for 'missing' reference (missing because change to '0' GT)
}
gto.setAlleleIdx(i, allele);
});
if (!gto.isPhased()) {
Arrays.sort(idx);
}
return gto.toString();
}

private List<AlternateCoordinate> buildAltsList(Variant current, Collection<List<AlternateCoordinate>> alts) {
private List<AlternateCoordinate> buildAltsList(Variant current, List<Pair<Variant, List<AlternateCoordinate>>> varToAlts) {
Integer start = current.getStart();
Integer end = current.getEnd();
final List<AlternateCoordinate> currAlts = buildAltList(current);
final Set<AlternateCoordinate> altSets = new HashSet<>(currAlts);
if (this.collapseDeletions && isDeletion(current.getType(), current.getStart(), current.getEnd())) {
// remove all alts that are NOT fully overlap current deletion -> keep only larger or same
alts.stream()
varToAlts.stream()
.map(Pair::getValue)
.flatMap(Collection::stream)
.filter(a -> (start >= a.getStart() && end <= a.getEnd()))
.forEach(altSets::add);
} else if (this.collapseDeletions && isInsertion(current.getType(), current.getStart(), current.getEnd())) {
// remove all alts that are NOT fully overlap current deletion -> keep only larger or same
alts.stream()
varToAlts.stream()
.map(Pair::getValue)
.flatMap(Collection::stream)
.filter(a -> {
if (isInsertion(a)) {
Expand All @@ -815,11 +795,20 @@ private List<AlternateCoordinate> buildAltsList(Variant current, Collection<List
})
.forEach(altSets::add);
} else if (this.collapseDeletions && current.getType().equals(VariantType.SNP)) {
alts.forEach(l -> l.stream()
varToAlts.forEach(p -> p.getValue().stream()
.filter(a -> current.overlapWith(a.getChromosome(), a.getStart(), a.getEnd(), true))
.forEach(altSets::add));
} else {
alts.forEach(altSets::addAll);
for (Pair<Variant, List<AlternateCoordinate>> pair : varToAlts) {
Variant variant = pair.getKey();
List<AlternateCoordinate> alts = pair.getValue();
if (variant.getType().equals(VariantType.NO_VARIATION) && !alts.isEmpty()) {
AlternateCoordinate mainAlternate = alts.get(0);
mainAlternate.setStart(Math.max(mainAlternate.getStart(), current.getStart()));
mainAlternate.setEnd(Math.min(mainAlternate.getEnd(), current.getEnd()));
}
altSets.addAll(alts);
}
}
// remove current alts
altSets.removeAll(currAlts);
Expand Down Expand Up @@ -907,7 +896,10 @@ protected boolean equals(AlternateCoordinate alt1, AlternateCoordinate alt2) {
public List<AlternateCoordinate> buildAltList(Variant variant) {
AlternateCoordinate mainAlternate = getMainAlternate(variant);
List<AlternateCoordinate> alternates = new ArrayList<>();
if (!mainAlternate.getType().equals(VariantType.NO_VARIATION)) {
boolean emptyRefBlock = mainAlternate.getType().equals(VariantType.NO_VARIATION)
&& (mainAlternate.getAlternate().isEmpty() || mainAlternate.getAlternate().equals(Allele.NO_CALL_STRING));
// Skip Reference Blocks (NO_VARIATION) where the alternate is empty
if (!emptyRefBlock) {
alternates.add(mainAlternate);
}
StudyEntry se = getStudy(variant);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,7 @@ public void testCollapse_DEL_REF() {
Variant s02 = VariantTestUtils.generateVariant("1:10:ATT:-", "S02", "0/1");
Variant s03 = VariantTestUtils.generateVariant("1:10:A:-", "S03", "0/1");
Variant s04 = VariantTestUtils.generateVariant("1:9:AATT:-", "S04", "0/1");
Variant s05 = VariantTestUtils.generateVariant("1:3:A:-", "S05", "0/0");
s05.setType(VariantType.NO_VARIATION);
s05.setEnd(15);
Variant s05 = VariantTestUtils.generateVariant("1:3-15:A:.", "S05", "0/0");

Variant var = variantMergerCollapse.merge(s01, Arrays.asList(s02, s03, s04, s05));
StudyEntry se = var.getStudy(VariantTestUtils.STUDY_ID);
Expand All @@ -140,9 +138,7 @@ public void testCollapse_SNP_REF() {
Variant s02 = VariantTestUtils.generateVariant("1:10:ATT:", "S02", "0/1");
Variant s03 = VariantTestUtils.generateVariant("1:10:A:", "S03", "0/1");
Variant s04 = VariantTestUtils.generateVariant("1:9:AATT:", "S04", "0/1");
Variant s05 = VariantTestUtils.generateVariant("1:3:A:", "S05", "0/0");
s05.setType(VariantType.NO_VARIATION);
s05.setEnd(15);
Variant s05 = VariantTestUtils.generateVariant("1:3-15:A:.", "S05", "0/0");

Variant var = variantMergerCollapse.merge(s01, Arrays.asList(s02, s03, s04, s05));
StudyEntry se = var.getStudy(VariantTestUtils.STUDY_ID);
Expand All @@ -163,9 +159,7 @@ public void testCollapse_SNP_INS() {
Variant s02 = VariantTestUtils.generateVariant("1:10:ATT:", "S02", "0/1");
Variant s03 = VariantTestUtils.generateVariant("1:11::T", "S03", "0/1");
Variant s04 = VariantTestUtils.generateVariant("1:11::TTTT", "S04", "0/1");
Variant s05 = VariantTestUtils.generateVariant("1:3:A:", "S05", "0/0");
s05.setType(VariantType.NO_VARIATION);
s05.setEnd(15);
Variant s05 = VariantTestUtils.generateVariant("1:3-15:A:.", "S05", "0/0");

Variant var = variantMergerCollapse.merge(s01, Arrays.asList(s02, s03, s04, s05));
StudyEntry se = var.getStudy(VariantTestUtils.STUDY_ID);
Expand Down Expand Up @@ -219,9 +213,7 @@ public void testCollapse_INS_REF() {
Variant s02 = VariantTestUtils.generateVariant("1:10:ATT:", "S02", "0/1");
Variant s03 = VariantTestUtils.generateVariant("1:10:A:", "S03", "0/1");
Variant s04 = VariantTestUtils.generateVariant("1:9:AATT:", "S04", "0/1");
Variant s05 = VariantTestUtils.generateVariant("1:3:A:", "S05", "0/0");
s05.setType(VariantType.NO_VARIATION);
s05.setEnd(15);
Variant s05 = VariantTestUtils.generateVariant("1:3-15:A:.", "S05", "0/0");

Variant var = variantMergerCollapse.merge(s01, Arrays.asList(s02, s03, s04, s05));
StudyEntry se = var.getStudy(VariantTestUtils.STUDY_ID);
Expand Down Expand Up @@ -538,7 +530,6 @@ public void testMergeIndelOverlapping() throws NonStandardCompliantSampleField {
System.out.println("mergeVar = " + mergeVar.toJson());
}


@Test
public void testMergeReference() {
Variant v1 = VariantTestUtils.generateVariantWithFormat("1:10:ATGTA:-",
Expand All @@ -562,6 +553,46 @@ public void testMergeReference() {
assertEquals("PASS", mergeVar.getStudies().get(0).getSampleData("S2", VCFConstants.GENOTYPE_FILTER_KEY));
}

@Test
public void testMergeSymbolicReference() {
String format = VCFConstants.GENOTYPE_KEY + "," + VCFConstants.GENOTYPE_FILTER_KEY + "," + VCFConstants.GENOTYPE_ALLELE_DEPTHS;
Variant variant = VariantTestUtils.generateVariantWithFormat("1:10:ATGTA:-",
format,
"S1", "0/1", "PASS", "10,11");

Variant ref1 = VariantTestUtils.generateVariantWithFormat("1:5-15:A:<NON_REF>",
format,
"S2", "0/0", "PASS", "8,2");

Variant mergeVar = variantMerger.merge(variant, ref1);
assertEquals(1, mergeVar.getStudies().get(0).getSecondaryAlternates().size());
assertEquals(new AlternateCoordinate("1", 10, 14, "A", "<NON_REF>", VariantType.NO_VARIATION), mergeVar.getStudies().get(0).getSecondaryAlternates().get(0));
assertEquals("0/0", mergeVar.getStudies().get(0).getSampleData("S2", VCFConstants.GENOTYPE_KEY));
assertEquals("PASS", mergeVar.getStudies().get(0).getSampleData("S2", VCFConstants.GENOTYPE_FILTER_KEY));
assertEquals("8,0,2", mergeVar.getStudies().get(0).getSampleData("S2", VCFConstants.GENOTYPE_ALLELE_DEPTHS));
assertEquals("10,11", mergeVar.getStudies().get(0).getSampleData("S1", VCFConstants.GENOTYPE_ALLELE_DEPTHS));
}

@Test
public void testMergePartialSymbolicReference() {
String format = VCFConstants.GENOTYPE_KEY + "," + VCFConstants.GENOTYPE_FILTER_KEY + "," + VCFConstants.GENOTYPE_ALLELE_DEPTHS;
Variant variant = VariantTestUtils.generateVariantWithFormat("1:10:ATGTA:-",
format,
"S1", "0/1", "PASS", "10,11");

Variant ref1 = VariantTestUtils.generateVariantWithFormat("1:11-13:A:<NON_REF>",
format,
"S2", "0/0", "PASS", "8,2");

Variant mergeVar = variantMerger.merge(variant, ref1);
assertEquals(1, mergeVar.getStudies().get(0).getSecondaryAlternates().size());
assertEquals(new AlternateCoordinate("1", 11, 13, "A", "<NON_REF>", VariantType.NO_VARIATION), mergeVar.getStudies().get(0).getSecondaryAlternates().get(0));
assertEquals("0/0", mergeVar.getStudies().get(0).getSampleData("S2", VCFConstants.GENOTYPE_KEY));
assertEquals("PASS", mergeVar.getStudies().get(0).getSampleData("S2", VCFConstants.GENOTYPE_FILTER_KEY));
assertEquals("10,11", mergeVar.getStudies().get(0).getSampleData("S1", VCFConstants.GENOTYPE_ALLELE_DEPTHS));
assertEquals("8,0,2", mergeVar.getStudies().get(0).getSampleData("S2", VCFConstants.GENOTYPE_ALLELE_DEPTHS));
}

@Test
public void testMergeIndel() {
Variant v1 = VariantTestUtils.generateVariantWithFormat("1:10:ATGTA:-",
Expand Down Expand Up @@ -933,9 +964,7 @@ public void testMergeMonoAllelicNocall() {

@Test
public void testMergeSnpBlock() {
Variant v = VariantTestUtils.generateVariant("1:10:A:", "S01", "0/0");
v.setType(VariantType.NO_VARIATION);
v.setEnd(100);
Variant v = VariantTestUtils.generateVariant("1:10:A-100:.", "S01", "0/0");

checkMergeVariants(VariantTestUtils.generateVariant("1:10:A:T", "S02", "0/1"), v, Collections.emptyList(), "0/0");
}
Expand Down

0 comments on commit a6725d1

Please sign in to comment.