diff --git a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/merge/VariantMerger.java b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/merge/VariantMerger.java index a2a25d1e9..d58caeeaf 100644 --- a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/merge/VariantMerger.java +++ b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/merge/VariantMerger.java @@ -22,6 +22,7 @@ */ package org.opencb.biodata.tools.variant.merge; +import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.vcf.*; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.math.NumberUtils; @@ -474,8 +475,7 @@ private void mergeVariants(Variant current, List altList = buildAltsList(current, varToAlts.stream() - .map(Pair::getRight).collect(Collectors.toList())); + List altList = buildAltsList(current, varToAlts); // Map altIdx = index(altList); // Update SecALt list @@ -766,42 +766,22 @@ private List collapseGT(List gtsStr) { return IntStream.range(0, gtsStr.size() - 1).boxed().collect(Collectors.toList()); } - /** - * @deprecated Use {@link VariantAlternateRearranger#rearrangeGenotype} - */ - @Deprecated - private String updateGT(String gt, Map curr, Map other) { - Genotype gto = new Genotype(gt); - int[] idx = gto.getAllelesIdx(); - int len = idx.length; - IntStream.range(0, len).boxed().filter(i -> idx[i] > 0 && idx[i] <= other.size()) - .forEach(i -> { - Integer allele = curr.get(other.get(idx[i])); - if (this.collapseDeletions && Objects.isNull(allele)) { - allele = 0; // change to '0' for 'missing' reference (missing because change to '0' GT) - } - gto.setAlleleIdx(i, allele); - }); - if (!gto.isPhased()) { - Arrays.sort(idx); - } - return gto.toString(); - } - - private List buildAltsList(Variant current, Collection> alts) { + private List buildAltsList(Variant current, List>> varToAlts) { Integer start = current.getStart(); Integer end = current.getEnd(); final List currAlts = buildAltList(current); final Set altSets = new HashSet<>(currAlts); if (this.collapseDeletions && isDeletion(current.getType(), current.getStart(), current.getEnd())) { // remove all alts that are NOT fully overlap current deletion -> keep only larger or same - alts.stream() + varToAlts.stream() + .map(Pair::getValue) .flatMap(Collection::stream) .filter(a -> (start >= a.getStart() && end <= a.getEnd())) .forEach(altSets::add); } else if (this.collapseDeletions && isInsertion(current.getType(), current.getStart(), current.getEnd())) { // remove all alts that are NOT fully overlap current deletion -> keep only larger or same - alts.stream() + varToAlts.stream() + .map(Pair::getValue) .flatMap(Collection::stream) .filter(a -> { if (isInsertion(a)) { @@ -815,11 +795,20 @@ private List buildAltsList(Variant current, Collection l.stream() + varToAlts.forEach(p -> p.getValue().stream() .filter(a -> current.overlapWith(a.getChromosome(), a.getStart(), a.getEnd(), true)) .forEach(altSets::add)); } else { - alts.forEach(altSets::addAll); + for (Pair> pair : varToAlts) { + Variant variant = pair.getKey(); + List alts = pair.getValue(); + if (variant.getType().equals(VariantType.NO_VARIATION) && !alts.isEmpty()) { + AlternateCoordinate mainAlternate = alts.get(0); + mainAlternate.setStart(Math.max(mainAlternate.getStart(), current.getStart())); + mainAlternate.setEnd(Math.min(mainAlternate.getEnd(), current.getEnd())); + } + altSets.addAll(alts); + } } // remove current alts altSets.removeAll(currAlts); @@ -907,7 +896,10 @@ protected boolean equals(AlternateCoordinate alt1, AlternateCoordinate alt2) { public List buildAltList(Variant variant) { AlternateCoordinate mainAlternate = getMainAlternate(variant); List alternates = new ArrayList<>(); - if (!mainAlternate.getType().equals(VariantType.NO_VARIATION)) { + boolean emptyRefBlock = mainAlternate.getType().equals(VariantType.NO_VARIATION) + && (mainAlternate.getAlternate().isEmpty() || mainAlternate.getAlternate().equals(Allele.NO_CALL_STRING)); + // Skip Reference Blocks (NO_VARIATION) where the alternate is empty + if (!emptyRefBlock) { alternates.add(mainAlternate); } StudyEntry se = getStudy(variant); diff --git a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/merge/VariantMergerTest.java b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/merge/VariantMergerTest.java index 266b49573..6d56e004f 100644 --- a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/merge/VariantMergerTest.java +++ b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/merge/VariantMergerTest.java @@ -117,9 +117,7 @@ public void testCollapse_DEL_REF() { Variant s02 = VariantTestUtils.generateVariant("1:10:ATT:-", "S02", "0/1"); Variant s03 = VariantTestUtils.generateVariant("1:10:A:-", "S03", "0/1"); Variant s04 = VariantTestUtils.generateVariant("1:9:AATT:-", "S04", "0/1"); - Variant s05 = VariantTestUtils.generateVariant("1:3:A:-", "S05", "0/0"); - s05.setType(VariantType.NO_VARIATION); - s05.setEnd(15); + Variant s05 = VariantTestUtils.generateVariant("1:3-15:A:.", "S05", "0/0"); Variant var = variantMergerCollapse.merge(s01, Arrays.asList(s02, s03, s04, s05)); StudyEntry se = var.getStudy(VariantTestUtils.STUDY_ID); @@ -140,9 +138,7 @@ public void testCollapse_SNP_REF() { Variant s02 = VariantTestUtils.generateVariant("1:10:ATT:", "S02", "0/1"); Variant s03 = VariantTestUtils.generateVariant("1:10:A:", "S03", "0/1"); Variant s04 = VariantTestUtils.generateVariant("1:9:AATT:", "S04", "0/1"); - Variant s05 = VariantTestUtils.generateVariant("1:3:A:", "S05", "0/0"); - s05.setType(VariantType.NO_VARIATION); - s05.setEnd(15); + Variant s05 = VariantTestUtils.generateVariant("1:3-15:A:.", "S05", "0/0"); Variant var = variantMergerCollapse.merge(s01, Arrays.asList(s02, s03, s04, s05)); StudyEntry se = var.getStudy(VariantTestUtils.STUDY_ID); @@ -163,9 +159,7 @@ public void testCollapse_SNP_INS() { Variant s02 = VariantTestUtils.generateVariant("1:10:ATT:", "S02", "0/1"); Variant s03 = VariantTestUtils.generateVariant("1:11::T", "S03", "0/1"); Variant s04 = VariantTestUtils.generateVariant("1:11::TTTT", "S04", "0/1"); - Variant s05 = VariantTestUtils.generateVariant("1:3:A:", "S05", "0/0"); - s05.setType(VariantType.NO_VARIATION); - s05.setEnd(15); + Variant s05 = VariantTestUtils.generateVariant("1:3-15:A:.", "S05", "0/0"); Variant var = variantMergerCollapse.merge(s01, Arrays.asList(s02, s03, s04, s05)); StudyEntry se = var.getStudy(VariantTestUtils.STUDY_ID); @@ -219,9 +213,7 @@ public void testCollapse_INS_REF() { Variant s02 = VariantTestUtils.generateVariant("1:10:ATT:", "S02", "0/1"); Variant s03 = VariantTestUtils.generateVariant("1:10:A:", "S03", "0/1"); Variant s04 = VariantTestUtils.generateVariant("1:9:AATT:", "S04", "0/1"); - Variant s05 = VariantTestUtils.generateVariant("1:3:A:", "S05", "0/0"); - s05.setType(VariantType.NO_VARIATION); - s05.setEnd(15); + Variant s05 = VariantTestUtils.generateVariant("1:3-15:A:.", "S05", "0/0"); Variant var = variantMergerCollapse.merge(s01, Arrays.asList(s02, s03, s04, s05)); StudyEntry se = var.getStudy(VariantTestUtils.STUDY_ID); @@ -538,7 +530,6 @@ public void testMergeIndelOverlapping() throws NonStandardCompliantSampleField { System.out.println("mergeVar = " + mergeVar.toJson()); } - @Test public void testMergeReference() { Variant v1 = VariantTestUtils.generateVariantWithFormat("1:10:ATGTA:-", @@ -562,6 +553,46 @@ public void testMergeReference() { assertEquals("PASS", mergeVar.getStudies().get(0).getSampleData("S2", VCFConstants.GENOTYPE_FILTER_KEY)); } + @Test + public void testMergeSymbolicReference() { + String format = VCFConstants.GENOTYPE_KEY + "," + VCFConstants.GENOTYPE_FILTER_KEY + "," + VCFConstants.GENOTYPE_ALLELE_DEPTHS; + Variant variant = VariantTestUtils.generateVariantWithFormat("1:10:ATGTA:-", + format, + "S1", "0/1", "PASS", "10,11"); + + Variant ref1 = VariantTestUtils.generateVariantWithFormat("1:5-15:A:", + format, + "S2", "0/0", "PASS", "8,2"); + + Variant mergeVar = variantMerger.merge(variant, ref1); + assertEquals(1, mergeVar.getStudies().get(0).getSecondaryAlternates().size()); + assertEquals(new AlternateCoordinate("1", 10, 14, "A", "", VariantType.NO_VARIATION), mergeVar.getStudies().get(0).getSecondaryAlternates().get(0)); + assertEquals("0/0", mergeVar.getStudies().get(0).getSampleData("S2", VCFConstants.GENOTYPE_KEY)); + assertEquals("PASS", mergeVar.getStudies().get(0).getSampleData("S2", VCFConstants.GENOTYPE_FILTER_KEY)); + assertEquals("8,0,2", mergeVar.getStudies().get(0).getSampleData("S2", VCFConstants.GENOTYPE_ALLELE_DEPTHS)); + assertEquals("10,11", mergeVar.getStudies().get(0).getSampleData("S1", VCFConstants.GENOTYPE_ALLELE_DEPTHS)); + } + + @Test + public void testMergePartialSymbolicReference() { + String format = VCFConstants.GENOTYPE_KEY + "," + VCFConstants.GENOTYPE_FILTER_KEY + "," + VCFConstants.GENOTYPE_ALLELE_DEPTHS; + Variant variant = VariantTestUtils.generateVariantWithFormat("1:10:ATGTA:-", + format, + "S1", "0/1", "PASS", "10,11"); + + Variant ref1 = VariantTestUtils.generateVariantWithFormat("1:11-13:A:", + format, + "S2", "0/0", "PASS", "8,2"); + + Variant mergeVar = variantMerger.merge(variant, ref1); + assertEquals(1, mergeVar.getStudies().get(0).getSecondaryAlternates().size()); + assertEquals(new AlternateCoordinate("1", 11, 13, "A", "", VariantType.NO_VARIATION), mergeVar.getStudies().get(0).getSecondaryAlternates().get(0)); + assertEquals("0/0", mergeVar.getStudies().get(0).getSampleData("S2", VCFConstants.GENOTYPE_KEY)); + assertEquals("PASS", mergeVar.getStudies().get(0).getSampleData("S2", VCFConstants.GENOTYPE_FILTER_KEY)); + assertEquals("10,11", mergeVar.getStudies().get(0).getSampleData("S1", VCFConstants.GENOTYPE_ALLELE_DEPTHS)); + assertEquals("8,0,2", mergeVar.getStudies().get(0).getSampleData("S2", VCFConstants.GENOTYPE_ALLELE_DEPTHS)); + } + @Test public void testMergeIndel() { Variant v1 = VariantTestUtils.generateVariantWithFormat("1:10:ATGTA:-", @@ -933,9 +964,7 @@ public void testMergeMonoAllelicNocall() { @Test public void testMergeSnpBlock() { - Variant v = VariantTestUtils.generateVariant("1:10:A:", "S01", "0/0"); - v.setType(VariantType.NO_VARIATION); - v.setEnd(100); + Variant v = VariantTestUtils.generateVariant("1:10:A-100:.", "S01", "0/0"); checkMergeVariants(VariantTestUtils.generateVariant("1:10:A:T", "S02", "0/1"), v, Collections.emptyList(), "0/0"); }