Skip to content

Commit

Permalink
models: Improve VariantBuilder::inferType to recognize <NON_REF> and <*> as NO_VARIATION. #162
Browse files Browse the repository at this point in the history
  • Loading branch information
j-coll committed Aug 31, 2018
1 parent 1c6eb89 commit c3c9a9e
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

import java.util.*;
import java.util.function.Consumer;
import java.util.function.Supplier;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

Expand Down Expand Up @@ -44,6 +43,8 @@ public class VariantBuilder {
/** Symbolic alternate allele string used for insertions. */
public static final String INS_ALT = "<INS>";
/** Leading characters shared by copy-number alternates such as {@code <CN5>}. */
private static final String CNV_PREFIX_ALT = "<CN";
/** Matches a copy-number alternate ({@code <CN} + digits + {@code >}); group 1 captures the digits. */
private static final Pattern CNV_ALT_PATTERN = Pattern.compile("<CN([0-9]+)>");
/**
 * Alternate allele alias for {@code Allele.NON_REF_STRING}
 * (presumably the literal {@code <NON_REF>} -- TODO confirm against the Allele class).
 * {@code inferType} classifies an alternate equal to that string as NO_VARIATION.
 */
public static final String NON_REF_ALT = Allele.NON_REF_STRING;
/** Alternate allele {@code <*>}; {@code inferType} classifies it as NO_VARIATION. */
public static final String REF_ONLY_ALT = "<*>";

private static final Set<String> VALID_NTS = new HashSet<>(Arrays.asList("A", "C", "G", "T", "N"));
protected static final String VARIANT_STRING_FORMAT
Expand Down Expand Up @@ -513,7 +514,7 @@ public Variant buildAvroVariant(Variant reuse) {
if (alternates.size() > 0) {
List<AlternateCoordinate> secondaryAlternates = new ArrayList<>(alternates.size() - 1);
for (int i = 1; i < alternates.size(); i++) {
secondaryAlternates.add(new AlternateCoordinate(chromosome, start, end, reference, alternates.get(i), type));
secondaryAlternates.add(new AlternateCoordinate(chromosome, start, end, reference, alternates.get(i), inferType(reference, alternates.get(i))));
}
studyEntry.setSecondaryAlternates(secondaryAlternates);
}
Expand Down Expand Up @@ -598,7 +599,7 @@ public VariantProto.Variant buildProtoVariant(VariantProto.VariantOrBuilder reus
.setEnd(end)
.setReference(reference)
.setAlternate(alternates.get(i))
.setType(builder.getType()));
.setType(getProtoVariantType(inferType(reference, alternates.get(i)))));
}

if (format != null) {
Expand Down Expand Up @@ -751,6 +752,8 @@ public static VariantType inferType(String reference, String alternate) {
} else if (alternate.contains("[") || alternate.contains("]") // mated breakend
|| alternateBytes[0] == '.' || alternateBytes[alternateBytes.length - 1] == '.') { // single breakend
return VariantType.BREAKEND;
} else if (alternate.equals(Allele.NON_REF_STRING) || alternate.equals(REF_ONLY_ALT)) {
return VariantType.NO_VARIATION;
} else {
return VariantType.SYMBOLIC;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ public void parseVariantTest() {
map.put("1:999<1000<1001-1999<2000<2001:<CN5>", new Variant("1", 1000, 2000, "", "<CN5>").setType(VariantType.CNV).setSv(new StructuralVariation(999, 1001, 1999, 2001, 5, null, null, StructuralVariantType.COPY_NUMBER_GAIN, null)));
map.put("1:1000:A:.", new Variant("1", 1000, 1000, "A", "").setType(VariantType.NO_VARIATION));
map.put("1:1000-1005:A:.", new Variant("1", 1000, 1005, "A", "").setLength(6).setType(VariantType.NO_VARIATION));
map.put("1:1000-1005:A:<*>", new Variant("1", 1000, 1005, "A", "<*>").setLength(6).setType(VariantType.NO_VARIATION));
map.put("1:1000-1005:A:<NON_REF>", new Variant("1", 1000, 1005, "A", "<NON_REF>").setLength(6).setType(VariantType.NO_VARIATION));
map.put("1:1000:ACACAC...GTGTGTGT", new Variant("1", 1000, 999, "", "<INS>").setLength(Variant.UNKNOWN_LENGTH).setType(VariantType.INSERTION).setSv(new StructuralVariation(null, null, null, null, null, "ACACAC", "GTGTGTGT", null, null)));
map.put("1:1000:...GTGTGTGT", new Variant("1", 1000, 999, "", "<INS>").setLength(Variant.UNKNOWN_LENGTH).setType(VariantType.INSERTION).setSv(new StructuralVariation(null, null, null, null, null, "", "GTGTGTGT", null, null)));
map.put("1:1000:ACACAC...", new Variant("1", 1000, 999, "", "<INS>").setLength(Variant.UNKNOWN_LENGTH).setType(VariantType.INSERTION).setSv(new StructuralVariation(null, null, null, null, null, "ACACAC", "", null, null)));
Expand Down

0 comments on commit c3c9a9e

Please sign in to comment.