Skip to content

Commit

Permalink
Merge pull request #2318 from opencb/TASK-4794
Browse files Browse the repository at this point in the history
TASK-4794 - Missing "FILTER" field in VCF generated (export step) while running Exomiser
  • Loading branch information
j-coll authored Jul 28, 2023
2 parents 476848b + 83dfca3 commit d69eb73
Show file tree
Hide file tree
Showing 21 changed files with 505 additions and 74 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import org.opencb.biodata.models.clinical.Phenotype;
import org.opencb.biodata.models.clinical.pedigree.Member;
import org.opencb.biodata.models.clinical.pedigree.Pedigree;
import org.opencb.biodata.models.core.SexOntologyTermAnnotation;
import org.opencb.biodata.models.pedigree.IndividualProperty;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.exec.Command;
Expand All @@ -21,12 +20,9 @@
import org.opencb.opencga.core.exceptions.ToolExecutorException;
import org.opencb.opencga.core.models.family.Family;
import org.opencb.opencga.core.models.individual.Individual;
import org.opencb.opencga.core.models.sample.Sample;
import org.opencb.opencga.core.tools.annotations.ToolExecutor;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.opencb.opencga.storage.core.variant.adaptors.VariantField;
import org.opencb.opencga.storage.core.variant.adaptors.VariantQuery;
import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam;
import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -36,7 +32,6 @@
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.stream.Collectors;

@ToolExecutor(id = ExomiserWrapperAnalysisExecutor.ID,
tool = ExomiserWrapperAnalysis.ID,
Expand Down Expand Up @@ -133,7 +128,8 @@ public void run() throws ToolException {
.sample(sampleId)
.includeSample(samples)
.includeSampleData("GT")
.unknownGenotype("./.");
.unknownGenotype("./.")
.append("includeAllFromSampleIndex", true);

QueryOptions queryOptions = new QueryOptions(QueryOptions.INCLUDE, "id,studies.samples");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,15 @@ public List<IndexFieldConfiguration> getCustomFields() {
return customFields;
}

/**
 * Finds the custom field configuration that has the given source and key.
 *
 * @param source source the field must have (compared by identity, {@code ==})
 * @param key    key the field must have (compared through {@code getKey().equals(key)})
 * @return the first element of {@code customFields} matching both criteria,
 *         or {@code null} when no element matches
 */
public IndexFieldConfiguration getCustomField(IndexFieldConfiguration.Source source, String key) {
    for (IndexFieldConfiguration candidate : customFields) {
        // The key is checked first, then the source, to keep the evaluation order of the lookup stable.
        if (!candidate.getKey().equals(key)) {
            continue;
        }
        if (candidate.getSource() == source) {
            return candidate;
        }
    }
    return null;
}

/**
 * @return the current value of the {@code filePositionBits} field
 */
public int getFilePositionBits() {
    return this.filePositionBits;
}
Expand Down Expand Up @@ -705,4 +714,13 @@ public boolean equals(Object o) {
public int hashCode() {
    // The hash is derived from the two configuration sections, in this order.
    return Objects.hash(this.fileIndexConfiguration, this.annotationIndexConfiguration);
}

/**
 * Builds a text representation listing both configuration sections.
 *
 * @return a string of the form
 *         {@code SampleIndexConfiguration{fileIndexConfiguration=..., annotationIndexConfiguration=...}}
 */
@Override
public String toString() {
    return "SampleIndexConfiguration{"
            + "fileIndexConfiguration=" + fileIndexConfiguration
            + ", annotationIndexConfiguration=" + annotationIndexConfiguration
            + '}';
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
##fileformat=VCFv4.1
##FILTER=<ID=PASS,Description="All filters passed">
##FILTER=<ID=noPass,Description="No pass">
##FILTER=<ID=noPass2,Description="No pass other">
##FILTER=<ID=.,Description="unknown filter state">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype likelihoods">
##FORMAT=<ID=DS,Number=1,Type=Float,Description="">
##command=seq 1000000 500 3000000 | while read i ; do echo -e "chr1\t$i\t.\tA\tC\t$RANDOM\tPASS\t.\tGT\t0/1\t1/1\t1|0\t0|1" ; done
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA19600 NA19660 NA19661 NA19685
chr1 1000000 . A C,T 5 noPass,noPass2 . GT 1/2 1/1 0|0 0|1
chr1 1000010 . A AC,CA 20 PASS . GT 1/2 1/1 0|0 0|1
chr1 1000020 . AT T,A 60 . . GT 1/2 1/1 0|0 0|1
chr1 1000030 . C G 60 . PASS GT 1/0 1/1 0|0 0|1
chr1 1000040 . C G 60 . PASS GT 1/0 1/1 0|0 0|1
chr1 1000050 . C G 60 . PASS GT 1/0 1/1 0|0 0|1
chr1 1000060 . C G 60 . PASS GT 1/0 1/1 0|0 0|1
chr1 1000070 . C G 60 . PASS GT 1/0 1/1 0|0 0|1
chr1 1000080 . C G 60 . PASS GT 1/0 1/1 0|0 0|1
chr1 1000090 . C G 60 . PASS GT 1/0 1/1 0|0 0|1

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,7 @@
import org.opencb.opencga.storage.hadoop.variant.index.core.filters.MultiValueIndexFieldFilter;
import org.opencb.opencga.storage.hadoop.variant.index.core.filters.SingleValueIndexFieldFilter;

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.stream.Collectors;

/**
Expand All @@ -22,7 +19,7 @@
* Value "0" represents NA.
*/
public class CategoricalIndexField<T> extends IndexField<T> implements IndexCodec<T> {
private final int numBits;
private final int bitLength;
private final IndexCodec<T> codec;

public static CategoricalIndexField<String> create(IndexFieldConfiguration configuration, int bitOffset) {
Expand All @@ -43,18 +40,18 @@ public CategoricalIndexField(IndexFieldConfiguration configuration, int bitOffse
numValues = values.length;
codec = new BasicCodec<>(values, valuesMapping);
}
this.numBits = Math.max(1, IndexUtils.log2(numValues - 1) + 1);
this.bitLength = Math.max(1, IndexUtils.log2(numValues - 1) + 1);
}

public CategoricalIndexField(IndexFieldConfiguration configuration, int bitOffset, int numValues, IndexCodec<T> codec) {
super(configuration, bitOffset);
this.numBits = IndexUtils.log2(numValues - 1) + 1;
this.bitLength = IndexUtils.log2(numValues - 1) + 1;
this.codec = codec;
}

@Override
public int getBitLength() {
return numBits;
return bitLength;
}

@Override
Expand Down Expand Up @@ -124,6 +121,16 @@ public T decode(int code) {
public boolean ambiguous(int code) {
// Returns the flag stored at position `code` of the ambiguousValues array.
// No range check is done here: a code outside the array bounds raises ArrayIndexOutOfBoundsException.
return ambiguousValues[code];
}

/**
 * Builds a text representation of this codec: its values, the reverse value mapping
 * and the per-code ambiguity flags.
 *
 * @return a string of the form {@code BasicCodec{values=[...], valuesMappingRev=..., ambiguousValues=[...]}}
 */
@Override
public String toString() {
    return new StringBuilder("BasicCodec{values=")
            .append(Arrays.toString(values))
            .append(", valuesMappingRev=")
            .append(valuesMappingRev)
            .append(", ambiguousValues=")
            .append(Arrays.toString(ambiguousValues))
            .append('}')
            .toString();
}
}

private static class BasicCodecWithNa<T> implements IndexCodec<T> {
Expand Down Expand Up @@ -178,6 +185,20 @@ public T decode(int code) {
public boolean ambiguous(int code) {
// Returns the flag stored at position `code` of the ambiguousValues array.
// No range check is done here: a code outside the array bounds raises ArrayIndexOutOfBoundsException.
return ambiguousValues[code];
}

/**
 * Builds a text representation of this codec: its values, the reverse value mapping
 * and the per-code ambiguity flags.
 *
 * @return a string of the form
 *         {@code BasicCodecWithNa{values=[...], valuesMappingRev=..., ambiguousValues=[...]}}
 */
@Override
public String toString() {
    return new StringBuilder("BasicCodecWithNa{values=")
            .append(Arrays.toString(values))
            .append(", valuesMappingRev=")
            .append(valuesMappingRev)
            .append(", ambiguousValues=")
            .append(Arrays.toString(ambiguousValues))
            .append('}')
            .toString();
}
}

/**
 * Gives subclasses access to the codec held by this field.
 *
 * @return the {@code codec} instance of this field
 */
protected IndexCodec<T> getCodec() {
    return this.codec;
}

@Override
Expand All @@ -195,4 +216,14 @@ public boolean ambiguous(int code) {
return codec.ambiguous(code);
}

/**
 * Builds a text representation of this field: configuration, bit offset, bit length and codec.
 *
 * @return a string of the form
 *         {@code CategoricalIndexField{configuration=..., bitOffset=..., bitLength=..., codec=...}}
 */
@Override
public String toString() {
    return new StringBuilder("CategoricalIndexField{configuration=")
            .append(getConfiguration())
            .append(", bitOffset=")
            .append(getBitOffset())
            .append(", bitLength=")
            .append(bitLength)
            .append(", codec=")
            .append(codec)
            .append('}')
            .toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
*/
public class CategoricalMultiValuedIndexField<T> extends CategoricalIndexField<List<T>> {

private final int numBits;
private final int bitLength;

public static CategoricalMultiValuedIndexField<String> createMultiValued(IndexFieldConfiguration configuration, int bitOffset) {
return new CategoricalMultiValuedIndexField<>(
Expand All @@ -41,7 +41,7 @@ public CategoricalMultiValuedIndexField(IndexFieldConfiguration configuration, i

private CategoricalMultiValuedIndexField(IndexFieldConfiguration configuration, int bitOffset, T[] values, MaskValueCodec<T> codec) {
super(configuration, bitOffset, values.length, codec);
numBits = codec.numBits;
bitLength = codec.numBits;
}

@Override
Expand All @@ -59,7 +59,7 @@ protected IndexFieldFilter getSingleValueIndexFilter(OpValue<List<T>> opValue) {

@Override
public int getBitLength() {
return numBits;
return bitLength;
}

/**
Expand Down Expand Up @@ -145,6 +145,28 @@ public List<T> decode(int code) {
public boolean ambiguous(int code) {
    // The NA code is always reported as ambiguous.
    if (code == NA) {
        return true;
    }
    // Otherwise the code is ambiguous when it shares at least one set bit with the ambiguousValues mask.
    return (code & ambiguousValues) != 0;
}

/**
 * Builds a text representation of this codec, listing its values, positions, bit count
 * and ambiguity mask.
 *
 * @return a string of the form {@code MaskValueCodec{values=[...], otherValuePosition=...,
 *         valuesPosition=..., numBits=..., ambiguousValues=...}}
 */
@Override
public String toString() {
    return new StringBuilder("MaskValueCodec{values=")
            .append(Arrays.toString(values))
            .append(", otherValuePosition=")
            .append(otherValuePosition)
            .append(", valuesPosition=")
            .append(valuesPosition)
            .append(", numBits=")
            .append(numBits)
            .append(", ambiguousValues=")
            .append(ambiguousValues)
            .append('}')
            .toString();
}
}

/**
 * Builds a text representation of this field: configuration, codec, bit offset and bit length.
 *
 * @return a string of the form
 *         {@code CategoricalMultiValuedIndexField{configuration=..., codec=..., bitOffset=..., bitLength=...}}
 */
@Override
public String toString() {
    return new StringBuilder("CategoricalMultiValuedIndexField{configuration=")
            .append(getConfiguration())
            .append(", codec=")
            .append(getCodec())
            .append(", bitOffset=")
            .append(getBitOffset())
            .append(", bitLength=")
            .append(bitLength)
            .append('}')
            .toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,12 @@ public BitBuffer read(BitBuffer buffer, int i) {
return buffer.getBitBuffer(i * indexSizeBits, indexSizeBits);
}

/**
 * Builds a text representation of this schema: the index size in bits and its fields.
 *
 * @return a string of the form {@code FixedSizeIndexSchema{indexSizeBits=..., fields=...}}
 */
@Override
public String toString() {
    return new StringBuilder("FixedSizeIndexSchema{indexSizeBits=")
            .append(indexSizeBits)
            .append(", fields=")
            .append(fields)
            .append('}')
            .toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -170,4 +170,12 @@ public R decode(int code) {
};
}

/**
 * Builds a text representation of this field: its configuration and bit offset.
 *
 * @return a string of the form {@code IndexField{configuration=..., bitOffset=...}}
 */
@Override
public String toString() {
    return new StringBuilder("IndexField{configuration=")
            .append(configuration)
            .append(", bitOffset=")
            .append(bitOffset)
            .append('}')
            .toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import org.opencb.opencga.storage.hadoop.variant.index.core.filters.IndexFieldFilter;
import org.opencb.opencga.storage.hadoop.variant.index.core.filters.RangeIndexFieldFilter;

import java.util.Arrays;
import java.util.List;

/**
Expand All @@ -18,7 +19,7 @@ public class RangeIndexField extends IndexField<Double> {
private final double[] thresholds;
private final double min;
private final double max;
private final int numBits;
private final int bitLength;
private final IndexCodec<Double> codec;
private int numRanges;

Expand All @@ -40,7 +41,7 @@ public RangeIndexField(IndexFieldConfiguration configuration, int bitOffset, dou
} else {
codec = new NonNullableRangeCodec();
}
numBits = Math.max(1, IndexUtils.log2(numRanges - 1) + 1);
bitLength = Math.max(1, IndexUtils.log2(numRanges - 1) + 1);
if (configuration.getType().equals(IndexFieldConfiguration.Type.RANGE_GT)) {
// Add one DELTA to each value to invert ranges from [s, e) to (s, e], therefore the operation ">" is exact
for (int i = 0; i < thresholds.length; i++) {
Expand Down Expand Up @@ -77,7 +78,7 @@ public double getMax() {

@Override
public int getBitLength() {
return numBits;
return bitLength;
}

@Override
Expand Down Expand Up @@ -120,13 +121,20 @@ public int encode(Double value) {

@Override
public Double decode(int code) {
return code == thresholds.length ? max : code < 0 ? min : thresholds[code];
return code <= 0 ? min : thresholds[code - 1];
}

/**
 * Reports every code as ambiguous, regardless of its value.
 *
 * @param code encoded value (not inspected)
 * @return always {@code true}
 */
// NOTE(review): presumably because a code identifies a range rather than one exact value — confirm.
@Override
public boolean ambiguous(int code) {
return true;
}

/**
 * Text representation of this codec; it prints no fields.
 *
 * @return the literal {@code NonNullableRangeCodec{}}
 */
@Override
public String toString() {
    return "NonNullableRangeCodec{}";
}
}

public class NullableRangeCodec extends NonNullableRangeCodec {
Expand All @@ -142,6 +150,13 @@ public int encode(Double value) {
public Double decode(int code) {
    // The NA code decodes to null (no value).
    if (code == NA) {
        return null;
    }
    // Any other code is shifted down by one before delegating to the parent codec.
    return super.decode(code - 1);
}

/**
 * Text representation of this codec; it prints no fields.
 *
 * @return the literal {@code NullableRangeCodec{}}
 */
@Override
public String toString() {
    return "NullableRangeCodec{}";
}
}

/**
Expand Down Expand Up @@ -174,4 +189,19 @@ public static boolean lessThan(double a, double b) {
public static boolean equalsTo(double a, double b) {
return Math.abs(a - b) < (DELTA / 10);
}

/**
 * Builds a text representation of this field: configuration, bit layout, thresholds,
 * min/max bounds, codec and number of ranges.
 *
 * @return a string of the form {@code RangeIndexField{configuration=..., bitOffset=..., bitLength=...,
 *         thresholds=[...], min=..., max=..., codec=..., numRanges=...}}
 */
@Override
public String toString() {
    return new StringBuilder("RangeIndexField{configuration=")
            .append(getConfiguration())
            .append(", bitOffset=")
            .append(getBitOffset())
            .append(", bitLength=")
            .append(bitLength)
            .append(", thresholds=")
            .append(Arrays.toString(thresholds))
            .append(", min=")
            .append(min)
            .append(", max=")
            .append(max)
            .append(", codec=")
            .append(codec)
            .append(", numRanges=")
            .append(numRanges)
            .append('}')
            .toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,12 @@ public int readFieldValue(BitBuffer buffer, int i) {
// return getField().read(read(buffer, i));
return buffer.getIntPartial(i * getBitsLength(), getBitsLength());
}

/**
 * Builds a text representation of this schema, showing its single field.
 *
 * @return a string of the form {@code SingleFieldIndexSchema{field=...}}
 */
@Override
public String toString() {
    return new StringBuilder("SingleFieldIndexSchema{field=")
            .append(field)
            .append('}')
            .toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,12 @@ public Variant next() {
@Override
public SampleVariantIndexEntry nextSampleVariantIndexEntry() {
AnnotationIndexEntry annotationIndexEntry = nextAnnotationIndexEntry();
BitBuffer fileIndex = null;
List<BitBuffer> filesIndex = new ArrayList<>();
if (hasFileIndex()) {
fileIndex = nextFileIndexEntry();
filesIndex.add(nextFileIndexEntry());
while (isMultiFileIndex()) {
filesIndex.add(nextMultiFileIndexEntry());
}
}
String genotype = nextGenotype();
int meCode = nextMendelianErrorCode();
Expand All @@ -136,7 +139,7 @@ public SampleVariantIndexEntry nextSampleVariantIndexEntry() {
parentsCode = nextParentsIndexEntry();
}
Variant variant = next();
return new SampleVariantIndexEntry(variant, fileIndex, genotype, annotationIndexEntry, parentsCode, meCode);
return new SampleVariantIndexEntry(variant, filesIndex, genotype, annotationIndexEntry, parentsCode, meCode);
}

@Override
Expand Down
Loading

0 comments on commit d69eb73

Please sign in to comment.