Skip to content

Commit

Permalink
storage: Revert default SampleIndexConfiguration changes. #TASK-4794
Browse files Browse the repository at this point in the history
  • Loading branch information
j-coll committed Jul 26, 2023
1 parent 2943017 commit b2f354a
Show file tree
Hide file tree
Showing 11 changed files with 164 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ public static SampleIndexConfiguration defaultConfiguration(boolean cellbaseV4)
.addFileIndexField(new IndexFieldConfiguration(
IndexFieldConfiguration.Source.FILE,
StudyEntry.FILTER,
IndexFieldConfiguration.Type.CATEGORICAL_MULTI_VALUE,
IndexFieldConfiguration.Type.CATEGORICAL,
VCFConstants.PASSES_FILTERS_v4))
.addFileIndexField(new IndexFieldConfiguration(
IndexFieldConfiguration.Source.FILE, StudyEntry.QUAL, QUAL_THRESHOLDS).setNullable(false))
Expand Down Expand Up @@ -714,4 +714,13 @@ public boolean equals(Object o) {
public int hashCode() {
// The hash is built from the file-index and annotation-index configurations only.
return Objects.hash(fileIndexConfiguration, annotationIndexConfiguration);
}

/**
 * Renders this configuration as text, listing the file-index and the
 * annotation-index configurations in that order.
 *
 * @return a single-line description of this object
 */
@Override
public String toString() {
    return "SampleIndexConfiguration{"
            + "fileIndexConfiguration=" + fileIndexConfiguration
            + ", annotationIndexConfiguration=" + annotationIndexConfiguration
            + '}';
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,7 @@
import org.opencb.opencga.storage.hadoop.variant.index.core.filters.MultiValueIndexFieldFilter;
import org.opencb.opencga.storage.hadoop.variant.index.core.filters.SingleValueIndexFieldFilter;

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.stream.Collectors;

/**
Expand All @@ -22,7 +19,7 @@
* Value "0" represents NA.
*/
public class CategoricalIndexField<T> extends IndexField<T> implements IndexCodec<T> {
private final int numBits;
private final int bitLength;
private final IndexCodec<T> codec;

public static CategoricalIndexField<String> create(IndexFieldConfiguration configuration, int bitOffset) {
Expand All @@ -43,18 +40,18 @@ public CategoricalIndexField(IndexFieldConfiguration configuration, int bitOffse
numValues = values.length;
codec = new BasicCodec<>(values, valuesMapping);
}
this.numBits = Math.max(1, IndexUtils.log2(numValues - 1) + 1);
this.bitLength = Math.max(1, IndexUtils.log2(numValues - 1) + 1);
}

public CategoricalIndexField(IndexFieldConfiguration configuration, int bitOffset, int numValues, IndexCodec<T> codec) {
super(configuration, bitOffset);
this.numBits = IndexUtils.log2(numValues - 1) + 1;
this.bitLength = IndexUtils.log2(numValues - 1) + 1;
this.codec = codec;
}

@Override
public int getBitLength() {
return numBits;
return bitLength;
}

@Override
Expand Down Expand Up @@ -124,6 +121,16 @@ public T decode(int code) {
public boolean ambiguous(int code) {
// Direct lookup of the flag stored for this code; a code outside the array bounds
// raises ArrayIndexOutOfBoundsException.
return ambiguousValues[code];
}

/**
 * Renders the codec state as text: the value array, the reverse value mapping
 * and the per-code ambiguity flags.
 *
 * @return a single-line description of this codec
 */
@Override
public String toString() {
    return new StringBuilder("BasicCodec{")
            .append("values=").append(Arrays.toString(values))
            .append(", valuesMappingRev=").append(valuesMappingRev)
            .append(", ambiguousValues=").append(Arrays.toString(ambiguousValues))
            .append('}')
            .toString();
}
}

private static class BasicCodecWithNa<T> implements IndexCodec<T> {
Expand Down Expand Up @@ -178,6 +185,20 @@ public T decode(int code) {
public boolean ambiguous(int code) {
// Direct lookup of the flag stored for this code; a code outside the array bounds
// raises ArrayIndexOutOfBoundsException.
return ambiguousValues[code];
}

/**
 * Renders the codec state as text: the value array, the reverse value mapping
 * and the per-code ambiguity flags.
 *
 * @return a single-line description of this codec
 */
@Override
public String toString() {
    return new StringBuilder("BasicCodecWithNa{")
            .append("values=").append(Arrays.toString(values))
            .append(", valuesMappingRev=").append(valuesMappingRev)
            .append(", ambiguousValues=").append(Arrays.toString(ambiguousValues))
            .append('}')
            .toString();
}
}

/**
 * Returns the codec held by this field. Being {@code protected}, the accessor is
 * available to subclasses and to classes in the same package.
 *
 * @return the value of the {@code codec} field
 */
protected IndexCodec<T> getCodec() {
return codec;
}

@Override
Expand All @@ -195,4 +216,14 @@ public boolean ambiguous(int code) {
return codec.ambiguous(code);
}

/**
 * Renders this field as text: its configuration, bit offset, bit length and codec.
 *
 * @return a single-line description of this field
 */
@Override
public String toString() {
    return "CategoricalIndexField{"
            + "configuration=" + getConfiguration()
            + ", bitOffset=" + getBitOffset()
            + ", bitLength=" + bitLength
            + ", codec=" + codec
            + '}';
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
*/
public class CategoricalMultiValuedIndexField<T> extends CategoricalIndexField<List<T>> {

private final int numBits;
private final int bitLength;

public static CategoricalMultiValuedIndexField<String> createMultiValued(IndexFieldConfiguration configuration, int bitOffset) {
return new CategoricalMultiValuedIndexField<>(
Expand All @@ -41,7 +41,7 @@ public CategoricalMultiValuedIndexField(IndexFieldConfiguration configuration, i

private CategoricalMultiValuedIndexField(IndexFieldConfiguration configuration, int bitOffset, T[] values, MaskValueCodec<T> codec) {
super(configuration, bitOffset, values.length, codec);
numBits = codec.numBits;
bitLength = codec.numBits;
}

@Override
Expand All @@ -59,7 +59,7 @@ protected IndexFieldFilter getSingleValueIndexFilter(OpValue<List<T>> opValue) {

@Override
public int getBitLength() {
return numBits;
return bitLength;
}

/**
Expand Down Expand Up @@ -145,6 +145,28 @@ public List<T> decode(int code) {
public boolean ambiguous(int code) {
    // The NA code is always ambiguous.
    if (code == NA) {
        return true;
    }
    // Otherwise the code is ambiguous when it shares at least one bit with the mask.
    return (code & ambiguousValues) != 0;
}

/**
 * Renders the codec state as text: the value array, the position of the "other"
 * value, the value positions, the bit count and the ambiguity mask.
 *
 * @return a single-line description of this codec
 */
@Override
public String toString() {
    return new StringBuilder("MaskValueCodec{")
            .append("values=").append(Arrays.toString(values))
            .append(", otherValuePosition=").append(otherValuePosition)
            .append(", valuesPosition=").append(valuesPosition)
            .append(", numBits=").append(numBits)
            .append(", ambiguousValues=").append(ambiguousValues)
            .append('}')
            .toString();
}
}

/**
 * Renders this field as text: its configuration, codec, bit offset and bit length.
 *
 * @return a single-line description of this field
 */
@Override
public String toString() {
    return "CategoricalMultiValuedIndexField{"
            + "configuration=" + getConfiguration()
            + ", codec=" + getCodec()
            + ", bitOffset=" + getBitOffset()
            + ", bitLength=" + bitLength
            + '}';
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,12 @@ public BitBuffer read(BitBuffer buffer, int i) {
return buffer.getBitBuffer(i * indexSizeBits, indexSizeBits);
}

/**
 * Renders this schema as text: the index size in bits followed by its fields.
 *
 * @return a single-line description of this schema
 */
@Override
public String toString() {
    return new StringBuilder("FixedSizeIndexSchema{")
            .append("indexSizeBits=").append(indexSizeBits)
            .append(", fields=").append(fields)
            .append('}')
            .toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -170,4 +170,12 @@ public R decode(int code) {
};
}

/**
 * Renders this field as text: its configuration and bit offset.
 *
 * @return a single-line description of this field
 */
@Override
public String toString() {
    return new StringBuilder("IndexField{")
            .append("configuration=").append(configuration)
            .append(", bitOffset=").append(bitOffset)
            .append('}')
            .toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import org.opencb.opencga.storage.hadoop.variant.index.core.filters.IndexFieldFilter;
import org.opencb.opencga.storage.hadoop.variant.index.core.filters.RangeIndexFieldFilter;

import java.util.Arrays;
import java.util.List;

/**
Expand All @@ -18,7 +19,7 @@ public class RangeIndexField extends IndexField<Double> {
private final double[] thresholds;
private final double min;
private final double max;
private final int numBits;
private final int bitLength;
private final IndexCodec<Double> codec;
private int numRanges;

Expand All @@ -40,7 +41,7 @@ public RangeIndexField(IndexFieldConfiguration configuration, int bitOffset, dou
} else {
codec = new NonNullableRangeCodec();
}
numBits = Math.max(1, IndexUtils.log2(numRanges - 1) + 1);
bitLength = Math.max(1, IndexUtils.log2(numRanges - 1) + 1);
if (configuration.getType().equals(IndexFieldConfiguration.Type.RANGE_GT)) {
// Add one DELTA to each value to invert ranges from [s, e) to (s, e], therefore the operation ">" is exact
for (int i = 0; i < thresholds.length; i++) {
Expand Down Expand Up @@ -77,7 +78,7 @@ public double getMax() {

@Override
public int getBitLength() {
return numBits;
return bitLength;
}

@Override
Expand Down Expand Up @@ -127,6 +128,13 @@ public Double decode(int code) {
public boolean ambiguous(int code) {
// Every code is reported as ambiguous; the argument is not inspected.
return true;
}

/**
 * Returns a fixed label; no codec state is included in the text.
 *
 * @return the literal text {@code NonNullableRangeCodec} followed by an empty pair of braces
 */
@Override
public String toString() {
    return "NonNullableRangeCodec{" + '}';
}
}

public class NullableRangeCodec extends NonNullableRangeCodec {
Expand All @@ -142,6 +150,13 @@ public int encode(Double value) {
public Double decode(int code) {
    // The NA code carries no numeric value.
    if (code == NA) {
        return null;
    }
    // Any other code is shifted down by one before delegating to the parent codec.
    return super.decode(code - 1);
}

/**
 * Returns a fixed label; no codec state is included in the text.
 *
 * @return the literal text {@code NullableRangeCodec} followed by an empty pair of braces
 */
@Override
public String toString() {
    return "NullableRangeCodec{" + '}';
}
}

/**
Expand Down Expand Up @@ -174,4 +189,19 @@ public static boolean lessThan(double a, double b) {
public static boolean equalsTo(double a, double b) {
    // Two values count as equal when they differ by less than a tenth of DELTA.
    final double distance = Math.abs(a - b);
    return distance < (DELTA / 10);
}

/**
 * Renders this field as text: configuration, bit offset, bit length, thresholds,
 * minimum, maximum, codec and number of ranges.
 *
 * @return a single-line description of this field
 */
@Override
public String toString() {
    return "RangeIndexField{"
            + "configuration=" + getConfiguration()
            + ", bitOffset=" + getBitOffset()
            + ", bitLength=" + bitLength
            + ", thresholds=" + Arrays.toString(thresholds)
            + ", min=" + min
            + ", max=" + max
            + ", codec=" + codec
            + ", numRanges=" + numRanges
            + '}';
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,12 @@ public int readFieldValue(BitBuffer buffer, int i) {
// return getField().read(read(buffer, i));
return buffer.getIntPartial(i * getBitsLength(), getBitsLength());
}

/**
 * Renders this schema as text, showing the single field it wraps.
 *
 * @return a single-line description of this schema
 */
@Override
public String toString() {
    return new StringBuilder("SingleFieldIndexSchema{")
            .append("field=").append(field)
            .append('}')
            .toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,12 @@ private Collection<T> filter(SampleIndexEntry entry, boolean count) {
entry.getSampleId(),
entry.getChromosome(), entry.getBatchStart(),
gtEntry.getGt());
logger.warn(gtEntry.toStringSummary());
try {
logger.warn(gtEntry.toStringSummary());
logger.warn(converter.getSchema().toString());
} catch (Exception exception) {
e.addSuppressed(exception);
}
throw e;
}
if (!variants.isEmpty()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,22 @@ public FileIndexSchema getFileIndex() {
return fileIndex;
}

/**
 * Renders this schema as text: the version, the configuration and each of the
 * index schemas it holds, in declaration order of the appends below.
 *
 * @return a single-line description of this schema
 */
@Override
public String toString() {
    return new StringBuilder("SampleIndexSchema{")
            .append("version=").append(version)
            .append(", configuration=").append(configuration)
            .append(", fileIndex=").append(fileIndex)
            .append(", popFreqIndex=").append(popFreqIndex)
            .append(", ctIndex=").append(ctIndex)
            .append(", biotypeIndex=").append(biotypeIndex)
            .append(", transcriptFlagIndexSchema=").append(transcriptFlagIndexSchema)
            .append(", ctBtTfIndex=").append(ctBtTfIndex)
            .append(", clinicalIndexSchema=").append(clinicalIndexSchema)
            .append('}')
            .toString();
}

/**
 * Rounds a position down (towards zero) to a multiple of {@code BATCH_SIZE}.
 *
 * @param start position to align; unboxed on use, so {@code null} raises a NullPointerException
 * @return the multiple of {@code BATCH_SIZE} obtained by integer division followed by multiplication
 */
public static int getChunkStart(Integer start) {
    final int chunkNumber = start / BATCH_SIZE;
    return chunkNumber * BATCH_SIZE;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,10 @@ public MendelianErrorSampleIndexEntryIterator toMendelianIterator(SampleIndexEnt
return new MendelianErrorSampleIndexEntryIterator(sampleIndexEntry, schema);
}

/**
 * Returns the sample index schema held by this object.
 *
 * @return the value of the {@code schema} field
 */
public SampleIndexSchema getSchema() {
return schema;
}

private abstract static class SampleIndexGtEntryIterator implements SampleIndexEntryIterator {
protected SampleIndexEntry.SampleIndexGtEntry gtEntry;
private final SampleIndexSchema schema;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,11 @@ public VariantType decode(int code) {
public boolean ambiguous(int code) {
    // Only the OTHER and CNV type codes are reported as ambiguous.
    if (code == TYPE_OTHER_CODE) {
        return true;
    }
    return code == TYPE_CNV_CODE;
}

/**
 * Returns a fixed label; no codec state is included in the text.
 *
 * @return the literal text {@code VariantTypeIndexCodec} followed by an empty pair of braces
 */
@Override
public String toString() {
    return "VariantTypeIndexCodec{" + '}';
}
}

0 comments on commit b2f354a

Please sign in to comment.