From 9c3113196e67024e5b5fd6fac36b76b05aba9bed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Thu, 16 May 2024 13:09:16 +0100 Subject: [PATCH] storage: Mark symbolic inversion as supported. #TASK-6240 --- .../src/test/resources/variant-test-sv.vcf | 2 +- .../src/test/resources/variant-test-sv_2.vcf | 2 +- .../HadoopLocalLoadVariantStoragePipeline.java | 7 ++++--- .../hadoop/variant/HadoopVariantStorageEngine.java | 13 +++---------- .../variant/gaps/AbstractFillFromArchiveTask.java | 4 ++-- 5 files changed, 11 insertions(+), 17 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/test/resources/variant-test-sv.vcf b/opencga-storage/opencga-storage-core/src/test/resources/variant-test-sv.vcf index bce755b9486..9fc4fe9c325 100644 --- a/opencga-storage/opencga-storage-core/src/test/resources/variant-test-sv.vcf +++ b/opencga-storage/opencga-storage-core/src/test/resources/variant-test-sv.vcf @@ -46,7 +46,7 @@ 1 700000 . C . PASS SVTYPE=DEL;END=700297;SVLEN=-297;CIPOS=-22,18;CIEND=-12,32 GT 0/1 0/1 1 800000 . A . PASS SVTYPE=INS;END=800000;SVLEN=6027;CIPOS=-16,22;RIGHT_SVINSSEQ=ACCACACCCACACAACACACA;LEFT_SVINSSEQ=TGTGGTGTGTGTGGTGTG GT 0/1 0/1 1 850000 . A . PASS SVTYPE=INS;END=850000;SVINSSEQ=ACCACACCCACACAACACACAACCACACCCACACAACACACAACCACACCCACACAACACACAACCACACCCACACAACACACAACCACACCCACACAACACACAACCACACCCACACAACACACAACCACACCCACACAACACACAACCACACCCACACAACACACAACCACACCCACACAACACACAACCACACCCACACAACACACAACCACACCCACACAACACACA GT 0/1 0/1 -1 860000 . A . PASS SVTYPE=INS;END=860000;SVLEN=1000 GT 0/1 0/1 +1 860000 . A . PASS SVTYPE=INVERSION;END=870000 GT 0/1 0/1 1 900000 . G . PASS SVTYPE=INS;END=900000;SVLEN=6027;CIPOS=-16,22 GT 0/1 0/1 1 1000000 . A . PASS SVTYPE=DUP;END=1021100;SVLEN=21100;CIPOS=-500,500;CIEND=-500,500 GT 0/1 0/1 1 1100000 . T . PASS SVTYPE=DUP;END=1100076;SVLEN=76;CIPOS=-10,10;CIEND=-10,10 GT 0/1 0/1 diff --git a/opencga-storage/opencga-storage-core/src/test/resources/variant-test-sv_2.vcf b/opencga-storage/opencga-storage-core/src/test/resources/variant-test-sv_2.vcf index c6e2b9176e4..32ad6f4c4f0 100644 --- a/opencga-storage/opencga-storage-core/src/test/resources/variant-test-sv_2.vcf +++ b/opencga-storage/opencga-storage-core/src/test/resources/variant-test-sv_2.vcf @@ -46,7 +46,7 @@ 1 700010 . T C . PASS . GT 0/1 0/1 1 800000 . A . PASS SVTYPE=INS;END=800000;SVLEN=6027;CIPOS=-16,22;RIGHT_SVINSSEQ=TGTGGTGTGTGTGGTGTG;LEFT_SVINSSEQ=ACCACACCCACACAACACACA GT 0/1 0/1 1 850000 . A . PASS SVTYPE=INS;END=850000;SVINSSEQ=TGTGGTGTGTGTGGTGTGTGTGGTGTGTGTGGTGTGTGTGGTGTGTGTGGTGTGTGTGGTGTGTGTGGTGTGTGTGGTGTGTGTGGTGTGTGTGGTGTGTGTGGTGTGTGTGGTGTGTGTGGTGTGTGTGGTGTGTGTGGTGTG GT 0/1 0/1 -1 860000 . A . PASS SVTYPE=INS;END=860000;SVLEN=1000 GT 0/1 0/1 +1 860000 . A . PASS SVTYPE=INVERSION;END=870000 GT 0/1 0/1 1 900000 . G . PASS SVTYPE=INS;END=900000;SVLEN=6027;CIPOS=-16,22 GT 0/1 0/1 1 1000000 . A . PASS SVTYPE=DUP;END=1021100;SVLEN=21100;CIPOS=-500,500;CIEND=-500,500 GT 0/1 0/1 1 1100000 . T . PASS SVTYPE=DUP;END=1100076;SVLEN=76;CIPOS=-10,10;CIEND=-10,10 GT 0/1 0/1 diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopLocalLoadVariantStoragePipeline.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopLocalLoadVariantStoragePipeline.java index 5c3d0e7a0d1..4fdeca66595 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopLocalLoadVariantStoragePipeline.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopLocalLoadVariantStoragePipeline.java @@ -70,6 +70,7 @@ import static org.opencb.opencga.storage.core.metadata.models.TaskMetadata.Type; import static org.opencb.opencga.storage.core.variant.VariantStorageOptions.*; import static org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageEngine.TARGET_VARIANT_TYPE_SET; +import static org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageEngine.UNSUPPORTED_VARIANT_TYPE_SET; import static org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageOptions.*; /** @@ -390,13 +391,13 @@ protected void loadFromAvroWithArchive(URI input, URI outdir, ArchiveTableHelper throw new StorageEngineException("Error loading file " + input, e); } - logLoadResults(variantReader.getVariantFileMetadata(), resolver, hadoopDBWriter); if (sampleIndexDBLoader != null) { // Update list of loaded genotypes this.loadedGenotypes = sampleIndexDBLoader.getLoadedGenotypes(); this.sampleIndexVersion = sampleIndexDBLoader.getSampleIndexVersion(); this.largestVariantLength = largestVariantTask.getMaxLength(); } + logLoadResults(variantReader.getVariantFileMetadata(), resolver, hadoopDBWriter); } protected void loadFromAvroWithoutArchive(URI input, URI outdir, ArchiveTableHelper helper, ProgressLogger progressLogger) @@ -438,13 +439,13 @@ protected void loadFromAvroWithoutArchive(URI input, URI outdir, ArchiveTableHel throw new StorageEngineException("Error loading file " + input, e); } - logLoadResults(variantReader.getVariantFileMetadata(), resolver, hadoopDBWriter); if (sampleIndexDBLoader != null) { // Update list of loaded genotypes this.loadedGenotypes = sampleIndexDBLoader.getLoadedGenotypes(); this.sampleIndexVersion = sampleIndexDBLoader.getSampleIndexVersion(); this.largestVariantLength = largestVariantTask.getMaxLength(); } + logLoadResults(variantReader.getVariantFileMetadata(), resolver, hadoopDBWriter); } private void logLoadResults(VariantFileMetadata variantFileMetadata, @@ -490,7 +491,7 @@ private void logLoadResults(VariantFileMetadata variantFileMetadata, int duplica if (skipped > 0) { logger.info("There were " + skipped + " skipped variants"); for (VariantType type : VariantType.values()) { - if (!TARGET_VARIANT_TYPE_SET.contains(type)) { + if (UNSUPPORTED_VARIANT_TYPE_SET.contains(type)) { Long countByType = variantFileMetadata.getStats().getTypeCount().get(type.toString()); if (countByType != null && countByType > 0) { logger.info(" * Of which " + countByType + " are " + type.toString() + " variants."); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java index 65927c62fb2..18c0f364329 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java @@ -121,17 +121,10 @@ public class HadoopVariantStorageEngine extends VariantStorageEngine implements Configurable { public static final String STORAGE_ENGINE_ID = "hadoop"; - public static final EnumSet TARGET_VARIANT_TYPE_SET = EnumSet.of( - VariantType.SNV, VariantType.SNP, - VariantType.INDEL, - VariantType.MNV, VariantType.MNP, - VariantType.INSERTION, VariantType.DELETION, - VariantType.CNV, - VariantType.COPY_NUMBER, VariantType.COPY_NUMBER_LOSS, VariantType.COPY_NUMBER_GAIN, - VariantType.DUPLICATION, VariantType.TANDEM_DUPLICATION, VariantType.TRANSLOCATION, - VariantType.BREAKEND, - VariantType.SV, VariantType.SYMBOLIC + public static final EnumSet UNSUPPORTED_VARIANT_TYPE_SET = EnumSet.of( + VariantType.NO_VARIATION, VariantType.MIXED ); + public static final EnumSet TARGET_VARIANT_TYPE_SET = EnumSet.complementOf(UNSUPPORTED_VARIANT_TYPE_SET); public static final String FILE_ID = "fileId"; public static final String STUDY_ID = "studyId"; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/gaps/AbstractFillFromArchiveTask.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/gaps/AbstractFillFromArchiveTask.java index d218cf0a470..1e0a01f89b4 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/gaps/AbstractFillFromArchiveTask.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/gaps/AbstractFillFromArchiveTask.java @@ -32,7 +32,7 @@ import java.util.*; import java.util.concurrent.TimeUnit; -import static org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageEngine.TARGET_VARIANT_TYPE_SET; +import static org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageEngine.UNSUPPORTED_VARIANT_TYPE_SET; /** * Created on 31/10/17. @@ -348,7 +348,7 @@ protected static Scan buildScan(String regionStr, int fileId, Configuration conf protected static boolean isVariantAlreadyLoaded(VcfSliceProtos.VcfSlice slice, VcfSliceProtos.VcfRecord vcfRecord) { VariantType variantType = VcfRecordProtoToVariantConverter.getVariantType(vcfRecord.getType()); // The variant is not loaded if is a NO_VARIATION (fast check first) - if (!TARGET_VARIANT_TYPE_SET.contains(variantType)) { + if (UNSUPPORTED_VARIANT_TYPE_SET.contains(variantType)) { return false; }