diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/custom/CustomUsersCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/custom/CustomUsersCommandExecutor.java index bfef1fbca25..c608f5e2c1a 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/custom/CustomUsersCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/custom/CustomUsersCommandExecutor.java @@ -77,7 +77,7 @@ public RestResponse login(CustomUsersCommandOptions.Logi String token = session.getSession().getToken(); String errorMsg = "Missing password. "; if (StringUtils.isNotEmpty(token)) { - errorMsg += "Active token detected "; + errorMsg += "Active token detected. Please logout first."; } CommandLineUtils.error(errorMsg); } diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/processors/CommandProcessor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/processors/CommandProcessor.java index ef06e2c8944..8c43c340ab6 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/processors/CommandProcessor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/processors/CommandProcessor.java @@ -136,9 +136,13 @@ private boolean checkAutoRefresh(OpencgaCommandExecutor commandExecutor) { public void loadSessionStudies(OpencgaCommandExecutor commandExecutor) { Session session = commandExecutor.getSessionManager().getSession(); - logger.debug("Loading session studies using token: " - + session.getToken()); OpenCGAClient openCGAClient = commandExecutor.getOpenCGAClient(); + logger.debug("openCGAClient Token: " + openCGAClient.getToken()); + if(StringUtils.isEmpty(openCGAClient.getToken())) { + openCGAClient.setToken(session.getToken()); + } + logger.debug("Loading session studies using token: " + + openCGAClient.getToken()); try { // Query the server to retrieve the studies of user projects RestResponse res = openCGAClient.getProjectClient().search(new ObjectMap()); diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v2/v2_12_5/storage/DetectIllegalConcurrentFileLoadingsMigration.java b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v2/v2_12_5/storage/DetectIllegalConcurrentFileLoadingsMigration.java index aba2f366b77..e6a885fbafa 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v2/v2_12_5/storage/DetectIllegalConcurrentFileLoadingsMigration.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v2/v2_12_5/storage/DetectIllegalConcurrentFileLoadingsMigration.java @@ -1,5 +1,6 @@ package org.opencb.opencga.app.migrations.v2.v2_12_5.storage; +import org.apache.commons.lang3.tuple.Pair; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; @@ -67,6 +68,8 @@ private void checkStudy(VariantStorageEngine engine, String study) throws Storag if (fileSets.isEmpty()) { logger.info("No concurrent file loadings found in study '{}'", study); return; + } else { + logger.info("Found {} sets of files with shared samples in study '{}'", fileSets.size(), study); } Map fileTasks = new HashMap<>(); @@ -85,10 +88,12 @@ private void checkStudy(VariantStorageEngine engine, String study) throws Storag } } - Set> fileSetsToInvalidate = new HashSet<>(); - Set affectedFiles = new HashSet<>(); - Set affectedSamples = new HashSet<>(); for (Set fileSet : fileSets) { + Set affectedFiles = new HashSet<>(); + Set affectedSamples = new HashSet<>(); + Set invalidFiles = new HashSet<>(); + Set invalidSampleIndexes = new HashSet<>(); + // Check if any task from this file set overlaps in time List tasks = new ArrayList<>(); for (Integer fileId : fileSet) { @@ -97,8 +102,11 @@ private void checkStudy(VariantStorageEngine engine, String study) throws Storag tasks.add(task); } } - if (tasks.size() > 1) { - logger.info("Found {} tasks loading files {}", tasks.size(), fileSet); + if (tasks.size() <= 1) { + continue; + } else { + logger.info("--------------------"); + logger.info("Found {} tasks loading files {} in study {}", tasks.size(), fileSet, study); for (int i = 0; i < tasks.size(); i++) { TaskMetadata task1 = tasks.get(i); Date task1start = task1.getStatus().firstKey(); @@ -108,8 +116,7 @@ private void checkStudy(VariantStorageEngine engine, String study) throws Storag Date task2start = task2.getStatus().firstKey(); Date task2end = task2.getStatus().lastKey(); if (task1start.before(task2end) && task1end.after(task2start)) { - fileSetsToInvalidate.add(fileSet); - affectedFiles.addAll(task1.getFileIds()); + affectedFiles.addAll(fileSet); List task1Files = task1.getFileIds().stream().map(fileId -> "'" + metadataManager.getFileName(studyId, fileId) + "'(" + fileId + ")").collect(Collectors.toList()); List task2Files = task2.getFileIds().stream().map(fileId -> "'" + metadataManager.getFileName(studyId, fileId) + "'(" + fileId + ")").collect(Collectors.toList()); @@ -131,8 +138,6 @@ private void checkStudy(VariantStorageEngine engine, String study) throws Storag } } - Set invalidFiles = new HashSet<>(); - List invalidSampleIndexes = new ArrayList<>(); for (Integer sampleId : affectedSamples) { String sampleName = metadataManager.getSampleName(studyId, sampleId); SampleMetadata sampleMetadata = metadataManager.getSampleMetadata(studyId, sampleId); @@ -145,7 +150,7 @@ private void checkStudy(VariantStorageEngine engine, String study) throws Storag metadataManager.getFileName(studyId, file), file); } } - } else if (sampleMetadata.getSampleIndexStatus(Optional.of(sampleMetadata.getSampleIndexVersion()).orElse(-1)) == TaskMetadata.Status.READY) { + } else if (sampleMetadata.getSampleIndexStatus(Optional.ofNullable(sampleMetadata.getSampleIndexVersion()).orElse(-1)) == TaskMetadata.Status.READY) { for (Integer fileId : sampleMetadata.getFiles()) { if (affectedFiles.contains(fileId)) { FileMetadata fileMetadata = metadataManager.getFileMetadata(studyId, fileId); @@ -195,6 +200,8 @@ private void checkStudy(VariantStorageEngine engine, String study) throws Storag } } } else { + logger.info("Sample '{}'({}) sample index is not in READY status. Invalidate to ensure rebuild", sampleName, sampleId); + logger.info(" - Invalidating sample index for sample '{}'({})", sampleName, sampleId); invalidSampleIndexes.add(sampleId); } } @@ -210,9 +217,19 @@ private void checkStudy(VariantStorageEngine engine, String study) throws Storag invalidSamples.addAll(metadataManager.getSampleIdsFromFileId(studyId, fileId)); } - logger.info("Affected files: {}", invalidFiles); - logger.info("Affected samples: {}", invalidSamples); - logger.info("Affected sample indexes: {}", invalidSampleIndexes); + logger.info("Study '{}'", study); + List> invalidFilesPairs = invalidFiles.stream() + .map(fileId -> Pair.of(metadataManager.getFileName(studyId, fileId), fileId)) + .collect(Collectors.toList()); + logger.info("Affected files: {}", invalidFilesPairs); + List> invalidSamplesPairs = invalidSamples.stream() + .map(sampleId -> Pair.of(metadataManager.getSampleName(studyId, sampleId), sampleId)) + .collect(Collectors.toList()); + logger.info("Affected samples: {}", invalidSamplesPairs); + List> invalidSampleIndexesPairs = invalidSampleIndexes.stream() + .map(sampleId -> Pair.of(metadataManager.getSampleName(studyId, sampleId), sampleId)) + .collect(Collectors.toList()); + logger.info("Affected sample indexes: {}", invalidSampleIndexesPairs); } } else { ObjectMap event = new ObjectMap() diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v2/v2_12_6/SyncCohortsAndSamplesMigration.java b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v2/v2_12_6/SyncCohortsAndSamplesMigration.java index 6f6f5cbf18d..e7ea95c807d 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v2/v2_12_6/SyncCohortsAndSamplesMigration.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v2/v2_12_6/SyncCohortsAndSamplesMigration.java @@ -9,20 +9,24 @@ import org.bson.conversions.Bson; import org.opencb.opencga.catalog.db.api.CohortDBAdaptor; import org.opencb.opencga.catalog.db.api.SampleDBAdaptor; +import org.opencb.opencga.catalog.db.api.StudyDBAdaptor; import org.opencb.opencga.catalog.db.mongodb.MongoDBAdaptor; import org.opencb.opencga.catalog.db.mongodb.OrganizationMongoDBAdaptorFactory; import org.opencb.opencga.catalog.migration.Migration; import org.opencb.opencga.catalog.migration.MigrationTool; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.stream.Collectors; @Migration(id = "syncCohortsAndSamplesMigration" , - description = "Sync array of samples from cohort with array of cohortIds from Sample", + description = "Sync array of samples from cohort with array of cohortIds from Sample.", version = "2.12.6", domain = Migration.MigrationDomain.CATALOG, language = Migration.MigrationLanguage.JAVA, - date = 20240621 + date = 20240621, + patch = 2 // TASK-6998 ) public class SyncCohortsAndSamplesMigration extends MigrationTool { @@ -31,8 +35,18 @@ protected void run() throws Exception { MongoCollection sampleCollection = getMongoCollection(OrganizationMongoDBAdaptorFactory.SAMPLE_COLLECTION); MongoCollection sampleArchiveCollection = getMongoCollection(OrganizationMongoDBAdaptorFactory.SAMPLE_ARCHIVE_COLLECTION); + // Fill map study uid - fqn + Map uidFqnMap = new HashMap<>(); + Bson studyProjection = Projections.include(StudyDBAdaptor.QueryParams.UID.key(), StudyDBAdaptor.QueryParams.FQN.key()); + queryMongo(OrganizationMongoDBAdaptorFactory.STUDY_COLLECTION, new Document(), studyProjection, study -> { + long studyUid = study.get(StudyDBAdaptor.QueryParams.UID.key(), Number.class).longValue(); + String studyFqn = study.getString(StudyDBAdaptor.QueryParams.FQN.key()); + uidFqnMap.put(studyUid, studyFqn); + }); + queryMongo(OrganizationMongoDBAdaptorFactory.COHORT_COLLECTION, new Document(), - Projections.include(CohortDBAdaptor.QueryParams.ID.key(), CohortDBAdaptor.QueryParams.SAMPLES.key()), + Projections.include(CohortDBAdaptor.QueryParams.ID.key(), CohortDBAdaptor.QueryParams.SAMPLES.key(), + CohortDBAdaptor.QueryParams.STUDY_UID.key()), cohortDoc -> { String cohortId = cohortDoc.getString(CohortDBAdaptor.QueryParams.ID.key()); List samples = cohortDoc.getList(CohortDBAdaptor.QueryParams.SAMPLES.key(), Document.class); @@ -50,8 +64,11 @@ protected void run() throws Exception { long addedMissingCohort = sampleCollection.updateMany(query, update).getModifiedCount(); sampleArchiveCollection.updateMany(query, update); + long studyUid = cohortDoc.get(CohortDBAdaptor.QueryParams.STUDY_UID.key(), Number.class).longValue(); + // Ensure there aren't any samples pointing to this cohort that are not in the samples array query = Filters.and( + Filters.eq(SampleDBAdaptor.QueryParams.STUDY_UID.key(), studyUid), Filters.nin(SampleDBAdaptor.QueryParams.UID.key(), sampleUids), Filters.eq(SampleDBAdaptor.QueryParams.COHORT_IDS.key(), cohortId), Filters.eq(MongoDBAdaptor.LAST_OF_VERSION, true) @@ -61,10 +78,10 @@ protected void run() throws Exception { sampleArchiveCollection.updateMany(query, update); if (addedMissingCohort > 0 || removedNonAssociatedCohort > 0) { - logger.info("Fixed cohort '{}' references. " + logger.info("Fixed cohort '{}' references from study '{}'. " + "Added missing reference to {} samples. " + "Removed non-associated reference from {} samples.", - cohortId, addedMissingCohort, removedNonAssociatedCohort); + cohortId, uidFqnMap.get(studyUid), addedMissingCohort, removedNonAssociatedCohort); } } }); diff --git a/opencga-core/src/main/resources/configuration.yml b/opencga-core/src/main/resources/configuration.yml index bb513b9df58..4eb3543f791 100644 --- a/opencga-core/src/main/resources/configuration.yml +++ b/opencga-core/src/main/resources/configuration.yml @@ -9,7 +9,7 @@ jobDir: ${OPENCGA.USER.WORKSPACE}/jobs # Maximum number of login attempts before banning a user account account: maxLoginAttempts: ${OPENCGA.ACCOUNT.MAX_LOGIN_ATTEMPTS} - passwordExpirationDays: 0 + passwordExpirationDays: 90 panel: host: "http://resources.opencb.org/opencb/opencga/disease-panels" diff --git a/opencga-server/src/main/java/org/opencb/opencga/server/rest/OpenCGAWSServer.java b/opencga-server/src/main/java/org/opencb/opencga/server/rest/OpenCGAWSServer.java index 3dae031c245..2df5a8d71f4 100644 --- a/opencga-server/src/main/java/org/opencb/opencga/server/rest/OpenCGAWSServer.java +++ b/opencga-server/src/main/java/org/opencb/opencga/server/rest/OpenCGAWSServer.java @@ -882,7 +882,12 @@ protected DataResult submitJobRaw(String toolId, String project, String stu String jobDescription, String jobDependsOnStr, String jobTagsStr, String jobScheduledStartTime, String jobPriority, Boolean dryRun) throws CatalogException { - Map paramsMap = bodyParams.toParams(); + Map paramsMap; + if (bodyParams != null) { + paramsMap = bodyParams.toParams(); + } else { + paramsMap = new HashMap<>(); + } if (StringUtils.isNotEmpty(study)) { paramsMap.putIfAbsent(ParamConstants.STUDY_PARAM, study); } diff --git a/opencga-server/src/main/java/org/opencb/opencga/server/rest/operations/VariantOperationWebService.java b/opencga-server/src/main/java/org/opencb/opencga/server/rest/operations/VariantOperationWebService.java index 14fb051c85c..4b9613b9268 100644 --- a/opencga-server/src/main/java/org/opencb/opencga/server/rest/operations/VariantOperationWebService.java +++ b/opencga-server/src/main/java/org/opencb/opencga/server/rest/operations/VariantOperationWebService.java @@ -593,7 +593,12 @@ public Response submitOperationToProject(String toolId, String project, ToolPara public Response submitOperation(String toolId, String project, String study, ToolParams params, String jobName, String jobDescription, String jobDependsOn, String jobTags, String jobScheduledStartTime, String jobPriority, Boolean dryRun) { try { - Map paramsMap = params.toParams(); + Map paramsMap; + if (params != null) { + paramsMap = params.toParams(); + } else { + paramsMap = new HashMap<>(); + } if (StringUtils.isNotEmpty(study)) { paramsMap.put(ParamConstants.STUDY_PARAM, study); } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java index 847ae860d70..73abcbae61f 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java @@ -81,6 +81,10 @@ public enum VariantStorageOptions implements ConfigurationOption { ANNOTATOR_CELLBASE_VARIANT_LENGTH_THRESHOLD("annotator.cellbase.variantLengthThreshold", Integer.MAX_VALUE), ANNOTATOR_CELLBASE_IMPRECISE_VARIANTS("annotator.cellbase.impreciseVariants", true), ANNOTATOR_CELLBASE_STAR_ALTERNATE("annotator.cellbase.starAlternate", false), + ANNOTATOR_EXTENSION_PREFIX("annotator.extension."), + ANNOTATOR_EXTENSION_LIST("annotator.extension.list"), + ANNOTATOR_EXTENSION_COSMIC_FILE("annotator.extension.cosmic.file"), + ANNOTATOR_EXTENSION_COSMIC_VERSION("annotator.extension.cosmic.version"), INDEX_SEARCH("indexSearch", false), // Build secondary indexes using search engine. diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/DefaultVariantAnnotationManager.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/DefaultVariantAnnotationManager.java index 78229d47ef6..f8e7f4c4b90 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/DefaultVariantAnnotationManager.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/DefaultVariantAnnotationManager.java @@ -52,6 +52,8 @@ import org.opencb.opencga.storage.core.variant.adaptors.VariantField; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.annotation.annotators.VariantAnnotator; +import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.VariantAnnotatorExtensionTask; +import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.VariantAnnotatorExtensionsFactory; import org.opencb.opencga.storage.core.variant.io.VariantReaderUtils; import org.opencb.opencga.storage.core.variant.io.db.VariantAnnotationDBWriter; import org.opencb.opencga.storage.core.variant.io.db.VariantDBReader; @@ -265,6 +267,13 @@ public URI createAnnotation(URI outDir, String fileName, Query query, ObjectMap return variantAnnotationList; }; + List extensions = new VariantAnnotatorExtensionsFactory().getVariantAnnotatorExtensions(params); + for (VariantAnnotatorExtensionTask extension : extensions) { + extension.setup(outDir); + extension.checkAvailable(); + annotationTask = annotationTask.then(extension); + } + final DataWriter variantAnnotationDataWriter; if (avro) { //FIXME @@ -286,7 +295,7 @@ public URI createAnnotation(URI outDir, String fileName, Query query, ObjectMap ParallelTaskRunner parallelTaskRunner = new ParallelTaskRunner<>(variantDataReader, annotationTask, variantAnnotationDataWriter, config); parallelTaskRunner.run(); - } catch (ExecutionException e) { + } catch (Exception e) { throw new VariantAnnotatorException("Error creating annotations", e); } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionTask.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionTask.java new file mode 100644 index 00000000000..03255409123 --- /dev/null +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionTask.java @@ -0,0 +1,59 @@ +package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions; + +import org.opencb.biodata.models.variant.avro.VariantAnnotation; +import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.commons.run.Task; + +import java.net.URI; +import java.util.List; + +public interface VariantAnnotatorExtensionTask extends Task { + + /** + * Set up the annotator extension. + * This method will be called before any other method. It might generate extra files or data needed for the annotation. + * + * @param output Output directory where the annotator extension should write the files + * @return List of URIs of generated files (if any) + * @throws Exception if the annotator extension set up fails + */ + List setup(URI output) throws Exception; + + /** + * Check if the annotator extension is available for the given options. + * @throws IllegalArgumentException if the annotator extension is not available + */ + void checkAvailable() throws IllegalArgumentException; + + /** + * Check if the annotator extension is available for the given options. Do not throw any exception if the extension is not available. + * @return true if the annotator extension is available + */ + default boolean isAvailable() { + try { + checkAvailable(); + return true; + } catch (IllegalArgumentException e) { + return false; + } + } + + @Override + default void pre() throws Exception { + Task.super.pre(); + checkAvailable(); + } + + /** + * Get the options for the annotator extension. + * @return Options for the annotator extension + */ + ObjectMap getOptions(); + + /** + * Get the metadata for the annotator extension. + * @return Metadata for the annotator extension + */ + ObjectMap getMetadata(); + +} diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionsFactory.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionsFactory.java new file mode 100644 index 00000000000..f20dadda289 --- /dev/null +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionsFactory.java @@ -0,0 +1,54 @@ +package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions; + +import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.opencga.storage.core.variant.VariantStorageOptions; +import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic.CosmicVariantAnnotatorExtensionTask; + +import java.lang.reflect.InvocationTargetException; +import java.util.LinkedList; +import java.util.List; + +public class VariantAnnotatorExtensionsFactory { + + public List getVariantAnnotatorExtensions(ObjectMap options) { + + List tasks = new LinkedList<>(); + for (String extensionId : options.getAsStringList(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key())) { + VariantAnnotatorExtensionTask task = null; + switch (extensionId) { + case CosmicVariantAnnotatorExtensionTask.ID: + task = new CosmicVariantAnnotatorExtensionTask(options); + break; + default: + String extensionClass = options.getString(VariantStorageOptions.ANNOTATOR_EXTENSION_PREFIX.key() + extensionId); + if (extensionClass != null) { + task = getVariantAnnotatorExtension(extensionClass, options); + } else { + throw new IllegalArgumentException("Unknown annotator extension '" + extensionId + "'"); + } + } + + if (task == null) { + throw new IllegalArgumentException("Unable to create annotator extension '" + extensionId + "'"); + } + + tasks.add(task); + } + return tasks; + } + + private VariantAnnotatorExtensionTask getVariantAnnotatorExtension(String className, ObjectMap options) { + try { + Class clazz = Class.forName(className); + return (VariantAnnotatorExtensionTask) clazz.getConstructor(ObjectMap.class).newInstance(options); + } catch (ClassNotFoundException + | NoSuchMethodException + | InstantiationException + | IllegalAccessException + | InvocationTargetException e) { + throw new IllegalArgumentException("Unable to create VariantAnnotatorExtensionTask from class " + className, e); + } + } + + +} diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicExtensionTaskCallback.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicExtensionTaskCallback.java new file mode 100644 index 00000000000..054ffaf4915 --- /dev/null +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicExtensionTaskCallback.java @@ -0,0 +1,86 @@ +package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.opencb.biodata.formats.variant.cosmic.CosmicParserCallback; +import org.opencb.biodata.models.sequence.SequenceLocation; +import org.opencb.biodata.models.variant.Variant; +import org.opencb.biodata.models.variant.avro.EvidenceEntry; +import org.opencb.biodata.models.variant.exceptions.NonStandardCompliantSampleField; +import org.opencb.biodata.tools.variant.VariantNormalizer; +import org.opencb.opencga.core.common.JacksonUtils; +import org.rocksdb.RocksDB; +import org.rocksdb.RocksDBException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +public class CosmicExtensionTaskCallback implements CosmicParserCallback { + + private RocksDB rdb; + private VariantNormalizer variantNormalizer; + private ObjectMapper defaultObjectMapper; + + private static Logger logger = LoggerFactory.getLogger(CosmicExtensionTaskCallback.class); + + private static final String VARIANT_STRING_PATTERN = "([ACGTN]*)|()|()|()|()|()"; + + public CosmicExtensionTaskCallback(RocksDB rdb) { + this.rdb = rdb; + this.variantNormalizer = new VariantNormalizer(new VariantNormalizer.VariantNormalizerConfig() + .setReuseVariants(true) + .setNormalizeAlleles(true) + .setDecomposeMNVs(false)); + this.defaultObjectMapper = JacksonUtils.getDefaultObjectMapper(); + } + + @Override + public boolean processEvidenceEntries(SequenceLocation sequenceLocation, List evidenceEntries) { + // Add evidence entries in the RocksDB + // More than one variant being returned from the normalisation process would mean it's and MNV which has been decomposed + List normalisedVariantStringList; + try { + normalisedVariantStringList = getNormalisedVariantString(sequenceLocation.getChromosome(), + sequenceLocation.getStart(), sequenceLocation.getReference(), sequenceLocation.getAlternate()); + if (CollectionUtils.isNotEmpty(normalisedVariantStringList)) { + for (String normalisedVariantString : normalisedVariantStringList) { + rdb.put(normalisedVariantString.getBytes(), defaultObjectMapper.writeValueAsBytes(evidenceEntries)); + } + return true; + } + return false; + } catch (NonStandardCompliantSampleField | RocksDBException | JsonProcessingException e) { + logger.warn(StringUtils.join(e.getStackTrace(), "\n")); + return false; + } + } + + protected List getNormalisedVariantString(String chromosome, int start, String reference, String alternate) + throws NonStandardCompliantSampleField { + Variant variant = new Variant(chromosome, start, reference, alternate); + return getNormalisedVariantString(variant); + } + + protected List getNormalisedVariantString(Variant variant) throws NonStandardCompliantSampleField { + // Checks no weird characters are part of the reference & alternate alleles + if (isValid(variant)) { + List normalizedVariantList = variantNormalizer.normalize(Collections.singletonList(variant), true); + return normalizedVariantList.stream().map(Variant::toString).collect(Collectors.toList()); + } else { + logger.warn("Variant {} is not valid: skipping it!", variant); + } + + return Collections.emptyList(); + } + + protected boolean isValid(Variant variant) { + return (variant.getReference().matches(VARIANT_STRING_PATTERN) + && (variant.getAlternate().matches(VARIANT_STRING_PATTERN) + && !variant.getAlternate().equals(variant.getReference()))); + } +} diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java new file mode 100644 index 00000000000..421fb5fa87a --- /dev/null +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java @@ -0,0 +1,164 @@ +package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectReader; +import org.apache.commons.lang3.StringUtils; +import org.opencb.biodata.formats.variant.cosmic.CosmicParser; +import org.opencb.biodata.models.variant.Variant; +import org.opencb.biodata.models.variant.avro.EvidenceEntry; +import org.opencb.biodata.models.variant.avro.VariantAnnotation; +import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.commons.utils.FileUtils; +import org.opencb.opencga.core.common.JacksonUtils; +import org.opencb.opencga.core.exceptions.ToolException; +import org.opencb.opencga.storage.core.variant.VariantStorageOptions; +import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.VariantAnnotatorExtensionTask; +import org.rocksdb.Options; +import org.rocksdb.RocksDB; +import org.rocksdb.RocksDBException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.URI; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Collections; +import java.util.List; + +public class CosmicVariantAnnotatorExtensionTask implements VariantAnnotatorExtensionTask { + + public static final String ID = "cosmic"; + + private ObjectMap options; + + private String cosmicVersion; + private String assembly; + + private ObjectReader objectReader; + + private RocksDB rdb = null; + private Options dbOption = null; + private Path dbLocation = null; + + public static final String COSMIC_ANNOTATOR_INDEX_NAME = "cosmicAnnotatorIndex"; + + private static Logger logger = LoggerFactory.getLogger(CosmicVariantAnnotatorExtensionTask.class); + + public CosmicVariantAnnotatorExtensionTask(ObjectMap options) { + this.options = options; + this.objectReader = JacksonUtils.getDefaultObjectMapper().readerFor(new TypeReference>() {}); + } + + @Override + public List setup(URI output) throws Exception { + // Sanity check + Path cosmicFile = Paths.get(options.getString(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key())); + FileUtils.checkFile(cosmicFile); + cosmicVersion = (String) options.getOrDefault(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), null); + if (StringUtils.isEmpty(cosmicVersion)) { + throw new IllegalArgumentException("Missing COSMIC version"); + } + assembly = (String) options.getOrDefault(VariantStorageOptions.ASSEMBLY.key(), null); + if (StringUtils.isEmpty(assembly)) { + throw new IllegalArgumentException("Missing assembly"); + } + + // Clean and init RocksDB + dbLocation = Paths.get(output.getPath()).toAbsolutePath().resolve(COSMIC_ANNOTATOR_INDEX_NAME); + if (Files.exists(dbLocation)) { + // Skipping setup but init RocksDB + logger.info("Skipping setup, it was already done"); + initRockDB(false); + } else { + logger.info("Setup and populate RocksDB"); + + // Init RocksDB + initRockDB(true); + + // Call COSMIC parser + try { + CosmicExtensionTaskCallback callback = new CosmicExtensionTaskCallback(rdb); + CosmicParser.parse(cosmicFile, cosmicVersion, ID, assembly, callback); + } catch (IOException e) { + throw new ToolException(e); + } + } + return Collections.singletonList(dbLocation.toUri()); + } + + @Override + public void checkAvailable() throws IllegalArgumentException { + if (dbLocation == null || !Files.exists(dbLocation)) { + throw new IllegalArgumentException("COSMIC annotator extension is not available"); + } + } + + @Override + public ObjectMap getOptions() { + return options; + } + + @Override + public ObjectMap getMetadata() { + return new ObjectMap("data", ID) + .append("version", cosmicVersion) + .append("assembly", assembly); + } + + @Override + public List apply(List list) throws Exception { + for (VariantAnnotation variantAnnotation : list) { + Variant variant = new Variant(variantAnnotation.getChromosome(), variantAnnotation.getStart(), variantAnnotation.getEnd(), + variantAnnotation.getReference(), variantAnnotation.getAlternate()); + byte[] key = variant.toString().getBytes(); + byte[] dbContent = rdb.get(key); + if (dbContent != null) { + List evidenceEntryList = objectReader.readValue(dbContent); + if (variantAnnotation.getTraitAssociation() == null) { + variantAnnotation.setTraitAssociation(evidenceEntryList); + } else { + variantAnnotation.getTraitAssociation().addAll(evidenceEntryList); + } + } + } + return list; + } + + @Override + public void post() throws Exception { + closeRocksDB(); + } + + + private void closeRocksDB() { + if (rdb != null) { + rdb.close(); + } + if (dbOption != null) { + dbOption.dispose(); + } + } + + private void initRockDB(boolean forceCreate) throws ToolException { + boolean indexingNeeded = forceCreate || !Files.exists(dbLocation); + // a static method that loads the RocksDB C++ library. + RocksDB.loadLibrary(); + // the Options class contains a set of configurable DB options + // that determines the behavior of a database. + dbOption = new Options().setCreateIfMissing(true); + + rdb = null; + try { + // a factory method that returns a RocksDB instance + if (indexingNeeded) { + rdb = RocksDB.open(dbOption, dbLocation.toAbsolutePath().toString()); + } else { + rdb = RocksDB.openReadOnly(dbOption, dbLocation.toAbsolutePath().toString()); + } + } catch (RocksDBException e) { + throw new ToolException("Error initializing RocksDB", e); + } + } +} diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageBaseTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageBaseTest.java index 23ece394c79..eef6d8a6edf 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageBaseTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageBaseTest.java @@ -97,6 +97,15 @@ public abstract class VariantStorageBaseTest extends GenericTest implements Vari "22:16616084:G:A" ))); + public static final Set COSMIC_VARIANTS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList( + "1:169607124:G:T", + "1:169611640:T:G", + "1:169617058:A:T", + "1:169617158:C:A", + "12:124372173:T:A", + "12:124336867:G:A" + ))); + public static final String VCF_TEST_FILE_NAME = "10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"; protected static URI inputUri; @@ -106,6 +115,9 @@ public abstract class VariantStorageBaseTest extends GenericTest implements Vari public static final String VCF_CORRUPTED_FILE_NAME = "variant-test-file-corrupted.vcf"; protected static URI corruptedInputUri; + public static final String ANNOTATOR_EXTENSION_VCF_TEST_FILE_NAME = "variant-test-file-annotator-extension.vcf.gz"; + protected static URI annotatorExtensionInputUri; + protected static URI outputUri; protected VariantStorageEngine variantStorageEngine; protected VariantStorageMetadataManager metadataManager; @@ -134,16 +146,20 @@ public static void _beforeClass() throws Exception { Path inputPath = rootDir.resolve(VCF_TEST_FILE_NAME); Path smallInputPath = rootDir.resolve(SMALL_VCF_TEST_FILE_NAME); Path corruptedInputPath = rootDir.resolve(VCF_CORRUPTED_FILE_NAME); + Path annotatorExtensionInputPath = rootDir.resolve(ANNOTATOR_EXTENSION_VCF_TEST_FILE_NAME); Files.copy(VariantStorageEngineTest.class.getClassLoader().getResourceAsStream(VCF_TEST_FILE_NAME), inputPath, StandardCopyOption.REPLACE_EXISTING); Files.copy(VariantStorageEngineTest.class.getClassLoader().getResourceAsStream(SMALL_VCF_TEST_FILE_NAME), smallInputPath, StandardCopyOption.REPLACE_EXISTING); Files.copy(VariantStorageEngineTest.class.getClassLoader().getResourceAsStream(VCF_CORRUPTED_FILE_NAME), corruptedInputPath, StandardCopyOption.REPLACE_EXISTING); + Files.copy(VariantStorageEngineTest.class.getClassLoader().getResourceAsStream(ANNOTATOR_EXTENSION_VCF_TEST_FILE_NAME), + annotatorExtensionInputPath, StandardCopyOption.REPLACE_EXISTING); inputUri = inputPath.toUri(); smallInputUri = smallInputPath.toUri(); corruptedInputUri = corruptedInputPath.toUri(); + annotatorExtensionInputUri = annotatorExtensionInputPath.toUri(); outputUri = rootDir.toUri(); // logger.info("count: " + count.getAndIncrement()); diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java index 00544a3d715..ba33f63dcff 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java @@ -5,7 +5,9 @@ import org.junit.Assume; import org.junit.Test; import org.opencb.biodata.models.variant.Variant; +import org.opencb.biodata.models.variant.avro.EvidenceEntry; import org.opencb.biodata.models.variant.avro.VariantAnnotation; +import org.opencb.commons.datastore.core.DataResult; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; @@ -18,10 +20,13 @@ import org.opencb.opencga.storage.core.variant.adaptors.VariantMatchers; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.annotation.annotators.VariantAnnotatorFactory; +import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.CosmicVariantAnnotatorExtensionTaskTest; +import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic.CosmicVariantAnnotatorExtensionTask; import java.io.File; import java.net.URI; import java.nio.file.Files; +import java.nio.file.Path; import java.nio.file.Paths; import java.util.Collection; @@ -238,6 +243,191 @@ public void testMultiAnnotations() throws Exception { } + @Test + public void testCosmicAnnotatorExtensionWithCosmicAnnotation() throws Exception { + // Setup COSMIC directory + Path cosmicFile = CosmicVariantAnnotatorExtensionTaskTest.initCosmicPath(); + System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); + + VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + runDefaultETL(annotatorExtensionInputUri, variantStorageEngine, newStudyMetadata(), + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false)); + + variantStorageEngine.getOptions() + .append(VariantStorageOptions.ANNOTATOR_CLASS.key(), DummyTestAnnotator.class.getName()) + .append(VariantStorageOptions.ANNOTATOR.key(), VariantAnnotatorFactory.AnnotationEngine.OTHER) + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key(), CosmicVariantAnnotatorExtensionTask.ID) + .append(VariantStorageOptions.ASSEMBLY.key(), "GRCh38") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), "v95") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key(), cosmicFile); + + URI annotOutdir = outputUri.resolve("annot1"); + Files.createDirectories(Paths.get(annotOutdir)); + variantStorageEngine.annotate(annotOutdir, new ObjectMap(DummyTestAnnotator.ANNOT_KEY, "v1").append(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), true)); + variantStorageEngine.saveAnnotation("v1", new ObjectMap()); + + // Check that cosmic variants are annotated + DataResult annotationDataResult = variantStorageEngine.getAnnotation("v1", new Query(), new QueryOptions()); + checkCosmicVariants(annotationDataResult, COSMIC_VARIANTS.size()); + } + + @Test + public void testCosmicAnnotatorExtensionWithoutCosmicAnnotation() throws Exception { + VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + runDefaultETL(annotatorExtensionInputUri, variantStorageEngine, newStudyMetadata(), + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false)); + + variantStorageEngine.getOptions() + .append(VariantStorageOptions.ANNOTATOR_CLASS.key(), DummyTestAnnotator.class.getName()) + .append(VariantStorageOptions.ANNOTATOR.key(), VariantAnnotatorFactory.AnnotationEngine.OTHER); + + URI annotOutdir = outputUri.resolve("annot1"); + Files.createDirectories(Paths.get(annotOutdir)); + variantStorageEngine.annotate(annotOutdir, new ObjectMap(DummyTestAnnotator.ANNOT_KEY, "v1").append(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), true)); + variantStorageEngine.saveAnnotation("v1", new ObjectMap()); + + // Check that cosmic variants are annotated + DataResult annotationDataResult = variantStorageEngine.getAnnotation("v1", new Query(), new QueryOptions()); + checkCosmicVariants(annotationDataResult, 0); + } + + @Test + public void testCosmicAnnotatorExtensionInvalidCosmicFile() throws Exception { + // Setup COSMIC directory + Path cosmicFile = CosmicVariantAnnotatorExtensionTaskTest.initInvalidCosmicPath(); + System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); + + VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + runDefaultETL(annotatorExtensionInputUri, variantStorageEngine, newStudyMetadata(), + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false)); + + variantStorageEngine.getOptions() + .append(VariantStorageOptions.ANNOTATOR_CLASS.key(), DummyTestAnnotator.class.getName()) + .append(VariantStorageOptions.ANNOTATOR.key(), VariantAnnotatorFactory.AnnotationEngine.OTHER) + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key(), CosmicVariantAnnotatorExtensionTask.ID) + .append(VariantStorageOptions.ASSEMBLY.key(), "GRCh38") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), "v95") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key(), cosmicFile); + + + URI annotOutdir = outputUri.resolve("annot1"); + Files.createDirectories(Paths.get(annotOutdir)); + + thrown.expect(VariantAnnotatorException.class); + variantStorageEngine.annotate(annotOutdir, new ObjectMap(DummyTestAnnotator.ANNOT_KEY, "v1").append(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), true)); + } + + @Test + public void testCosmicAnnotatorExtensionMissingCosmicFile() throws Exception { + VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + runDefaultETL(annotatorExtensionInputUri, variantStorageEngine, newStudyMetadata(), + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false)); + + variantStorageEngine.getOptions() + .append(VariantStorageOptions.ANNOTATOR_CLASS.key(), DummyTestAnnotator.class.getName()) + .append(VariantStorageOptions.ANNOTATOR.key(), VariantAnnotatorFactory.AnnotationEngine.OTHER) + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key(), CosmicVariantAnnotatorExtensionTask.ID) + .append(VariantStorageOptions.ASSEMBLY.key(), "GRCh38") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), "v95"); + + URI annotOutdir = outputUri.resolve("annot1"); + Files.createDirectories(Paths.get(annotOutdir)); + + thrown.expect(VariantAnnotatorException.class); + variantStorageEngine.annotate(annotOutdir, new ObjectMap(DummyTestAnnotator.ANNOT_KEY, "v1").append(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), true)); + } + + @Test + public void testCosmicAnnotatorExtensionMissingCosmicVersion() throws Exception { + // Setup COSMIC directory + Path cosmicFile = CosmicVariantAnnotatorExtensionTaskTest.initCosmicPath(); + System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); + + VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + runDefaultETL(annotatorExtensionInputUri, variantStorageEngine, newStudyMetadata(), + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false)); + + variantStorageEngine.getOptions() + .append(VariantStorageOptions.ANNOTATOR_CLASS.key(), DummyTestAnnotator.class.getName()) + .append(VariantStorageOptions.ANNOTATOR.key(), VariantAnnotatorFactory.AnnotationEngine.OTHER) + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key(), CosmicVariantAnnotatorExtensionTask.ID) + .append(VariantStorageOptions.ASSEMBLY.key(), "GRCh38") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key(), cosmicFile); + + URI annotOutdir = outputUri.resolve("annot1"); + Files.createDirectories(Paths.get(annotOutdir)); + + thrown.expect(VariantAnnotatorException.class); + variantStorageEngine.annotate(annotOutdir, new ObjectMap(DummyTestAnnotator.ANNOT_KEY, "v1").append(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), true)); + } + + @Test + public void testCosmicAnnotatorExtensionMissingAssembly() throws Exception { + // Setup COSMIC directory + Path cosmicFile = CosmicVariantAnnotatorExtensionTaskTest.initCosmicPath(); + System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); + + VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + runDefaultETL(annotatorExtensionInputUri, variantStorageEngine, newStudyMetadata(), + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false)); + + variantStorageEngine.getOptions() + .append(VariantStorageOptions.ANNOTATOR_CLASS.key(), DummyTestAnnotator.class.getName()) + .append(VariantStorageOptions.ANNOTATOR.key(), VariantAnnotatorFactory.AnnotationEngine.OTHER) + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key(), CosmicVariantAnnotatorExtensionTask.ID) + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), "v95") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key(), cosmicFile); + + URI annotOutdir = outputUri.resolve("annot1"); + Files.createDirectories(Paths.get(annotOutdir)); + + thrown.expect(VariantAnnotatorException.class); + variantStorageEngine.annotate(annotOutdir, new ObjectMap(DummyTestAnnotator.ANNOT_KEY, "v1").append(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), true)); + } + + @Test + public void testCosmicAnnotatorExtensionMismatchAssembly() throws Exception { + // Setup COSMIC directory + Path cosmicFile = CosmicVariantAnnotatorExtensionTaskTest.initCosmicPath(); + System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); + + VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + runDefaultETL(annotatorExtensionInputUri, variantStorageEngine, newStudyMetadata(), + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false)); + + variantStorageEngine.getOptions() + .append(VariantStorageOptions.ANNOTATOR_CLASS.key(), DummyTestAnnotator.class.getName()) + .append(VariantStorageOptions.ANNOTATOR.key(), VariantAnnotatorFactory.AnnotationEngine.OTHER) + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key(), CosmicVariantAnnotatorExtensionTask.ID) + .append(VariantStorageOptions.ASSEMBLY.key(), "GRCh37") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), "v95") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key(), cosmicFile); + + URI annotOutdir = outputUri.resolve("annot1"); + Files.createDirectories(Paths.get(annotOutdir)); + + thrown.expect(VariantAnnotatorException.class); + variantStorageEngine.annotate(annotOutdir, new ObjectMap(DummyTestAnnotator.ANNOT_KEY, "v1").append(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), true)); + } + + public void checkCosmicVariants(DataResult annotationDataResult, int expected) { + int cosmicCount = 0; + for (VariantAnnotation va : annotationDataResult.getResults()) { + String variantId = va.getChromosome() + ":" + va.getStart() + ":" + va.getReference() + ":" + va.getAlternate(); + if (COSMIC_VARIANTS.contains(variantId)) { + if (va.getTraitAssociation() != null) { + for (EvidenceEntry entry : va.getTraitAssociation()) { + if (CosmicVariantAnnotatorExtensionTask.ID.equals(entry.getSource().getName())) { + cosmicCount++; + break; + } + } + } + } + } + assertEquals(expected, cosmicCount); + } + public void testQueries(VariantStorageEngine variantStorageEngine) throws StorageEngineException { long count = variantStorageEngine.count(new Query()).first(); long partialCount = 0; diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java new file mode 100644 index 00000000000..6f1a1f2825c --- /dev/null +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java @@ -0,0 +1,167 @@ +package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions; + + +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.RandomStringUtils; +import org.junit.Assert; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.opencb.biodata.models.common.DataVersion; +import org.opencb.biodata.models.variant.avro.VariantAnnotation; +import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.opencga.core.common.JacksonUtils; +import org.opencb.opencga.core.common.TimeUtils; +import org.opencb.opencga.core.exceptions.ToolException; +import org.opencb.opencga.core.testclassification.duration.ShortTests; +import org.opencb.opencga.storage.core.variant.VariantStorageOptions; +import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic.CosmicVariantAnnotatorExtensionTask; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +@Category(ShortTests.class) +public class CosmicVariantAnnotatorExtensionTaskTest { + + private final String ASSEMBLY ="GRCh38"; + private final String COSMIC_VERSION = "v95"; + + @Test + public void testSetupCosmicVariantAnnotatorExtensionTask() throws Exception { + Path outPath = getTempPath(); + if (!outPath.toFile().mkdirs()) { + throw new IOException("Error creating the output path: " + outPath.toAbsolutePath()); + } + System.out.println("outPath = " + outPath.toAbsolutePath()); + + // Setup COSMIC directory + Path cosmicFile = initCosmicPath(); + System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); + + ObjectMap options = new ObjectMap(); + options.put(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key(), cosmicFile); + options.put(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), COSMIC_VERSION); + options.put(VariantStorageOptions.ASSEMBLY.key(), ASSEMBLY); + + CosmicVariantAnnotatorExtensionTask task = new CosmicVariantAnnotatorExtensionTask(options); + + Assert.assertEquals(false, task.isAvailable()); + + // Set-up COSMIC variant annotator extension task, once + task.setup(outPath.toUri()); + + // Set-up COSMIC variant annotator extension task, twice + task.setup(outPath.toUri()); + + ObjectMap metadata = task.getMetadata(); + Assert.assertEquals(COSMIC_VERSION, metadata.get("version")); + Assert.assertEquals(CosmicVariantAnnotatorExtensionTask.ID, metadata.get("data")); + Assert.assertEquals(ASSEMBLY, metadata.get("assembly")); + + Assert.assertEquals(true, task.isAvailable()); + } + + @Test + public void testSCosmicVariantAnnotatorExtensionTask() { + ObjectMap options = new ObjectMap(); + CosmicVariantAnnotatorExtensionTask task = new CosmicVariantAnnotatorExtensionTask(options); + Assert.assertEquals(false, task.isAvailable()); + } + + @Test + public void testAnnotationCosmicVariantAnnotatorExtensionTaskUsingFactory() throws Exception { + Path outPath = getTempPath(); + if (!outPath.toFile().mkdirs()) { + throw new IOException("Error creating the output path: " + outPath.toAbsolutePath()); + } + System.out.println("outPath = " + outPath.toAbsolutePath()); + + // Setup COSMIC directory + Path cosmicFile = initCosmicPath(); + System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); + + ObjectMap options = new ObjectMap(); + options.put(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key(), cosmicFile); + options.put(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), COSMIC_VERSION); + options.put(VariantStorageOptions.ASSEMBLY.key(), ASSEMBLY); + options.put(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key(), CosmicVariantAnnotatorExtensionTask.ID); + + CosmicVariantAnnotatorExtensionTask task = (CosmicVariantAnnotatorExtensionTask) new VariantAnnotatorExtensionsFactory().getVariantAnnotatorExtensions(options).get(0); + + Assert.assertEquals(false, task.isAvailable()); + + // Set-up COSMIC variant annotator extension task, once + task.setup(outPath.toUri()); + + List inputVariantAnnotations = new ArrayList<>(); + VariantAnnotation variantAnnotation1 = new VariantAnnotation(); + variantAnnotation1.setChromosome("12"); + variantAnnotation1.setStart(124402657); + variantAnnotation1.setEnd(124402657); + variantAnnotation1.setReference("G"); + variantAnnotation1.setAlternate("T"); + inputVariantAnnotations.add(variantAnnotation1); + VariantAnnotation variantAnnotation2 = new VariantAnnotation(); + variantAnnotation2.setChromosome("22"); + variantAnnotation2.setStart(124402657); + variantAnnotation2.setEnd(124402657); + variantAnnotation2.setReference("G"); + variantAnnotation2.setAlternate("T"); + inputVariantAnnotations.add(variantAnnotation2); + + List outputVariantAnnotations = task.apply(inputVariantAnnotations); + task.post(); + + Assert.assertEquals(inputVariantAnnotations.size(), outputVariantAnnotations.size()); + + // Checking variantAnnotation1 + Assert.assertEquals(1, outputVariantAnnotations.get(0).getTraitAssociation().size()); + Assert.assertEquals("COSV62300079", outputVariantAnnotations.get(0).getTraitAssociation().get(0).getId()); + Assert.assertEquals("liver", outputVariantAnnotations.get(0).getTraitAssociation().get(0).getSomaticInformation().getPrimarySite()); + Assert.assertEquals("hepatocellular carcinoma", outputVariantAnnotations.get(0).getTraitAssociation().get(0).getSomaticInformation().getHistologySubtype()); + Assert.assertEquals("PMID:323", outputVariantAnnotations.get(0).getTraitAssociation().get(0).getBibliography().get(0)); + + // Checking variantAnnotation2 + Assert.assertTrue(CollectionUtils.isEmpty(outputVariantAnnotations.get(1).getTraitAssociation())); + } + + public static Path initCosmicPath() throws IOException { + Path cosmicPath = getTempPath(); + if (!cosmicPath.toFile().mkdirs()) { + throw new IOException("Error creating the COSMIC path: " + cosmicPath.toAbsolutePath()); + } + Path cosmicFile = Paths.get(CosmicVariantAnnotatorExtensionTaskTest.class.getResource("/custom_annotation/cosmic.small.tsv.gz").getPath()); + Path targetPath = cosmicPath.resolve(cosmicFile.getFileName()); + Files.copy(cosmicFile, targetPath); + + if (!Files.exists(targetPath)) { + throw new IOException("Error copying COSMIC file to " + targetPath); + } + + return targetPath; + } + + public static Path initInvalidCosmicPath() throws IOException { + Path cosmicPath = getTempPath(); + if (!cosmicPath.toFile().mkdirs()) { + throw new IOException("Error creating the COSMIC path: " + cosmicPath.toAbsolutePath()); + } + Path cosmicFile = Paths.get(CosmicVariantAnnotatorExtensionTaskTest.class.getResource("/custom_annotation/myannot.vcf").getPath()); + Path targetPath = cosmicPath.resolve(cosmicFile.getFileName()); + Files.copy(cosmicFile, targetPath); + + if (!Files.exists(targetPath)) { + throw new IOException("Error copying COSMIC file to " + targetPath); + } + + return targetPath; + } + + public static Path getTempPath() { + return Paths.get("target/test-data").resolve(TimeUtils.getTimeMillis() + "_" + RandomStringUtils.random(8, true, false)); + } +} \ No newline at end of file diff --git a/opencga-storage/opencga-storage-core/src/test/resources/custom_annotation/cosmic.small.tsv.gz b/opencga-storage/opencga-storage-core/src/test/resources/custom_annotation/cosmic.small.tsv.gz new file mode 100644 index 00000000000..153129875fe Binary files /dev/null and b/opencga-storage/opencga-storage-core/src/test/resources/custom_annotation/cosmic.small.tsv.gz differ diff --git a/opencga-storage/opencga-storage-core/src/test/resources/variant-test-file-annotator-extension.vcf.gz b/opencga-storage/opencga-storage-core/src/test/resources/variant-test-file-annotator-extension.vcf.gz new file mode 100644 index 00000000000..fa19d5b2e4d Binary files /dev/null and b/opencga-storage/opencga-storage-core/src/test/resources/variant-test-file-annotator-extension.vcf.gz differ