Skip to content

Commit

Permalink
Merge pull request #2334 from opencb/TASK-4875
Browse files Browse the repository at this point in the history
TASK-4875 - Port Patch 1.6.4 -> 1.9.0
  • Loading branch information
juanfeSanahuja authored Sep 5, 2023
2 parents 2dde3ce + 575a6cd commit ef48f7f
Show file tree
Hide file tree
Showing 31 changed files with 638 additions and 91 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/test-analysis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ on:
required: true
mvn_opts:
type: string

required: false
default: ""
secrets:
Expand Down Expand Up @@ -82,3 +83,4 @@ jobs:
report_paths: './**/surefire-reports/TEST-*.xml'
commit: '${{ github.sha }}'
fail_on_test_failures: true

Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import org.opencb.biodata.models.clinical.Phenotype;
import org.opencb.biodata.models.clinical.pedigree.Member;
import org.opencb.biodata.models.clinical.pedigree.Pedigree;
import org.opencb.biodata.models.core.SexOntologyTermAnnotation;
import org.opencb.biodata.models.pedigree.IndividualProperty;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.exec.Command;
Expand All @@ -21,12 +20,9 @@
import org.opencb.opencga.core.exceptions.ToolExecutorException;
import org.opencb.opencga.core.models.family.Family;
import org.opencb.opencga.core.models.individual.Individual;
import org.opencb.opencga.core.models.sample.Sample;
import org.opencb.opencga.core.tools.annotations.ToolExecutor;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.opencb.opencga.storage.core.variant.adaptors.VariantField;
import org.opencb.opencga.storage.core.variant.adaptors.VariantQuery;
import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam;
import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -36,7 +32,6 @@
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.stream.Collectors;

@ToolExecutor(id = ExomiserWrapperAnalysisExecutor.ID,
tool = ExomiserWrapperAnalysis.ID,
Expand Down Expand Up @@ -133,7 +128,8 @@ public void run() throws ToolException {
.sample(sampleId)
.includeSample(samples)
.includeSampleData("GT")
.unknownGenotype("./.");
.unknownGenotype("./.")
.append("includeAllFromSampleIndex", true);

QueryOptions queryOptions = new QueryOptions(QueryOptions.INCLUDE, "id,studies.samples");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,41 @@ protected OpenCGAResult groupBy(MongoDBCollection collection, Bson query, List<S
return new OpenCGAResult<>(aggregate);
}

/**
 * Returns a copy of {@code options} in which every key of the INCLUDE and EXCLUDE
 * projection lists is prefixed with {@code prefix}. Keys that already start with the
 * prefix are kept as-is, so the method is idempotent.
 *
 * NOTE: projection keys are passed through a HashSet, so duplicates are removed and
 * the original key order is NOT preserved (MongoDB projections are order-insensitive).
 *
 * @param options original query options; may be {@code null}.
 * @param prefix  prefix to prepend to each projection key, e.g. {@code "projects."}.
 * @return a new QueryOptions with prefixed projections, or {@code null} if {@code options} was {@code null}.
 */
public static QueryOptions addPrefixInOptions(QueryOptions options, String prefix) {
    if (options == null) {
        return null;
    }

    // Defensive copy: never mutate the caller's options object.
    QueryOptions queryOptions = new QueryOptions(options);
    addPrefixToProjection(queryOptions, QueryOptions.INCLUDE, prefix);
    addPrefixToProjection(queryOptions, QueryOptions.EXCLUDE, prefix);
    return queryOptions;
}

/**
 * Prefixes every key of the given projection list (INCLUDE or EXCLUDE) in place,
 * deduplicating keys in the process. No-op if the projection key is absent.
 */
private static void addPrefixToProjection(QueryOptions queryOptions, String projectionKey, String prefix) {
    if (!queryOptions.containsKey(projectionKey)) {
        return;
    }
    Set<String> keys = new HashSet<>(queryOptions.getAsStringList(projectionKey));
    List<String> prefixedKeys = new ArrayList<>(keys.size());
    for (String key : keys) {
        prefixedKeys.add(key.startsWith(prefix) ? key : prefix + key);
    }
    queryOptions.put(projectionKey, prefixedKeys);
}

/**
* Filter QueryOptions object to ensure the keys provided are always included.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -771,8 +771,10 @@ private MongoDBIterator<Document> getMongoCursor(ClientSession clientSession, Qu
aggregates.add(Aggregates.match(bsonQuery));

// Check include
QueryOptions qOptions = options != null ? new QueryOptions(options) : new QueryOptions();
List<String> includeList = new ArrayList<>();
QueryOptions qOptions = filterQueryOptions(options, Arrays.asList(QueryParams.UID.key(), QueryParams.FQN.key()));
qOptions = addPrefixInOptions(qOptions, "projects.");

/*List<String> includeList = new ArrayList<>();
if (options != null && options.get(QueryOptions.INCLUDE) != null) {
List<String> optionsAsStringList = options.getAsStringList(QueryOptions.INCLUDE);
includeList.addAll(optionsAsStringList.stream().collect(Collectors.toList()));
Expand All @@ -792,6 +794,7 @@ private MongoDBIterator<Document> getMongoCursor(ClientSession clientSession, Qu
}
qOptions.put(QueryOptions.INCLUDE, includeList);
}
*/

for (Bson aggregate : aggregates) {
logger.debug("Get project: Aggregate : {}", aggregate.toBsonDocument());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -318,10 +318,10 @@ Study insert(ClientSession clientSession, Project project, Study study)
}

@Override
public OpenCGAResult<Study> getAllStudiesInProject(long projectId, QueryOptions options) throws CatalogDBException {
public OpenCGAResult<Study> getAllStudiesInProject(long projectUid, QueryOptions options) throws CatalogDBException {
long startTime = startQuery();
dbAdaptorFactory.getCatalogProjectDbAdaptor().checkId(projectId);
Query query = new Query(QueryParams.PROJECT_ID.key(), projectId);
dbAdaptorFactory.getCatalogProjectDbAdaptor().checkId(projectUid);
Query query = new Query(QueryParams.PROJECT_UID.key(), projectUid);
return endQuery(startTime, get(query, options));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ public class ProjectCatalogMongoDBIterator<E> extends CatalogMongoDBIterator<E>
private final String user;

private final QueryOptions options;
private boolean includeStudyInfo;
private final StudyMongoDBAdaptor studyDBAdaptor;
private QueryOptions studyQueryOptions;

Expand All @@ -44,6 +45,7 @@ public ProjectCatalogMongoDBIterator(MongoDBIterator<Document> mongoCursor, Clie
this.studyDBAdaptor = dbAdaptorFactory.getCatalogStudyDBAdaptor();

this.options = options != null ? new QueryOptions(options) : new QueryOptions();
this.includeStudyInfo = includeStudyInfo();
this.studyQueryOptions = createInnerQueryOptionsForVersionedEntity(this.options, ProjectDBAdaptor.QueryParams.STUDIES.key(), false);
this.studyQueryOptions = MongoDBAdaptor.filterQueryOptions(this.studyQueryOptions,
Collections.singletonList(MongoDBAdaptor.PRIVATE_PROJECT));
Expand Down Expand Up @@ -90,8 +92,7 @@ private void fetchNextBatch() {
projectListBuffer.add(projectDocument);
counter++;

if (options == null || !options.containsKey(QueryOptions.EXCLUDE)
|| !options.getAsStringList(QueryOptions.EXCLUDE).contains("projects.studies")) {
if (includeStudyInfo) {
projectUidSet.add(projectDocument.get(UID, Long.class));
}
}
Expand Down Expand Up @@ -130,4 +131,29 @@ private void fetchNextBatch() {
}
}
}

/**
 * Decides whether study documents must be fetched for the projects in this batch.
 *
 * Rules: with no options at all, studies are included. An explicit EXCLUDE of
 * "studies"/"projects.studies" disables them; otherwise an INCLUDE list only keeps
 * them when some included field lies under "studies"/"projects.studies".
 */
private boolean includeStudyInfo() {
    if (options == null) {
        return true;
    }
    if (options.containsKey(QueryOptions.EXCLUDE)) {
        // Studies are fetched unless explicitly excluded.
        for (String excludedField : options.getAsStringList(QueryOptions.EXCLUDE)) {
            boolean excludesStudies = excludedField.equals("studies") || excludedField.equals("projects.studies");
            if (excludesStudies) {
                return false;
            }
        }
        return true;
    }
    if (options.containsKey(QueryOptions.INCLUDE)) {
        // With an INCLUDE projection, studies are only fetched when requested.
        for (String includedField : options.getAsStringList(QueryOptions.INCLUDE)) {
            boolean includesStudies = includedField.startsWith("studies") || includedField.startsWith("projects.studies");
            if (includesStudies) {
                return true;
            }
        }
        return false;
    }
    // Options present but with no projection: default to including studies.
    return true;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -610,7 +610,9 @@ public OpenCGAResult<Integer> incrementRelease(String projectStr, String session
int currentRelease = project.getCurrentRelease();

// Check current release has been used at least in one study or file or cohort or individual...
List<Study> allStudiesInProject = project.getStudies();
QueryOptions studyOptions = keepFieldInQueryOptions(StudyManager.INCLUDE_STUDY_IDS, StudyDBAdaptor.QueryParams.RELEASE.key());
OpenCGAResult<Study> studyResult = studyDBAdaptor.getAllStudiesInProject(projectUid, studyOptions);
List<Study> allStudiesInProject = studyResult.getResults();
if (allStudiesInProject.isEmpty()) {
throw new CatalogException("Cannot increment current release number. No studies found for release " + currentRelease);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.opencb.opencga.catalog.managers;

import com.fasterxml.jackson.core.JsonProcessingException;
import org.apache.commons.collections4.CollectionUtils;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
Expand Down Expand Up @@ -161,6 +162,42 @@ public void searchProjects() throws CatalogException {

}

@Test
public void searchProjectsUsingInclude() throws CatalogException {
    // INCLUDE only the project id: every other field must come back unset.
    QueryOptions includeId = new QueryOptions(QueryOptions.INCLUDE, ProjectDBAdaptor.QueryParams.ID.key());
    OpenCGAResult<Project> projects = catalogManager.getProjectManager().search(new Query(), includeId, sessionIdUser);
    assertEquals(1, projects.getNumResults());
    for (Project result : projects.getResults()) {
        assertNotNull(result.getId());
        assertNull(result.getDescription());
        assertNull(result.getName());
        assertNull(result.getStudies());
        assertTrue(CollectionUtils.isEmpty(result.getStudies()));
    }

    // INCLUDE only the studies: the nested study list is populated, scalar fields are not.
    QueryOptions includeStudies = new QueryOptions(QueryOptions.INCLUDE, ProjectDBAdaptor.QueryParams.STUDIES.key());
    projects = catalogManager.getProjectManager().search(new Query(), includeStudies, sessionIdUser);
    assertEquals(1, projects.getNumResults());
    for (Project result : projects.getResults()) {
        assertNull(result.getId());
        assertNull(result.getDescription());
        assertNull(result.getName());
        assertNotNull(result.getStudies());
        assertTrue(CollectionUtils.isNotEmpty(result.getStudies()));
    }

    // EXCLUDE the name: only the excluded field is missing, everything else is present.
    QueryOptions excludeName = new QueryOptions(QueryOptions.EXCLUDE, ProjectDBAdaptor.QueryParams.NAME.key());
    projects = catalogManager.getProjectManager().search(new Query(), excludeName, sessionIdUser);
    assertEquals(1, projects.getNumResults());
    for (Project result : projects.getResults()) {
        assertNotNull(result.getId());
        assertNull(result.getName());
        assertNotNull(result.getDescription());
        assertNotNull(result.getStudies());
        assertFalse(CollectionUtils.isEmpty(result.getStudies()));
    }
}

@Test
public void getSharedProjects() throws CatalogException {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,15 @@ public List<IndexFieldConfiguration> getCustomFields() {
return customFields;
}

/**
 * Looks up the custom field configuration registered for the given source and key.
 *
 * @param source source the field belongs to.
 * @param key    field key to look up.
 * @return the matching configuration, or {@code null} if none is registered.
 */
public IndexFieldConfiguration getCustomField(IndexFieldConfiguration.Source source, String key) {
    for (IndexFieldConfiguration candidate : customFields) {
        boolean sameSource = candidate.getSource() == source;
        if (sameSource && candidate.getKey().equals(key)) {
            return candidate;
        }
    }
    return null;
}

public int getFilePositionBits() {
return filePositionBits;
}
Expand Down Expand Up @@ -705,4 +714,13 @@ public boolean equals(Object o) {
public int hashCode() {
return Objects.hash(fileIndexConfiguration, annotationIndexConfiguration);
}

@Override
public String toString() {
    // Field order matches the class declaration; plain concatenation compiles to the
    // same StringBuilder chain the previous implementation spelled out by hand.
    return "SampleIndexConfiguration{"
            + "fileIndexConfiguration=" + fileIndexConfiguration
            + ", annotationIndexConfiguration=" + annotationIndexConfiguration
            + '}';
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,6 @@ public VariantAnnotationMetadata() {
sourceVersion = new ArrayList<>();
}

/**
 * Convenience constructor without private sources.
 *
 * Previous implementation had an empty body and silently discarded every argument,
 * leaving all fields unset; it now delegates to the full constructor.
 * Passing {@code null} privateSources mirrors "no private sources" — TODO confirm
 * callers do not expect an empty list instead.
 */
public VariantAnnotationMetadata(int id, String name, Date creationDate, VariantAnnotatorProgram annotator,
                                 List<ObjectMap> sourceVersion, DataRelease dataRelease) {
    this(id, name, creationDate, annotator, sourceVersion, dataRelease, null);
}
public VariantAnnotationMetadata(int id, String name, Date creationDate, VariantAnnotatorProgram annotator,
List<ObjectMap> sourceVersion, DataRelease dataRelease, List<String> privateSources) {
this.id = id;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.opencb.biodata.models.core.Region;
import org.opencb.biodata.models.variant.StudyEntry;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.VariantBuilder;
import org.opencb.biodata.models.variant.annotation.ConsequenceTypeMappings;
import org.opencb.biodata.models.variant.avro.*;
import org.opencb.commons.datastore.core.*;
Expand Down Expand Up @@ -515,9 +516,18 @@ public static boolean isHpo(String value) {
*/
/**
 * Heuristic check for whether {@code value} looks like a variant id
 * (chr:pos:ref:alt, possibly symbolic or breakend).
 *
 * Exactly three colons is always accepted. With more than three colons
 * (e.g. breakends like 4:100:C:]15:300]A or HLA contigs), the string is
 * validated by attempting to build a {@link VariantBuilder} from it.
 *
 * @param value candidate string.
 * @return {@code true} if the string is parseable as a variant id.
 */
public static boolean isVariantId(String value) {
    int count = StringUtils.countMatches(value, ':');
    if (count == 3) {
        return true;
    }
    if (count > 3) {
        // Delegate the ambiguous cases to the real parser; any parse failure
        // means this is not a variant id.
        try {
            new VariantBuilder(value);
        } catch (RuntimeException e) {
            return false;
        }
        return true;
    }
    return false;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -435,22 +435,27 @@ public void isVariantId() {
checkIsVariantId("1:1000:A:T");
checkIsVariantId("1:1000-2000:A:T");
checkIsVariantId("1:1000-2000:A:<DUP:TANDEM>");
checkIsVariantId("HLA-DRB1*04:03:01:3349:GAGACAGAG:-", true);
checkIsVariantId("HLA-DRB1*04:03:01:3349:GAGACAGAG:-:2", false);
checkIsVariantId("11:14525312:-:]11:14521700].");
checkIsVariantId("4:100:C:[15:300[A");
checkIsVariantId("4:100:C:]15:300]A");
checkIsVariantId("rs123");
}

/**
 * Checks {@link VariantQueryUtils#isVariantId} against the ground truth of the
 * full {@link Variant} parser: the heuristic must accept exactly the strings the
 * parser accepts. (Removed a leftover unused local from the previous version.)
 */
public void checkIsVariantId(String v) {
    boolean expected;
    try {
        new Variant(v);
        expected = true;
    } catch (Exception e) {
        expected = false;
    }
    checkIsVariantId(v, expected);
}

/**
 * Asserts that the heuristic id check returns {@code expected} for {@code v},
 * reporting the offending variant string on failure.
 */
public void checkIsVariantId(String v, boolean expected) {
    assertEquals(v, expected, VariantQueryUtils.isVariantId(v));
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
##fileformat=VCFv4.1
##FILTER=<ID=PASS,Description="All filters passed">
##FILTER=<ID=noPass,Description="No pass">
##FILTER=<ID=noPass2,Description="No pass other">
##FILTER=<ID=.,Description="unknown filter state">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype likelihoods">
##FORMAT=<ID=DS,Number=1,Type=Float,Description="">
##command=seq 1000000 500 3000000 | while read i ; do echo -e "chr1\t$i\t.\tA\tC\t$RANDOM\tPASS\t.\tGT\t0/1\t1/1\t1|0\t0|1" ; done
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA19600 NA19660 NA19661 NA19685
chr1 1000000 . A C,T 5 noPass,noPass2 . GT 1/2 1/1 0|0 0|1
chr1 1000010 . A AC,CA 20 PASS . GT 1/2 1/1 0|0 0|1
chr1 1000020 . AT T,A 60 . . GT 1/2 1/1 0|0 0|1
chr1 1000030 . C G 60 . PASS GT 1/0 1/1 0|0 0|1
chr1 1000040 . C G 60 . PASS GT 1/0 1/1 0|0 0|1
chr1 1000050 . C G 60 . PASS GT 1/0 1/1 0|0 0|1
chr1 1000060 . C G 60 . PASS GT 1/0 1/1 0|0 0|1
chr1 1000070 . C G 60 . PASS GT 1/0 1/1 0|0 0|1
chr1 1000080 . C G 60 . PASS GT 1/0 1/1 0|0 0|1
chr1 1000090 . C G 60 . PASS GT 1/0 1/1 0|0 0|1
Loading

0 comments on commit ef48f7f

Please sign in to comment.