Skip to content

Commit

Permalink
Merge pull request #2353 from opencb/TASK-5120
Browse files Browse the repository at this point in the history
TASK-5120 - Port Patch 1.6.5 - > 1.10.0
  • Loading branch information
juanfeSanahuja authored Oct 27, 2023
2 parents ce32db4 + 5939cf0 commit eca6390
Show file tree
Hide file tree
Showing 168 changed files with 1,727 additions and 1,060 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/manual-deploy-ext-tools.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,5 @@ jobs:
needs: build
with:
cli: python3 ./build/cloud/docker/docker-build.py push --images ext-tools --tag ${{ inputs.tag }}
secrets: inherit
secrets: inherit

1 change: 1 addition & 0 deletions .github/workflows/pull-request-merge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ jobs:
with:
cli: python3 ./build/cloud/docker/docker-build.py delete --images base --tag ${{ github.head_ref }}
secrets: inherit

1 change: 1 addition & 0 deletions .github/workflows/test-analysis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,4 @@ jobs:
commit: '${{ github.sha }}'
fail_on_test_failures: true


Original file line number Diff line number Diff line change
Expand Up @@ -202,12 +202,12 @@ protected void run() throws Exception {
if (family == null || StringUtils.isEmpty(family.getId())) {
continue;
}
List<List<String>> trios = variantStorageManager.getTriosFromFamily(getStudy(), family, true, getToken());
for (List<String> trio : trios) {
String child = trio.get(2);
List<Trio> trios = variantStorageManager.getTriosFromFamily(getStudy(), family, true, getToken());
for (Trio trio : trios) {
String child = trio.getChild();
if (analysisParams.getSample().contains(child)) {
String father = trio.get(0);
String mother = trio.get(1);
String father = trio.getFather();
String mother = trio.getMother();
triosMap.put(child, new Trio(family.getId(),
"-".equals(father) ? null : father,
"-".equals(mother) ? null : mother,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1299,10 +1299,10 @@ public Integer getReleaseFilter(Query query, String sessionId) throws CatalogExc
return release;
}

public List<List<String>> getTriosFromFamily(
public List<Trio> getTriosFromFamily(
String studyFqn, Family family, VariantStorageMetadataManager metadataManager, boolean skipIncompleteFamily, String sessionId)
throws StorageEngineException, CatalogException {
List<List<String>> trios = getTrios(studyFqn, metadataManager, family.getMembers(), sessionId);
List<Trio> trios = getTrios(studyFqn, metadataManager, family.getMembers(), sessionId);
if (trios.size() == 0) {
if (skipIncompleteFamily) {
logger.debug("Skip family '" + family.getId() + "'. ");
Expand All @@ -1313,7 +1313,7 @@ public List<List<String>> getTriosFromFamily(
return trios;
}

public List<List<String>> getTriosFromSamples(
public List<Trio> getTriosFromSamples(
String studyFqn, VariantStorageMetadataManager metadataManager, Collection<String> sampleIds, String token)
throws CatalogException {
OpenCGAResult<Individual> individualResult = catalogManager.getIndividualManager()
Expand All @@ -1330,12 +1330,12 @@ public List<List<String>> getTriosFromSamples(
return getTrios(studyFqn, metadataManager, individualResult.getResults(), token);
}

public List<List<String>> getTrios(
public List<Trio> getTrios(
String studyFqn, VariantStorageMetadataManager metadataManager, List<Individual> membersList, String sessionId)
throws CatalogException {
int studyId = metadataManager.getStudyId(studyFqn);
Map<Long, Individual> membersMap = membersList.stream().collect(Collectors.toMap(Individual::getUid, i -> i));
List<List<String>> trios = new LinkedList<>();
List<Trio> trios = new LinkedList<>();
for (Individual individual : membersList) {
String fatherSample = null;
String motherSample = null;
Expand Down Expand Up @@ -1402,10 +1402,7 @@ public List<List<String>> getTrios(

// Allow one missing parent
if (childSample != null && (fatherSample != null || motherSample != null)) {
trios.add(Arrays.asList(
fatherSample == null ? "-" : fatherSample,
motherSample == null ? "-" : motherSample,
childSample));
trios.add(new Trio(fatherSample, motherSample, childSample));
}
}
return trios;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
import org.opencb.opencga.core.models.operations.variant.*;
import org.opencb.opencga.core.models.project.DataStore;
import org.opencb.opencga.core.models.project.Project;
import org.opencb.opencga.core.models.project.ProjectOrganism;
import org.opencb.opencga.core.models.sample.Sample;
import org.opencb.opencga.core.models.sample.SamplePermissions;
import org.opencb.opencga.core.models.study.Study;
Expand All @@ -76,10 +77,7 @@
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.opencb.opencga.storage.core.metadata.VariantMetadataFactory;
import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager;
import org.opencb.opencga.storage.core.metadata.models.ProjectMetadata;
import org.opencb.opencga.storage.core.metadata.models.SampleMetadata;
import org.opencb.opencga.storage.core.metadata.models.StudyMetadata;
import org.opencb.opencga.storage.core.metadata.models.VariantScoreMetadata;
import org.opencb.opencga.storage.core.metadata.models.*;
import org.opencb.opencga.storage.core.utils.CellBaseUtils;
import org.opencb.opencga.storage.core.variant.BeaconResponse;
import org.opencb.opencga.storage.core.variant.VariantStorageEngine;
Expand Down Expand Up @@ -397,19 +395,19 @@ public void sampleIndexAnnotate(String study, List<String> samples, ObjectMap pa
});
}

public DataResult<List<String>> familyIndexUpdate(String study,
public DataResult<Trio> familyIndexUpdate(String study,
ObjectMap params, String token)
throws CatalogException, StorageEngineException {
return secureOperation(VariantFamilyIndexOperationTool.ID, study, params, token, engine -> {
return engine.familyIndexUpdate(study, params);
});
}

public DataResult<List<String>> familyIndex(String study, List<String> familiesStr, boolean skipIncompleteFamilies,
public DataResult<Trio> familyIndex(String study, List<String> familiesStr, boolean skipIncompleteFamilies,
ObjectMap params, String token)
throws CatalogException, StorageEngineException {
return secureOperation(VariantFamilyIndexOperationTool.ID, study, params, token, engine -> {
List<List<String>> trios = new LinkedList<>();
List<Trio> trios = new LinkedList<>();
List<Event> events = new LinkedList<>();
VariantStorageMetadataManager metadataManager = engine.getMetadataManager();
VariantCatalogQueryUtils catalogUtils = new VariantCatalogQueryUtils(catalogManager);
Expand All @@ -425,9 +423,9 @@ public DataResult<List<String>> familyIndex(String study, List<String> familiesS
trios.addAll(catalogUtils.getTriosFromFamily(study, family, metadataManager, skipIncompleteFamilies, token));
}
}
DataResult<List<String>> dataResult = engine.familyIndex(study, trios, params);
DataResult<Trio> dataResult = engine.familyIndex(study, trios, params);
getSynchronizer(engine).synchronizeCatalogSamplesFromStorage(study, trios.stream()
.flatMap(Collection::stream)
.flatMap(t->t.toList().stream())
.collect(Collectors.toList()), token);
return dataResult;
});
Expand All @@ -439,25 +437,24 @@ private CatalogStorageMetadataSynchronizer getSynchronizer(VariantStorageEngine
return synchronizer;
}

public DataResult<List<String>> familyIndexBySamples(String study, Collection<String> samples, ObjectMap params, String token)
public DataResult<Trio> familyIndexBySamples(String study, Collection<String> samples, ObjectMap params, String token)
throws CatalogException, StorageEngineException {
return secureOperation(VariantFamilyIndexOperationTool.ID, study, params, token, engine -> {
Collection<String> thisSamples = samples;
if (CollectionUtils.size(thisSamples) == 1 && thisSamples.iterator().next().equals(ParamConstants.ALL)) {
thisSamples = getIndexedSamples(study, token);
}

List<List<String>> trios = catalogUtils.getTriosFromSamples(study, engine.getMetadataManager(), thisSamples, token);

DataResult<List<String>> dataResult = engine.familyIndex(study, trios, params);
List<Trio> trios = catalogUtils.getTriosFromSamples(study, engine.getMetadataManager(), thisSamples, token);
DataResult<Trio> dataResult = engine.familyIndex(study, trios, params);
getSynchronizer(engine).synchronizeCatalogSamplesFromStorage(study, trios.stream()
.flatMap(Collection::stream)
.flatMap(t -> t.toList().stream())
.collect(Collectors.toList()), token);
return dataResult;
});
}

public List<List<String>> getTriosFromFamily(String study, Family family, boolean skipIncompleteFamilies, String token)
public List<Trio> getTriosFromFamily(String study, Family family, boolean skipIncompleteFamilies, String token)
throws CatalogException, StorageEngineException {
VariantStorageEngine variantStorageEngine = getVariantStorageEngine(study, token);
return catalogUtils.getTriosFromFamily(study, family, variantStorageEngine.getMetadataManager(), skipIncompleteFamilies, token);
Expand Down Expand Up @@ -1015,11 +1012,21 @@ protected VariantStorageEngine getVariantStorageEngineByProject(String project,
return variantStorageEngine;
}

private void setCellbaseConfiguration(VariantStorageEngine engine, String project, String token)
private void setCellbaseConfiguration(VariantStorageEngine engine, String projectId, String token)
throws CatalogException {
CellBaseConfiguration cellbase = catalogManager.getProjectManager()
.get(project, new QueryOptions(INCLUDE, ProjectDBAdaptor.QueryParams.CELLBASE.key()), token)
.first().getCellbase();
Project project = catalogManager.getProjectManager()
.get(projectId, new QueryOptions(INCLUDE, Arrays.asList(
ProjectDBAdaptor.QueryParams.CELLBASE.key(),
ProjectDBAdaptor.QueryParams.ORGANISM.key())), token)
.first();
CellBaseConfiguration cellbase = project.getCellbase();
ProjectOrganism organism = project.getOrganism();
if (organism == null) {
throw new CatalogException("Missing organism in project '" + project.getFqn()+ "'");
} else {
engine.getOptions().put(VariantStorageOptions.SPECIES.key(), organism.getScientificName());
engine.getOptions().put(VariantStorageOptions.ASSEMBLY.key(), organism.getAssembly());
}
if (cellbase != null) {
// To ensure that we use the project API key before using the storage API key
if (StringUtils.isEmpty(cellbase.getApiKey()) && storageConfiguration.getCellbase() != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ public class VariantFileIndexerOperationManager extends OperationManager {
private List<File> filesToIndex;
private CatalogStorageMetadataSynchronizer synchronizer;
private boolean fullSynchronize = false;
private boolean force;

public VariantFileIndexerOperationManager(VariantStorageManager variantStorageManager, VariantStorageEngine engine) {
super(variantStorageManager, engine);
Expand Down Expand Up @@ -138,6 +139,7 @@ private void check(String study, ObjectMap params, String token) throws Exceptio
}
resume = params.getBoolean(VariantStorageOptions.RESUME.key());
skipIndexedFiles = params.getBoolean(SKIP_INDEXED_FILES);
force = params.getBoolean(VariantStorageOptions.FORCE.key());

// Obtain the type of analysis (transform, load or index)
step = getType(load, transform);
Expand Down Expand Up @@ -589,6 +591,7 @@ private List<File> filterTransformFiles(List<File> fileList, boolean resume) thr
break;
case VariantIndexStatus.INDEXING:
case VariantIndexStatus.TRANSFORMING:
case VariantIndexStatus.LOADING:
if (resume) {
filteredFiles.add(file);
} else {
Expand All @@ -603,14 +606,17 @@ private List<File> filterTransformFiles(List<File> fileList, boolean resume) thr
}
break;
case VariantIndexStatus.TRANSFORMED:
case VariantIndexStatus.LOADING:
case VariantIndexStatus.READY:
default:
String msg = "We can only " + step + " VCF files not transformed, the status is " + indexStatus;
if (skipIndexedFiles) {
logger.warn(msg);
if (force) {
filteredFiles.add(file);
} else {
throw new StorageEngineException(msg);
if (skipIndexedFiles) {
logger.warn(msg);
} else {
throw new StorageEngineException(msg);
}
}
break;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,16 @@ protected boolean synchronizeFiles(StudyMetadata study, List<File> files, String
}
fileSamplesMap.put(fileMetadata.getName(), samples);
allSamples.addAll(fileMetadata.getSamples());
if (samples.size() > 100) {
// Try to reuse value.
// If the file holds more than 100 samples, it's most likely this same set of samples is already present
for (Set<String> value : fileSamplesMap.values()) {
if (value.equals(samples)) {
fileSamplesMap.put(fileMetadata.getName(), value);
break;
}
}
}
}

if (!indexedFilesFromStorage.isEmpty()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,9 @@ private void computeSignatureFitting() throws IOException, ToolException, Catalo
StringBuilder scriptParams = new StringBuilder("R CMD Rscript --vanilla ")
.append("/opt/opencga/signature.tools.lib/scripts/signatureFit")
.append(" --catalogues=/data/input/").append(cataloguesFile.getName())
.append(" --outdir=/data/output");
.append(" --outdir=/data/output")
.append(" --commonsigtier=T2");

if (StringUtils.isNotEmpty(getFitMethod())) {
scriptParams.append(" --fitmethod=").append(getFitMethod());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.opencb.opencga.core.models.operations.variant.VariantFamilyIndexParams;
import org.opencb.opencga.core.tools.annotations.Tool;
import org.opencb.opencga.core.tools.annotations.ToolParams;
import org.opencb.opencga.storage.core.metadata.models.Trio;

import java.util.Collections;
import java.util.List;
Expand Down Expand Up @@ -67,7 +68,7 @@ protected void check() throws Exception {
@Override
protected void run() throws Exception {
step(() -> {
DataResult<List<String>> trios;
DataResult<Trio> trios;
if (variantFamilyIndexParams.isUpdateIndex()) {
trios = variantStorageManager.familyIndexUpdate(study, params, token);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ protected void check() throws Exception {
params.put(VariantStorageOptions.ANNOTATE.key(), indexParams.isAnnotate());
params.putIfNotEmpty(VariantStorageOptions.ANNOTATOR.key(), indexParams.getAnnotator());
params.put(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), indexParams.isOverwriteAnnotations());
params.put(VariantStorageOptions.FORCE.key(), indexParams.isForceReload());
params.put(VariantStorageOptions.RESUME.key(), indexParams.isResume());
params.put(VariantStorageOptions.NORMALIZATION_SKIP.key(), indexParams.getNormalizationSkip());
params.putIfNotEmpty(VariantStorageOptions.NORMALIZATION_REFERENCE_GENOME.key(), indexParams.getReferenceGenome());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.opencb.opencga.core.models.operations.variant.VariantSecondarySampleIndexParams;
import org.opencb.opencga.core.tools.annotations.Tool;
import org.opencb.opencga.core.tools.annotations.ToolParams;
import org.opencb.opencga.storage.core.metadata.models.Trio;

import java.util.ArrayList;
import java.util.List;
Expand Down Expand Up @@ -80,7 +81,7 @@ protected void run() throws Exception {
}
if (sampleIndexParams.isFamilyIndex()) {
step("familyIndex", () -> {
DataResult<List<String>> result = variantStorageManager.familyIndexBySamples(study, sampleIndexParams.getSample(), params,
DataResult<Trio> result = variantStorageManager.familyIndexBySamples(study, sampleIndexParams.getSample(), params,
getToken());
if (result.getEvents() != null) {
for (Event event : result.getEvents()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import org.junit.AfterClass;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
Expand Down Expand Up @@ -95,6 +96,7 @@ public AlignmentAnalysisTest(String storageEngine) {
private CatalogManager catalogManager;
private VariantStorageManager variantStorageManager;

@ClassRule
public static OpenCGATestExternalResource opencga = new OpenCGATestExternalResource();
public static HadoopVariantStorageTest.HadoopExternalResource hadoopExternalResource = new HadoopVariantStorageTest.HadoopExternalResource();

Expand Down
Loading

0 comments on commit eca6390

Please sign in to comment.