Skip to content

Commit

Permalink
analysis: improve family QC and add JUnit test, #TASK-6772, #TASK-6766
Browse files Browse the repository at this point in the history
  • Loading branch information
jtarraga committed Sep 20, 2024
1 parent 24efa3e commit 63baa43
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.apache.commons.lang3.StringUtils;
import org.opencb.biodata.models.clinical.qc.Relatedness;
import org.opencb.biodata.models.variant.avro.VariantType;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.opencga.analysis.variant.qc.VariantQcAnalysis;
Expand Down Expand Up @@ -74,19 +75,21 @@ public class FamilyVariantQcAnalysis extends VariantQcAnalysis {

/**
 * Validates the family QC analysis before it is executed.
 *
 * <p>Runs the parent checks first, then sets up the storage engine executor for the study, validates the
 * analysis parameters and stores the result of the resources directory check in {@code userResourcesPath}.
 *
 * @throws Exception propagated from the parent checks, the parameter validation or the resources directory check
 */
@Override
protected void check() throws Exception {
    // IMPORTANT: the first thing to do since it initializes "study" from params.get(STUDY_PARAM)
    super.check();

    setUpStorageEngineExecutor(study);

    // Check parameters (once; "study" is already initialized by super.check())
    checkParameters(analysisParams, study, catalogManager, token);

    // Check custom resources path
    userResourcesPath = checkResourcesDir(analysisParams.getResourcesDir(), study, catalogManager, token);
}

@Override
protected List<String> getSteps() {
List<String> steps = Arrays.asList(PREPARE_QC_STEP, ID);
List<String> steps = new ArrayList<>(Arrays.asList(PREPARE_QC_STEP, ID));
if (!Boolean.TRUE.equals(analysisParams.getSkipIndex())) {
steps.add(INDEX_QC_STEP);
}
Expand Down Expand Up @@ -131,7 +134,7 @@ protected void prepareQualityControl() throws ToolException {
// Create query options
QueryOptions queryOptions = new QueryOptions().append(QueryOptions.INCLUDE, "id,studies.samples");

// Export to VCF.GZ format
// Export variants (VCF.GZ format)
String basename = getOutDir().resolve(familyId).toAbsolutePath().toString();
getVariantStorageManager().exportData(basename, VCF_GZ, null, query, queryOptions, token);

Expand All @@ -144,11 +147,11 @@ protected void prepareQualityControl() throws ToolException {
}
vcfPaths.add(vcfPath);

// Export family (JSON format)
Path jsonPath = Paths.get(basename + "." + JSON.getExtension());
// Write family (JSON format)
Path jsonPath = Paths.get(basename + "_info." + JSON.getExtension());
objectWriter.writeValue(jsonPath.toFile(), family);

// Check VCF file
// Check JSON file
if (!Files.exists(jsonPath)) {
throw new ToolException("Something wrong happened when saving JSON file for family ID " + familyId + ". JSON file "
+ jsonPath + " was not created.");
Expand Down Expand Up @@ -189,7 +192,8 @@ protected void indexQualityControl() throws ToolException {
familyQc = new FamilyQualityControl();

// Check and parse the relatedness output file
Path qcPath = getOutDir().resolve(family.getId()).resolve(RELATEDNESS_ANALYSIS_ID + QC_JSON_EXTENSION);
Path qcPath = getOutDir().resolve(family.getId()).resolve(RELATEDNESS_ANALYSIS_ID)
.resolve(RELATEDNESS_ANALYSIS_ID + QC_JSON_EXTENSION);
if (!Files.exists(qcPath)) {
failedQcSet.add(family.getId());
qcStatus = new QualityControlStatus(ERROR, FAILURE_FILE + qcPath.getFileName() + NOT_FOUND);
Expand All @@ -199,11 +203,16 @@ protected void indexQualityControl() throws ToolException {
logger.error(logMsg);
} else {
try {
List<Relatedness> relatedness = isQcArray(qcPath)
List<Relatedness> relatednessList = isQcArray(qcPath)
? relatednessListReader.readValue(qcPath.toFile())
: Collections.singletonList(relatednessReader.readValue(qcPath.toFile()));

familyQc.setRelatedness(relatedness);
// Set common attributes
for (Relatedness relatedness : relatednessList) {
addCommonAttributes(relatedness.getAttributes());
}

familyQc.setRelatedness(relatednessList);
qcStatus = new QualityControlStatus(READY, SUCCESS);
} catch (IOException e) {
failedQcSet.add(family.getId());
Expand Down Expand Up @@ -233,7 +242,6 @@ protected void indexQualityControl() throws ToolException {
checkFailedQcCounter(families.size(), FAMILY_QC_TYPE);
}


public static void checkParameters(FamilyQcAnalysisParams params, String studyId, CatalogManager catalogManager, String token)
throws ToolException {
// Check study
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.opencga.analysis.ResourceUtils;
Expand Down Expand Up @@ -100,6 +101,9 @@ public class VariantQcAnalysis extends OpenCgaToolScopeStudy {
+ " in OpenCGA catalog";
protected static final String SUCCESS = "Success";

// Common attributes
public static final String OPENCGA_JOB_ID_ATTR = "OPENCGA_JOB_ID";

protected LinkedList<Path> vcfPaths = new LinkedList<>();
protected LinkedList<Path> jsonPaths = new LinkedList<>();

Expand Down Expand Up @@ -247,6 +251,19 @@ protected boolean isQcArray(Path qcPath) throws ToolException {
}
}

/**
 * Appends the attributes shared by all QC results to the given map; currently the job ID stored under
 * {@link #OPENCGA_JOB_ID_ATTR}.
 *
 * <p>When {@code attributes} is {@code null} nothing can be appended, so a warning is registered instead; if
 * registering the warning fails, the message is written to the logger.
 *
 * @param attributes map to append the common attributes to; may be {@code null}
 */
protected void addCommonAttributes(ObjectMap attributes) {
    if (attributes == null) {
        // No map to write into: report it without interrupting the QC
        String warning = "Could not add common attributes, such as " + OPENCGA_JOB_ID_ATTR;
        try {
            addWarning(warning);
        } catch (ToolException e) {
            logger.warn(warning, e);
        }
        return;
    }

    attributes.append(OPENCGA_JOB_ID_ATTR, getJobId());
}

protected <T> T parseQcFile(String id, String analysisId, List<String> skip, Path qcPath, String qcType, ObjectReader reader)
throws ToolException {
if (CollectionUtils.isEmpty(skip) || !skip.contains(analysisId)) {
Expand Down Expand Up @@ -356,15 +373,16 @@ protected void downloadQcResourceFile(String resourceName, Path destPath) throws

protected void copyUserResourceFiles() throws ToolException {
// Sanity check
if (userResourcesPath == null) {
// Nothing to do
return;
if (userResourcesPath != null && Files.exists(userResourcesPath)) {
copyUserResourceFiles(userResourcesPath);
}
}

protected void copyUserResourceFiles(Path inputPath) throws ToolException {
Path destResourcesPath = checkResourcesPath(getOutDir().resolve(RESOURCES_FOLDER));

// Copy custom resource files to the job dir
for (java.io.File file : userResourcesPath.toFile().listFiles()) {
for (java.io.File file : inputPath.toFile().listFiles()) {
Path destPath = destResourcesPath.resolve(file.getName());
if (file.isFile()) {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,21 @@ public Path isolateOpenCGA() throws IOException {
Files.copy(inputStream, analysisPath.resolve(exomiserFile), StandardCopyOption.REPLACE_EXISTING);
}

// QC
analysisPath = Files.createDirectories(opencgaHome.resolve("analysis/qc")).toAbsolutePath();
List<String> qcFiles = Arrays.asList("variant_qc.main.py", "utils.py", "__init__.py");
for (String qcFile : qcFiles) {
inputStream = new FileInputStream("../opencga-app/app/analysis/qc/" + qcFile);
Files.copy(inputStream, analysisPath.resolve(qcFile), StandardCopyOption.REPLACE_EXISTING);
}
// Family QC
analysisPath = Files.createDirectories(opencgaHome.resolve("analysis/qc/family_qc")).toAbsolutePath();
qcFiles = Arrays.asList("family_qc.py", "__init__.py");
for (String qcFile : qcFiles) {
inputStream = new FileInputStream("../opencga-app/app/analysis/qc/family_qc/" + qcFile);
Files.copy(inputStream, analysisPath.resolve(qcFile), StandardCopyOption.REPLACE_EXISTING);
}

return opencgaHome;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
import org.opencb.opencga.core.api.ParamConstants;
import org.opencb.opencga.core.common.ExceptionUtils;
import org.opencb.opencga.core.common.JacksonUtils;
import org.opencb.opencga.core.common.TimeUtils;
import org.opencb.opencga.core.config.storage.CellBaseConfiguration;
import org.opencb.opencga.core.config.storage.StorageConfiguration;
import org.opencb.opencga.core.exceptions.ToolException;
Expand Down Expand Up @@ -116,8 +117,7 @@

import static org.hamcrest.CoreMatchers.hasItem;
import static org.junit.Assert.*;
import static org.opencb.opencga.analysis.variant.qc.VariantQcAnalysis.GENOME_PLOT_ANALYSIS_ID;
import static org.opencb.opencga.analysis.variant.qc.VariantQcAnalysis.SIGNATURE_ANALYSIS_ID;
import static org.opencb.opencga.analysis.variant.qc.VariantQcAnalysis.*;
import static org.opencb.opencga.storage.core.variant.VariantStorageBaseTest.getResourceUri;

@RunWith(Parameterized.class)
Expand Down Expand Up @@ -1111,22 +1111,39 @@ public void testPedigreeGraph() throws CatalogException {
public void testFamilyQC() throws Exception {
Path outDir = Paths.get(opencga.createTmpOutdir("_family_qc"));

// To be sure, all samples are no-somatic
SampleUpdateParams sampleUpdateParams = new SampleUpdateParams().setSomatic(false);
catalogManager.getSampleManager().update(STUDY, son, sampleUpdateParams, null, token);

// Update quality control for the cancer sample
FamilyQualityControl qc = new FamilyQualityControl();
FamilyUpdateParams updateParams = new FamilyUpdateParams().setQualityControl(qc);
catalogManager.getFamilyManager().update(STUDY, family, updateParams, null, token);
FamilyUpdateParams familyUpdateParams = new FamilyUpdateParams().setQualityControl(qc);
catalogManager.getFamilyManager().update(STUDY, family, familyUpdateParams, null, token);

// Family QC analysis
FamilyQcAnalysisParams params = new FamilyQcAnalysisParams();
params.setFamilies(Arrays.asList(family));

String jobId = "test-family-qc-" + TimeUtils.getTimeMillis();
toolRunner.execute(FamilyVariantQcAnalysis.class, params, new ObjectMap(ParamConstants.STUDY_PARAM, STUDY),
outDir, null, false, token);
outDir, jobId, false, token);

Family fam = catalogManager.getFamilyManager().get(STUDY, family, QueryOptions.empty(), token).first();
System.out.println("fam.getInternal().getQualityControlStatus() = " + fam.getInternal().getQualityControlStatus());

// Some output to check
System.out.println("fam.getInternal().getQualityControlStatus() = " + fam.getInternal().getQualityControlStatus());
System.out.println("fam.getQualityControl() = " + fam.getQualityControl());
System.out.println("outDir = " + outDir);

// Restore
sampleUpdateParams = new SampleUpdateParams().setSomatic(true);
catalogManager.getSampleManager().update(STUDY, son, sampleUpdateParams, null, token);

// Asserts
assertEquals(fam.getInternal().getQualityControlStatus().getId(), QualityControlStatus.READY);
assertNotNull(fam.getQualityControl());
assertEquals(1, fam.getQualityControl().getRelatedness().size());
assertEquals(jobId, fam.getQualityControl().getRelatedness().get(0).getAttributes().get(OPENCGA_JOB_ID_ATTR));
}

@Test
Expand Down
Binary file not shown.

0 comments on commit 63baa43

Please sign in to comment.