Skip to content

Commit

Permalink
app: update exporter for protein substitution predictions (sift, polyphen, revel and alphamissense), #TASK-5464, #TASK-5388
Browse files Browse the repository at this point in the history
  • Loading branch information
jtarraga committed Jan 12, 2024
1 parent 4167282 commit bbabc8d
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import com.beust.jcommander.*;
import org.opencb.cellbase.app.cli.CliOptionsParser;
import org.opencb.cellbase.core.api.key.ApiKeyQuota;
import org.opencb.cellbase.lib.EtlCommons;

import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -238,8 +239,8 @@ public class ExportCommandOptions {
public CommonCommandOptions commonOptions = commonCommandOptions;

@Parameter(names = {"-d", "--data"}, description = "Data model type to be loaded: genome, gene, variation, "
+ "conservation, regulation, protein, clinical_variants, repeats, regulatory_pfm, splice_score, pubmed. 'all' "
+ " loads everything", required = true, arity = 1)
+ EtlCommons.PROTEIN_SUBSTITUTION_PREDICTION_DATA + ", conservation, regulation, protein, clinical_variants, repeats,"
+ " regulatory_pfm, splice_score, pubmed. 'all' export everything", required = true, arity = 1)
public String data;

@Parameter(names = {"--db", "--database"}, description = "Database name, e.g., cellbase_hsapiens_grch38_v5", required = true,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,9 @@ public ExportCommandExecutor(AdminCliOptionsParser.ExportCommandOptions exportCo
if (exportCommandOptions.data.equals("all")) {
this.dataToExport = new String[]{EtlCommons.GENOME_DATA, EtlCommons.GENE_DATA, EtlCommons.REFSEQ_DATA,
EtlCommons.CONSERVATION_DATA, EtlCommons.REGULATION_DATA, EtlCommons.PROTEIN_DATA,
EtlCommons.PROTEIN_FUNCTIONAL_PREDICTION_DATA, EtlCommons.VARIATION_DATA,
PROTEIN_SUBSTITUTION_PREDICTION_DATA, EtlCommons.VARIATION_DATA,
EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA, EtlCommons.CLINICAL_VARIANTS_DATA, EtlCommons.REPEATS_DATA,
OBO_DATA, EtlCommons.MISSENSE_VARIATION_SCORE_DATA, EtlCommons.SPLICE_SCORE_DATA, EtlCommons.PHARMACOGENOMICS_DATA};
OBO_DATA, EtlCommons.SPLICE_SCORE_DATA, EtlCommons.PHARMACOGENOMICS_DATA};
} else {
this.dataToExport = exportCommandOptions.data.split(",");
}
Expand Down Expand Up @@ -200,38 +200,6 @@ public void execute() throws CellBaseException {
counterMsg = counter + " CADD items";
break;
}
case EtlCommons.MISSENSE_VARIATION_SCORE_DATA: {
CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(output);
ProteinManager proteinManager = managerFactory.getProteinManager(species, assembly);
Map<String, List<Integer>> positionMap = new HashMap<>();
for (Variant variant : variants) {
if (!positionMap.containsKey(variant.getChromosome())) {
positionMap.put(variant.getChromosome(), new ArrayList<>());
}
positionMap.get(variant.getChromosome()).add(variant.getStart());
if (positionMap.get(variant.getChromosome()).size() >= 200) {
CellBaseDataResult<MissenseVariantFunctionalScore> results = proteinManager
.getMissenseVariantFunctionalScores(variant.getChromosome(),
positionMap.get(variant.getChromosome()), null, dataRelease);
counter += writeExportedData(results.getResults(), "missense_variation_functional_score", serializer);
positionMap.put(variant.getChromosome(), new ArrayList<>());
}
}

// Process map
for (Map.Entry<String, List<Integer>> entry : positionMap.entrySet()) {
if (CollectionUtils.isEmpty(entry.getValue())) {
continue;
}
CellBaseDataResult<MissenseVariantFunctionalScore> results = proteinManager
.getMissenseVariantFunctionalScores(entry.getKey(), entry.getValue(), null, dataRelease);
counter += writeExportedData(results.getResults(), "missense_variation_functional_score", serializer);
}
serializer.close();

counterMsg = counter + " missense variation functional scores";
break;
}
case EtlCommons.CONSERVATION_DATA: {
// Export data
CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(output);
Expand Down Expand Up @@ -271,7 +239,7 @@ public void execute() throws CellBaseException {
counterMsg = counter + " proteins";
break;
}
case EtlCommons.PROTEIN_FUNCTIONAL_PREDICTION_DATA: {
case EtlCommons.PROTEIN_SUBSTITUTION_PREDICTION_DATA: {
ProteinManager proteinManager = managerFactory.getProteinManager(species, assembly);
Map<String, List<String>> transcriptsMap = new HashMap<>();
for (Gene gene : genes) {
Expand All @@ -290,7 +258,7 @@ public void execute() throws CellBaseException {
}
serializer.close();

counterMsg = counter + " protein functional predictions";
counterMsg = counter + " protein substitution predictions";
break;
}
case EtlCommons.CLINICAL_VARIANTS_DATA: {
Expand Down

0 comments on commit bbabc8d

Please sign in to comment.