Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TASK-6765 - Improve VCF Export performance from secondary sample index #2503

Open
wants to merge 22 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
0eabaa0
storage: Add DataFields for INDELs and AltCoordinates #TASK-6765
j-coll Sep 4, 2024
5250927
storage: Add varint to store DataSchema entry length. #TASK-6765
j-coll Sep 4, 2024
d9a7bf1
storage: Skip data entries if equals to defaults. #TASK-6765
j-coll Sep 4, 2024
1a281da
storage: Replace string with binary file data encoding. #TASK-6765
j-coll Sep 9, 2024
add8038
storage: Add sampleIndexConfiguration.fileDataConfiguration. #TASK-6765
j-coll Sep 9, 2024
3bff019
storage: Fix junit tests. #TASK-6765
j-coll Sep 9, 2024
42fcd2b
storage: Add VariantQueryParam "source" #TASK-6765
j-coll Sep 9, 2024
7fac8bd
storage: Fix NPE building sample index. #TASK-6765
j-coll Sep 10, 2024
4037883
storage: Ensure SampleIndexConfiguration always exists. Add migration…
j-coll Sep 11, 2024
98be159
app: Fix migration EnsureSampleIndexConfigurationIsAlwaysDefined. #TA…
j-coll Sep 11, 2024
7529652
Merge branch 'develop' into TASK-6765
j-coll Sep 20, 2024
1f92650
storage: Fix compilation issues #TASK-6765
j-coll Sep 20, 2024
b02d45d
storage: Rename schema "entry" with "document". Create parent schema …
j-coll Oct 18, 2024
f5fd1aa
storage: Rename IndexFieldConfiguration with FieldConfiguration #TASK…
j-coll Oct 18, 2024
499b5e1
storage: Rename SampleVariantIndexEntry to SampleIndexVariant and Ann…
j-coll Oct 18, 2024
9526fe0
Merge branch 'develop' into TASK-6765
j-coll Oct 18, 2024
ec84b74
storage: Fix compilation issue. #TASK-6765
j-coll Oct 18, 2024
d7f17b1
storage: Rename "canUseThisExecutor" with "acceptsQuery" #TASK-6765
j-coll Oct 18, 2024
faaaad5
Merge branch 'develop' into TASK-6765
j-coll Nov 8, 2024
4f4ce7e
Push echo to debug pull-request-approve #TASK-6765
juanfeSanahuja Nov 11, 2024
d6109e7
storage: Fix tests. #TASK-6765
j-coll Nov 15, 2024
bf7f84e
Merge branch 'TASK-6765' of github.com:opencb/opencga into TASK-6765
j-coll Nov 15, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/pull-request-approved.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ jobs:
echo "github.event.pull_request.head.ref: ${{ github.event.pull_request.head.ref }}"
xetabase_branch=$(./.github/workflows/scripts/get-xetabase-branch.sh ${{ github.event.pull_request.base.ref }})
echo "__Xetabase ref:__ \"${xetabase_branch}\"" | tee -a ${GITHUB_STEP_SUMMARY}
echo "xetabase_branch: ${xetabase_branch}"
echo "xetabase_branch=${xetabase_branch}" >> $GITHUB_OUTPUT
env:
ZETTA_REPO_ACCESS_TOKEN: ${{ secrets.ZETTA_REPO_ACCESS_TOKEN }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/scripts/get-xetabase-branch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ get_xetabase_branch() {
# If the branch begins with 'TASK' and exists in the opencga-enterprise repository, return it
if [[ $input_branch == TASK* ]]; then
if [ "$(git ls-remote "https://$ZETTA_REPO_ACCESS_TOKEN@github.com/zetta-genomics/opencga-enterprise.git" "$input_branch" )" ] ; then
echo $input_branch;
echo "$input_branch";
return 0;
fi
fi
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
import org.opencb.opencga.core.models.sample.SamplePermissions;
import org.opencb.opencga.core.models.study.Study;
import org.opencb.opencga.core.models.user.UserFilter;
import org.opencb.opencga.core.models.variant.VariantQueryParams;
import org.opencb.opencga.core.response.OpenCGAResult;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager;
Expand Down Expand Up @@ -89,70 +90,45 @@
*/
public class VariantCatalogQueryUtils extends CatalogUtils {

public static final String SAMPLE_ANNOTATION_DESC =
"Selects some samples using metadata information from Catalog. e.g. age>20;phenotype=hpo:123,hpo:456;name=smith";
public static final QueryParam SAMPLE_ANNOTATION
= QueryParam.create("sampleAnnotation", SAMPLE_ANNOTATION_DESC, QueryParam.Type.TEXT_ARRAY);
public static final String PROJECT_DESC = ParamConstants.PROJECT_DESCRIPTION;
public static final QueryParam PROJECT = QueryParam.create(ParamConstants.PROJECT_PARAM, PROJECT_DESC, QueryParam.Type.TEXT_ARRAY);
= QueryParam.create("sampleAnnotation", VariantQueryParams.SAMPLE_ANNOTATION_DESC, QueryParam.Type.TEXT_ARRAY);
public static final QueryParam PROJECT = QueryParam.create(ParamConstants.PROJECT_PARAM, VariantQueryParams.PROJECT_DESC, QueryParam.Type.TEXT_ARRAY);

public static final String FAMILY_DESC = "Filter variants where any of the samples from the given family contains the variant "
+ "(HET or HOM_ALT)";
public static final QueryParam FAMILY =
QueryParam.create("family", FAMILY_DESC, QueryParam.Type.TEXT);
public static final String FAMILY_MEMBERS_DESC = "Sub set of the members of a given family";
QueryParam.create("family", VariantQueryParams.FAMILY_DESC, QueryParam.Type.TEXT);
public static final QueryParam FAMILY_MEMBERS =
QueryParam.create("familyMembers", FAMILY_MEMBERS_DESC, QueryParam.Type.TEXT);
public static final String FAMILY_DISORDER_DESC = "Specify the disorder to use for the family segregation";
QueryParam.create("familyMembers", VariantQueryParams.FAMILY_MEMBERS_DESC, QueryParam.Type.TEXT);
public static final QueryParam FAMILY_DISORDER =
QueryParam.create("familyDisorder", FAMILY_DISORDER_DESC, QueryParam.Type.TEXT);
public static final String FAMILY_PROBAND_DESC = "Specify the proband child to use for the family segregation";
QueryParam.create("familyDisorder", VariantQueryParams.FAMILY_DISORDER_DESC, QueryParam.Type.TEXT);
public static final QueryParam FAMILY_PROBAND =
QueryParam.create("familyProband", FAMILY_PROBAND_DESC, QueryParam.Type.TEXT);
public static final String FAMILY_SEGREGATION_DESCR = "Filter by segregation mode from a given family. Accepted values: "
+ "[ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, "
+ "deNovo, deNovoStrict, mendelianError, compoundHeterozygous ]";
QueryParam.create("familyProband", VariantQueryParams.FAMILY_PROBAND_DESC, QueryParam.Type.TEXT);
public static final QueryParam FAMILY_SEGREGATION =
QueryParam.create("familySegregation", FAMILY_SEGREGATION_DESCR, QueryParam.Type.TEXT);
QueryParam.create("familySegregation", VariantQueryParams.FAMILY_SEGREGATION_DESCR, QueryParam.Type.TEXT);

public static final String SAVED_FILTER_DESCR = "Use a saved filter at User level";
public static final QueryParam SAVED_FILTER =
QueryParam.create("savedFilter", SAVED_FILTER_DESCR, QueryParam.Type.TEXT);
QueryParam.create("savedFilter", VariantQueryParams.SAVED_FILTER_DESCR, QueryParam.Type.TEXT);

@Deprecated
public static final QueryParam FAMILY_PHENOTYPE = FAMILY_DISORDER;
@Deprecated
public static final QueryParam MODE_OF_INHERITANCE = FAMILY_SEGREGATION;

public static final String PANEL_DESC = "Filter by genes from the given disease panel";
public static final QueryParam PANEL =
QueryParam.create("panel", PANEL_DESC, QueryParam.Type.TEXT);
public static final String PANEL_MOI_DESC = "Filter genes from specific panels that match certain mode of inheritance. " +
"Accepted values : "
+ "[ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, "
+ "deNovo, mendelianError, compoundHeterozygous ]";
QueryParam.create("panel", VariantQueryParams.PANEL_DESC, QueryParam.Type.TEXT);
public static final QueryParam PANEL_MODE_OF_INHERITANCE =
QueryParam.create("panelModeOfInheritance", PANEL_MOI_DESC
QueryParam.create("panelModeOfInheritance", VariantQueryParams.PANEL_MOI_DESC
, QueryParam.Type.TEXT);
public static final String PANEL_CONFIDENCE_DESC = "Filter genes from specific panels that match certain confidence. " +
"Accepted values : [ high, medium, low, rejected ]";
public static final QueryParam PANEL_CONFIDENCE =
QueryParam.create("panelConfidence", PANEL_CONFIDENCE_DESC, QueryParam.Type.TEXT);
QueryParam.create("panelConfidence", VariantQueryParams.PANEL_CONFIDENCE_DESC, QueryParam.Type.TEXT);

public static final String PANEL_INTERSECTION_DESC = "Intersect panel genes and regions with given "
+ "genes and regions from que input query. This will prevent returning variants from regions out of the panel.";
public static final QueryParam PANEL_INTERSECTION =
QueryParam.create("panelIntersection", PANEL_INTERSECTION_DESC, Type.BOOLEAN);
QueryParam.create("panelIntersection", VariantQueryParams.PANEL_INTERSECTION_DESC, Type.BOOLEAN);

public static final String PANEL_ROLE_IN_CANCER_DESC = "Filter genes from specific panels that match certain role in cancer. " +
"Accepted values : [ both, oncogene, tumorSuppressorGene, fusion ]";
public static final QueryParam PANEL_ROLE_IN_CANCER =
QueryParam.create("panelRoleInCancer", PANEL_ROLE_IN_CANCER_DESC, QueryParam.Type.TEXT);
QueryParam.create("panelRoleInCancer", VariantQueryParams.PANEL_ROLE_IN_CANCER_DESC, QueryParam.Type.TEXT);

public static final String PANEL_FEATURE_TYPE_DESC = "Filter elements from specific panels by type. " +
"Accepted values : [ gene, region, str, variant ]";
public static final QueryParam PANEL_FEATURE_TYPE =
QueryParam.create("panelFeatureType", PANEL_FEATURE_TYPE_DESC, QueryParam.Type.TEXT);
QueryParam.create("panelFeatureType", VariantQueryParams.PANEL_FEATURE_TYPE_DESC, QueryParam.Type.TEXT);

public static final List<QueryParam> VARIANT_CATALOG_QUERY_PARAMS = Arrays.asList(
SAMPLE_ANNOTATION,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -555,10 +555,16 @@ public OpenCGAResult<Job> configureSampleIndex(String studyStr, SampleIndexConfi
boolean skipRebuild, String token)
throws CatalogException, StorageEngineException {
return secureOperation("configure", studyStr, new ObjectMap(), token, engine -> {
String version = engine.getCellBaseUtils().getCellBaseClient().getClientConfiguration().getVersion();
sampleIndexConfiguration.validate(version);
String cellbaseVersion = engine.getCellBaseUtils().getVersionFromServer();
sampleIndexConfiguration.validate(cellbaseVersion);
String studyFqn = getStudyFqn(studyStr, token);
engine.getMetadataManager().addSampleIndexConfiguration(studyFqn, sampleIndexConfiguration, true);
int studyId;
if (!engine.getMetadataManager().studyExists(studyFqn)) {
studyId = engine.getMetadataManager().createStudy(studyFqn, cellbaseVersion).getId();
} else {
studyId = engine.getMetadataManager().getStudyId(studyFqn);
}
engine.getMetadataManager().addSampleIndexConfiguration(studyId, sampleIndexConfiguration, true);

catalogManager.getStudyManager()
.setVariantEngineConfigurationSampleIndex(studyStr, sampleIndexConfiguration, token);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import static org.opencb.opencga.core.api.FieldConstants.EXOMISER_CLINICAL_ANALYSIS_DESCRIPTION;
import static org.opencb.opencga.core.api.FieldConstants.EXOMISER_VERSION_DESCRIPTION;
import static org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam.*;
import static org.opencb.opencga.core.models.variant.VariantQueryParams.*;

@Parameters(commandNames = {"clinical"}, commandDescription = "Clinical analysis commands")
public class ClinicalCommandOptions {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,13 @@
import org.opencb.opencga.core.models.variant.AnnotationVariantQueryParams;
import org.opencb.opencga.core.models.variant.SampleVariantFilterParams;
import org.opencb.opencga.core.tools.variant.IndividualQcAnalysisExecutor;
import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam;
import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils;
import org.opencb.oskar.analysis.variant.gwas.GwasConfiguration;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static org.opencb.opencga.analysis.variant.manager.VariantCatalogQueryUtils.*;
import static org.opencb.opencga.app.cli.internal.options.VariantCommandOptions.CohortVariantStatsCommandOptions.COHORT_VARIANT_STATS_RUN_COMMAND;
import static org.opencb.opencga.app.cli.internal.options.VariantCommandOptions.FamilyIndexCommandOptions.FAMILY_INDEX_COMMAND;
import static org.opencb.opencga.app.cli.internal.options.VariantCommandOptions.FamilyIndexCommandOptions.FAMILY_INDEX_COMMAND_DESCRIPTION;
Expand All @@ -73,6 +71,7 @@
import static org.opencb.opencga.core.api.FieldConstants.EXOMISER_SAMPLE_DESCRIPTION;
import static org.opencb.opencga.core.api.FieldConstants.EXOMISER_VERSION_DESCRIPTION;
import static org.opencb.opencga.core.api.ParamConstants.*;
import static org.opencb.opencga.core.models.variant.VariantQueryParams.*;
import static org.opencb.opencga.storage.app.cli.client.options.StorageVariantCommandOptions.AggregateCommandOptions.AGGREGATE_COMMAND;
import static org.opencb.opencga.storage.app.cli.client.options.StorageVariantCommandOptions.AggregateCommandOptions.AGGREGATE_COMMAND_DESCRIPTION;
import static org.opencb.opencga.storage.app.cli.client.options.StorageVariantCommandOptions.AggregateFamilyCommandOptions.AGGREGATE_FAMILY_COMMAND;
Expand All @@ -88,8 +87,6 @@
import static org.opencb.opencga.storage.app.cli.client.options.StorageVariantCommandOptions.GenericAnnotationSaveCommandOptions.ANNOTATION_SAVE_COMMAND_DESCRIPTION;
import static org.opencb.opencga.storage.app.cli.client.options.StorageVariantCommandOptions.VariantDeleteCommandOptions.VARIANT_DELETE_COMMAND;
import static org.opencb.opencga.storage.app.cli.client.options.StorageVariantCommandOptions.VariantDeleteCommandOptions.VARIANT_DELETE_COMMAND_DESCRIPTION;
import static org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam.ANNOT_CLINICAL_DESCR;
import static org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam.ANNOT_CLINICAL_SIGNIFICANCE_DESCR;

/**
* Created by pfurio on 23/11/16.
Expand Down Expand Up @@ -268,7 +265,7 @@ public class VariantSecondaryIndexCommandOptions extends GeneralCliOptions.Study
@Parameter(names = {"-p", "--project"}, description = "Project to index.", arity = 1)
public String project;

@Parameter(names = {"-r", "--region"}, description = VariantQueryParam.REGION_DESCR)
@Parameter(names = {"-r", "--region"}, description = REGION_DESCR)
public String region;

@Parameter(names = {"--sample"}, description = "Samples to index."
Expand Down Expand Up @@ -878,25 +875,25 @@ public SampleVariantFilterParams setMaxVariants(int maxVariants) {
return super.setMaxVariants(maxVariants);
}

@Parameter(names = {"--id"}, description = VariantQueryParam.ID_DESCR)
@Parameter(names = {"--id"}, description = ID_DESCR)
@Override
public AnnotationVariantQueryParams setId(String id) {
return super.setId(id);
}

@Parameter(names = {"--region"}, description = VariantQueryParam.REGION_DESCR)
@Parameter(names = {"--region"}, description = REGION_DESCR)
@Override
public AnnotationVariantQueryParams setRegion(String region) {
return super.setRegion(region);
}

@Parameter(names = {"--gene"}, description = VariantQueryParam.GENE_DESCR)
@Parameter(names = {"--gene"}, description = GENE_DESCR)
@Override
public AnnotationVariantQueryParams setGene(String gene) {
return super.setGene(gene);
}

@Parameter(names = {"--type"}, description = VariantQueryParam.TYPE_DESCR)
@Parameter(names = {"--type"}, description = TYPE_DESCR)
@Override
public AnnotationVariantQueryParams setType(String type) {
return super.setType(type);
Expand Down Expand Up @@ -926,79 +923,79 @@ public AnnotationVariantQueryParams setPanelRoleInCancer(String panelRoleInCance
return super.setPanelRoleInCancer(panelRoleInCancer);
}

@Parameter(names = {"--cohort-stats-ref"}, description = VariantQueryParam.STATS_REF_DESCR)
@Parameter(names = {"--cohort-stats-ref"}, description = STATS_REF_DESCR)
@Override
public AnnotationVariantQueryParams setCohortStatsRef(String cohortStatsRef) {
return super.setCohortStatsRef(cohortStatsRef);
}

@Parameter(names = {"--cohort-stats-alt"}, description = VariantQueryParam.STATS_ALT_DESCR)
@Parameter(names = {"--cohort-stats-alt"}, description = STATS_ALT_DESCR)
@Override
public AnnotationVariantQueryParams setCohortStatsAlt(String cohortStatsAlt) {
return super.setCohortStatsAlt(cohortStatsAlt);
}

@Parameter(names = {"--cohort-stats-maf"}, description = VariantQueryParam.STATS_MAF_DESCR)
@Parameter(names = {"--cohort-stats-maf"}, description = STATS_MAF_DESCR)
@Override
public AnnotationVariantQueryParams setCohortStatsMaf(String cohortStatsMaf) {
return super.setCohortStatsMaf(cohortStatsMaf);
}

@Parameter(names = {"--ct", "--consequence-type"}, description = VariantQueryParam.ANNOT_CONSEQUENCE_TYPE_DESCR)
@Parameter(names = {"--ct", "--consequence-type"}, description = ANNOT_CONSEQUENCE_TYPE_DESCR)
@Override
public AnnotationVariantQueryParams setCt(String ct) {
return super.setCt(ct);
}

@Parameter(names = {"--xref"}, description = VariantQueryParam.ANNOT_XREF_DESCR)
@Parameter(names = {"--xref"}, description = ANNOT_XREF_DESCR)
@Override
public AnnotationVariantQueryParams setXref(String xref) {
return super.setXref(xref);
}

@Parameter(names = {"--biotype"}, description = VariantQueryParam.ANNOT_BIOTYPE_DESCR)
@Parameter(names = {"--biotype"}, description = ANNOT_BIOTYPE_DESCR)
@Override
public AnnotationVariantQueryParams setBiotype(String biotype) {
return super.setBiotype(biotype);
}

@Parameter(names = {"--protein-substitution"}, description = VariantQueryParam.ANNOT_PROTEIN_SUBSTITUTION_DESCR)
@Parameter(names = {"--protein-substitution"}, description = ANNOT_PROTEIN_SUBSTITUTION_DESCR)
@Override
public AnnotationVariantQueryParams setProteinSubstitution(String proteinSubstitution) {
return super.setProteinSubstitution(proteinSubstitution);
}

@Parameter(names = {"--conservation"}, description = VariantQueryParam.ANNOT_CONSERVATION_DESCR)
@Parameter(names = {"--conservation"}, description = ANNOT_CONSERVATION_DESCR)
@Override
public AnnotationVariantQueryParams setConservation(String conservation) {
return super.setConservation(conservation);
}

@Parameter(names = {"--population-frequency-maf"}, description = VariantQueryParam.ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY_DESCR)
@Parameter(names = {"--population-frequency-maf"}, description = ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY_DESCR)
@Override
public AnnotationVariantQueryParams setPopulationFrequencyMaf(String populationFrequencyMaf) {
return super.setPopulationFrequencyMaf(populationFrequencyMaf);
}

@Parameter(names = {"--population-frequency-alt"}, description = VariantQueryParam.ANNOT_POPULATION_ALTERNATE_FREQUENCY_DESCR)
@Parameter(names = {"--population-frequency-alt"}, description = ANNOT_POPULATION_ALTERNATE_FREQUENCY_DESCR)
@Override
public AnnotationVariantQueryParams setPopulationFrequencyAlt(String populationFrequencyAlt) {
return super.setPopulationFrequencyAlt(populationFrequencyAlt);
}

@Parameter(names = {"--population-frequency-ref"}, description = VariantQueryParam.ANNOT_POPULATION_REFERENCE_FREQUENCY_DESCR)
@Parameter(names = {"--population-frequency-ref"}, description = ANNOT_POPULATION_REFERENCE_FREQUENCY_DESCR)
@Override
public AnnotationVariantQueryParams setPopulationFrequencyRef(String populationFrequencyRef) {
return super.setPopulationFrequencyRef(populationFrequencyRef);
}

@Parameter(names = {"--transcript-flag"}, description = VariantQueryParam.ANNOT_TRANSCRIPT_FLAG_DESCR)
@Parameter(names = {"--transcript-flag"}, description = ANNOT_TRANSCRIPT_FLAG_DESCR)
@Override
public AnnotationVariantQueryParams setTranscriptFlag(String transcriptFlag) {
return super.setTranscriptFlag(transcriptFlag);
}

@Parameter(names = {"--functional-score"}, description = VariantQueryParam.ANNOT_FUNCTIONAL_SCORE_DESCR)
@Parameter(names = {"--functional-score"}, description = ANNOT_FUNCTIONAL_SCORE_DESCR)
@Override
public AnnotationVariantQueryParams setFunctionalScore(String functionalScore) {
return super.setFunctionalScore(functionalScore);
Expand Down Expand Up @@ -1268,10 +1265,10 @@ public class KnockoutCommandOptions {
+ "By default filters by loss of function + missense_variant consequence types.")
public String consequenceType;

@Parameter(names = {"--filter"}, description = VariantQueryParam.FILTER_DESCR)
@Parameter(names = {"--filter"}, description = FILTER_DESCR)
public String filter;

@Parameter(names = {"--qual"}, description = VariantQueryParam.QUAL_DESCR)
@Parameter(names = {"--qual"}, description = QUAL_DESCR)
public String qual;

@Parameter(names = {"--skip-genes-file"}, description = "Do not generate the results file by gene")
Expand Down
Loading
Loading