Skip to content

Commit

Permalink
Merge branch 'release-6.x.x' into TASK-5318
Browse files Browse the repository at this point in the history
  • Loading branch information
jtarraga committed Sep 20, 2024
2 parents 6757b0e + 8f1e774 commit c910a88
Show file tree
Hide file tree
Showing 51 changed files with 1,557 additions and 505 deletions.
37 changes: 31 additions & 6 deletions .github/workflows/pull-request-approved.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,40 @@
name: Pull request approve workflow
run-name: 'Pull request approve workflow ${{ github.event.pull_request.head.ref }} -> ${{ github.event.pull_request.base.ref }} by @${{ github.actor }}'

on:
pull_request_review:
types: [ submitted ]

jobs:
build:
uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@develop
calculate-xetabase-branch:
name: Calculate Xetabase branch
runs-on: ubuntu-22.04
outputs:
xetabase_branch: ${{ steps.get_xetabase_branch.outputs.xetabase_branch }}
steps:
- name: Clone java-common-libs
uses: actions/checkout@v4
with:
fetch-depth: '10'
## This is important to avoid the error in the next step: "fatal: repository 'https://github.com/zetta-genomics/opencga-enterprise.git/' not found"
persist-credentials: false
- id: get_xetabase_branch
name: "Get current branch for Xetabase from target branch"
run: |
chmod +x ./.github/workflows/scripts/get-xetabase-branch.sh
echo "github.event.pull_request.base.ref: ${{ github.event.pull_request.base.ref }}"
echo "github.event.pull_request.head.ref: ${{ github.event.pull_request.head.ref }}"
xetabase_branch=$(./.github/workflows/scripts/get-xetabase-branch.sh ${{ github.event.pull_request.base.ref }})
echo "__Xetabase ref:__ \"${xetabase_branch}\"" | tee -a ${GITHUB_STEP_SUMMARY}
echo "xetabase_branch=${xetabase_branch}" >> $GITHUB_OUTPUT
env:
ZETTA_REPO_ACCESS_TOKEN: ${{ secrets.ZETTA_REPO_ACCESS_TOKEN }}

test:
name: "Test analysis"
uses: ./.github/workflows/test-analysis.yml
needs: build
secrets: inherit
name: "Run all tests before merging"
needs: calculate-xetabase-branch
uses: opencb/java-common-libs/.github/workflows/test-xetabase-workflow.yml@develop
with:
branch: ${{ needs.calculate-xetabase-branch.outputs.xetabase_branch }}
task: ${{ github.event.pull_request.head.ref }}
secrets: inherit
50 changes: 50 additions & 0 deletions .github/workflows/scripts/get-xetabase-branch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/bin/bash

# Function to calculate the corresponding branch of Xetabase project
get_xetabase_branch() {
# Input parameter (branch name)
input_branch="$1"

# If the branch begins with 'TASK' and exists in the opencga-enterprise repository, I return it
if [[ $input_branch == TASK* ]]; then
if [ "$(git ls-remote "https://$ZETTA_REPO_ACCESS_TOKEN@github.com/zetta-genomics/opencga-enterprise.git" "$input_branch" )" ] ; then
echo $input_branch;
return 0;
fi
fi

# Check if the branch name is "develop" in that case return the same branch name
if [[ "$input_branch" == "develop" ]]; then
echo "develop"
return 0
fi

# Check if the branch name starts with "release-" and follows the patterns "release-a.x.x" or "release-a.b.x"
if [[ "$input_branch" =~ ^release-([0-9]+)\.x\.x$ ]] || [[ "$input_branch" =~ ^release-([0-9]+)\.([0-9]+)\.x$ ]]; then
# Extract the MAJOR part of the branch name
MAJOR=${BASH_REMATCH[1]}
# Calculate the XETABASE_MAJOR by subtracting 4 from MAJOR of cellbase
XETABASE_MAJOR=$((MAJOR - 4))
# Check if the XETABASE_MAJOR is negative
if (( XETABASE_MAJOR < 0 )); then
echo "Error: 'MAJOR' digit after subtraction results in a negative number."
return 1
fi
# Construct and echo the new branch name
echo "release-$XETABASE_MAJOR.${input_branch#release-$MAJOR.}"
return 0
fi

# If the branch name does not match any of the expected patterns
echo "Error: The branch name is not correct."
return 1
}

# Check if the script receives exactly one argument
if [ "$#" -ne 1 ]; then
echo "Usage: $0 <branch-name>"
exit 1
fi

# Call the function with the input branch name
get_xetabase_branch "$1"
2 changes: 1 addition & 1 deletion cellbase-app/app/cloud/docker/cellbase-builder/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,4 @@ RUN cd /opt/ensembl && \
git clone https://github.com/Ensembl/ensembl-compara.git && \
git clone https://github.com/Ensembl/ensembl-io.git

ENV PERL5LIB=$PERL5LIB:/opt/ensembl/bioperl-live:/opt/ensembl/ensembl/modules:/opt/ensembl/ensembl-variation/modules:/opt/ensembl/ensembl-funcgen/modules:/opt/ensembl/ensembl-compara/modules:/opt/ensembl/lib/perl/5.18.2:/opt/cellbase/scripts/ensembl-scripts
ENV PERL5LIB=$PERL5LIB:/opt/ensembl/bioperl-live:/opt/ensembl/ensembl/modules:/opt/ensembl/ensembl-variation/modules:/opt/ensembl/ensembl-funcgen/modules:/opt/ensembl/ensembl-compara/modules:/opt/ensembl/lib/perl/5.18.2:/opt/cellbase
8 changes: 4 additions & 4 deletions cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm
Original file line number Diff line number Diff line change
Expand Up @@ -134,10 +134,10 @@ our $ENSEMBL_GENOMES_PORT = "4157";
our $ENSEMBL_GENOMES_USER = "anonymous";

## Vertebrates
our $HOMO_SAPIENS_CORE = "homo_sapiens_core_110_38";
our $HOMO_SAPIENS_VARIATION = "homo_sapiens_variation_110_38";
our $HOMO_SAPIENS_FUNCTIONAL = "homo_sapiens_funcgen_110_38";
our $HOMO_SAPIENS_COMPARA = "homo_sapiens_compara_110_38";
our $HOMO_SAPIENS_CORE = "homo_sapiens_core_104_38";
our $HOMO_SAPIENS_VARIATION = "homo_sapiens_variation_104_38";
our $HOMO_SAPIENS_FUNCTIONAL = "homo_sapiens_funcgen_104_38";
our $HOMO_SAPIENS_COMPARA = "homo_sapiens_compara_104_38";
#our $HOMO_SAPIENS_CORE = "homo_sapiens_core_78_38";
#our $HOMO_SAPIENS_VARIATION = "homo_sapiens_variation_78_38";
#our $HOMO_SAPIENS_FUNCTIONAL = "homo_sapiens_funcgen_78_38";
Expand Down
2 changes: 1 addition & 1 deletion cellbase-app/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<parent>
<groupId>org.opencb.cellbase</groupId>
<artifactId>cellbase</artifactId>
<version>6.2.1-SNAPSHOT</version>
<version>6.3.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import com.beust.jcommander.*;
import org.opencb.cellbase.app.cli.CliOptionsParser;
import org.opencb.cellbase.core.api.key.ApiKeyQuota;
import org.opencb.cellbase.lib.EtlCommons;

import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -74,6 +75,7 @@ public AdminCliOptionsParser() {
jCommander.addCommand("validate", validationCommandOptions);
}

@Override
public void parse(String[] args) throws ParameterException {
jCommander.parse(args);
}
Expand All @@ -87,9 +89,13 @@ public class DownloadCommandOptions {
@ParametersDelegate
public SpeciesAndAssemblyCommandOptions speciesAndAssemblyOptions = speciesAndAssemblyCommandOptions;

@Parameter(names = {"-d", "--data"}, description = "Comma separated list of data to download: genome, gene, "
+ "variation, variation_functional_score, regulation, protein, conservation, "
+ "clinical_variants, repeats, svs, pubmed and 'all' to download everything", required = true, arity = 1)
@Parameter(names = {"-d", "--data"}, description = "Comma separated list of data to download:"
+ EtlCommons.GENOME_DATA + ", " + EtlCommons.GENE_DATA + ", " + EtlCommons.VARIATION_DATA + ", "
+ EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA + ", " + EtlCommons.MISSENSE_VARIATION_SCORE_DATA + ", "
+ EtlCommons.REGULATION_DATA + ", " + EtlCommons.PROTEIN_DATA + ", " + EtlCommons.CONSERVATION_DATA + ", "
+ EtlCommons.CLINICAL_VARIANTS_DATA + ", " + EtlCommons.REPEATS_DATA + ", " + EtlCommons.OBO_DATA + ", "
+ EtlCommons.PUBMED_DATA + ", " + EtlCommons.PHARMACOGENOMICS_DATA + "; and 'all' to download everything",
required = true, arity = 1)
public String data;

@Parameter(names = {"-o", "--outdir"}, description = "Downloaded files will be saved in this directory.", required = true, arity = 1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
import java.util.Collections;
import java.util.List;

import static org.opencb.cellbase.lib.EtlCommons.PHARMGKB_DATA;
import static org.opencb.cellbase.lib.EtlCommons.*;

/**
* Created by imedina on 03/02/15.
Expand Down Expand Up @@ -132,6 +132,9 @@ public void execute() {
case EtlCommons.REFSEQ_DATA:
parser = buildRefSeq();
break;
case EtlCommons.VARIATION_DATA:
parser = buildVariation();
break;
case EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA:
parser = buildCadd();
break;
Expand Down Expand Up @@ -275,6 +278,21 @@ private CellBaseBuilder buildRefSeq() {
return new RefSeqGeneBuilder(refseqFolderPath, speciesConfiguration, serializer);
}

private CellBaseBuilder buildVariation() throws IOException {
Path downloadVariationPath = downloadFolder.resolve(VARIATION_DATA);
Path buildVariationPath = buildFolder.resolve(VARIATION_DATA);
if (!buildVariationPath.toFile().exists()) {
buildVariationPath.toFile().mkdirs();
}

CellBaseFileSerializer variationSerializer = new CellBaseJsonFileSerializer(buildVariationPath);

// Currently, only dbSNP data
Files.copy(downloadVariationPath.resolve(DBSNP_VERSION_FILENAME), buildVariationPath.resolve(DBSNP_VERSION_FILENAME),
StandardCopyOption.REPLACE_EXISTING);
return new VariationBuilder(downloadVariationPath, variationSerializer, configuration);
}

private CellBaseBuilder buildCadd() {
Path variationFunctionalScorePath = downloadFolder.resolve("variation_functional_score");
copyVersionFiles(Arrays.asList(variationFunctionalScorePath.resolve("caddVersion.json")));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ public void execute() {
case EtlCommons.GENE_DATA:
downloadFiles.addAll(downloader.downloadGene());
break;
// case EtlCommons.VARIATION_DATA:
// downloadManager.downloadVariation();
// break;
case EtlCommons.VARIATION_DATA:
downloadFiles.addAll(downloader.downloadVariation());
break;
case EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA:
downloadFiles.addAll(downloader.downloadCaddScores());
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
import java.util.List;
import java.util.concurrent.ExecutionException;

import static org.opencb.cellbase.lib.EtlCommons.*;

/**
* Created by imedina on 03/02/15.
*/
Expand Down Expand Up @@ -372,30 +374,57 @@ private void checkParameters() throws CellBaseException {
private void loadVariationData() throws NoSuchMethodException, InterruptedException, ExecutionException,
InstantiationException, IllegalAccessException, InvocationTargetException, ClassNotFoundException,
IOException, LoaderException, CellBaseException {
Path variationPath = input.resolve(VARIATION_DATA);
// First load data
// Common loading process from CellBase variation data models
if (field == null) {
DirectoryStream<Path> stream = Files.newDirectoryStream(input,
// Common loading process from CellBase variation data models
DirectoryStream<Path> stream = Files.newDirectoryStream(variationPath,
entry -> entry.getFileName().toString().startsWith("variation_chr"));

int numLoadings = 0;
for (Path entry : stream) {
logger.info("Loading file '{}'", entry);
loadRunner.load(input.resolve(entry.getFileName()), "variation", dataRelease);
loadRunner.load(variationPath.resolve(entry.getFileName()), "variation", dataRelease);
numLoadings++;
}

// Create index
createIndex("variation");

// Update release (collection and sources)
List<Path> sources = new ArrayList<>(Arrays.asList(
input.resolve("ensemblVariationVersion.json")
));
dataReleaseManager.update(dataRelease, "variation", EtlCommons.VARIATION_DATA, sources);
if (numLoadings > 0) {
// Create index
createIndex("variation");

// Update release (collection and sources)
List<Path> sources = new ArrayList<>(Arrays.asList(
variationPath.resolve("ensemblVariationVersion.json")
));
dataReleaseManager.update(dataRelease, "variation", EtlCommons.VARIATION_DATA, sources);
} else {
logger.info("Any variation file 'variation_chr...' found within folder '{}'", variationPath);
}
} else {
// Custom update required e.g. population freqs loading
logger.info("Loading file '{}'", variationPath);
loadRunner.load(variationPath, "variation", dataRelease, field, innerFields);
}

// Load dbSNP
Path dbSnpFilePath = variationPath.resolve(DBSNP_NAME + ".json.gz");
if (dbSnpFilePath.toFile().exists()) {
if (variationPath.resolve(DBSNP_VERSION_FILENAME).toFile().exists()) {
logger.info("Loading dbSNP file '{}'", dbSnpFilePath);
loadRunner.load(dbSnpFilePath, SNP_COLLECTION_NAME, dataRelease);

// Create index
createIndex(SNP_COLLECTION_NAME);

// Update release (collection and sources)
List<Path> sources = Collections.singletonList(variationPath.resolve(DBSNP_VERSION_FILENAME));
dataReleaseManager.update(dataRelease, SNP_COLLECTION_NAME, EtlCommons.VARIATION_DATA, sources);
} else {
logger.warn("In order to load the dbSNP file you need the version file {} within the folder '{}'", DBSNP_VERSION_FILENAME,
variationPath);
}
} else {
logger.info("Loading file '{}'", input);
loadRunner.load(input, "variation", dataRelease, field, innerFields);
logger.warn("Any dbSNP file found within the folder '{}'", variationPath);
}
}

Expand Down
2 changes: 1 addition & 1 deletion cellbase-client/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<parent>
<groupId>org.opencb.cellbase</groupId>
<artifactId>cellbase</artifactId>
<version>6.2.1-SNAPSHOT</version>
<version>6.3.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.opencb.cellbase.client.rest;

import org.apache.commons.lang3.StringUtils;
import org.opencb.biodata.models.core.Snp;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.avro.*;
import org.opencb.cellbase.client.config.ClientConfiguration;
Expand Down Expand Up @@ -236,6 +237,14 @@ public CellBaseDataResponse<String> getAllConsequenceTypes(Query query) throws I
return execute("consequenceTypes", query, new QueryOptions(), String.class);
}

public CellBaseDataResponse<Snp> searchSnp(Query query, QueryOptions options) throws IOException {
return execute("snp/search", query, options, Snp.class);
}

public CellBaseDataResponse<Snp> startsWithSnp(Query query, QueryOptions options) throws IOException {
return execute("snp/startsWith", query, options, Snp.class);
}

// public CellBaseDataResponse<String> getConsequenceTypeById(String id, QueryOptions options) throws IOException {
// return execute(id, "consequence_type", options, String.class);
// }
Expand Down
Loading

0 comments on commit c910a88

Please sign in to comment.