Skip to content

Commit

Permalink
Merge branch 'release-6.x.x' into TASK-6565
Browse files Browse the repository at this point in the history
  • Loading branch information
jtarraga committed Sep 20, 2024
2 parents f86423c + 8f1e774 commit fdf6520
Show file tree
Hide file tree
Showing 21 changed files with 173 additions and 170 deletions.
37 changes: 31 additions & 6 deletions .github/workflows/pull-request-approved.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,40 @@
name: Pull request approve workflow
run-name: 'Pull request approve workflow ${{ github.event.pull_request.head.ref }} -> ${{ github.event.pull_request.base.ref }} by @${{ github.actor }}'

on:
pull_request_review:
types: [ submitted ]

jobs:
build:
uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@develop
calculate-xetabase-branch:
name: Calculate Xetabase branch
runs-on: ubuntu-22.04
outputs:
xetabase_branch: ${{ steps.get_xetabase_branch.outputs.xetabase_branch }}
steps:
- name: Clone java-common-libs
uses: actions/checkout@v4
with:
fetch-depth: '10'
## This is important to avoid the error in the next step: "fatal: repository 'https://github.com/zetta-genomics/opencga-enterprise.git/' not found"
persist-credentials: false
- id: get_xetabase_branch
name: "Get current branch for Xetabase from target branch"
run: |
chmod +x ./.github/workflows/scripts/get-xetabase-branch.sh
echo "github.event.pull_request.base.ref: ${{ github.event.pull_request.base.ref }}"
echo "github.event.pull_request.head.ref: ${{ github.event.pull_request.head.ref }}"
xetabase_branch=$(./.github/workflows/scripts/get-xetabase-branch.sh ${{ github.event.pull_request.base.ref }})
echo "__Xetabase ref:__ \"${xetabase_branch}\"" | tee -a ${GITHUB_STEP_SUMMARY}
echo "xetabase_branch=${xetabase_branch}" >> $GITHUB_OUTPUT
env:
ZETTA_REPO_ACCESS_TOKEN: ${{ secrets.ZETTA_REPO_ACCESS_TOKEN }}

test:
name: "Test analysis"
uses: ./.github/workflows/test-analysis.yml
needs: build
secrets: inherit
name: "Run all tests before merging"
needs: calculate-xetabase-branch
uses: opencb/java-common-libs/.github/workflows/test-xetabase-workflow.yml@develop
with:
branch: ${{ needs.calculate-xetabase-branch.outputs.xetabase_branch }}
task: ${{ github.event.pull_request.head.ref }}
secrets: inherit
50 changes: 50 additions & 0 deletions .github/workflows/scripts/get-xetabase-branch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/bin/bash

# Function to calculate the corresponding branch of Xetabase project
get_xetabase_branch() {
# Input parameter (branch name)
input_branch="$1"

# If the branch begins with 'TASK' and exists in the opencga-enterprise repository, I return it
if [[ $input_branch == TASK* ]]; then
if [ "$(git ls-remote "https://$ZETTA_REPO_ACCESS_TOKEN@github.com/zetta-genomics/opencga-enterprise.git" "$input_branch" )" ] ; then
echo $input_branch;
return 0;
fi
fi

# Check if the branch name is "develop" in that case return the same branch name
if [[ "$input_branch" == "develop" ]]; then
echo "develop"
return 0
fi

# Check if the branch name starts with "release-" and follows the patterns "release-a.x.x" or "release-a.b.x"
if [[ "$input_branch" =~ ^release-([0-9]+)\.x\.x$ ]] || [[ "$input_branch" =~ ^release-([0-9]+)\.([0-9]+)\.x$ ]]; then
# Extract the MAJOR part of the branch name
MAJOR=${BASH_REMATCH[1]}
# Calculate the XETABASE_MAJOR by subtracting 4 from MAJOR of cellbase
XETABASE_MAJOR=$((MAJOR - 4))
# Check if the XETABASE_MAJOR is negative
if (( XETABASE_MAJOR < 0 )); then
echo "Error: 'MAJOR' digit after subtraction results in a negative number."
return 1
fi
# Construct and echo the new branch name
echo "release-$XETABASE_MAJOR.${input_branch#release-$MAJOR.}"
return 0
fi

# If the branch name does not match any of the expected patterns
echo "Error: The branch name is not correct."
return 1
}

# Check if the script receives exactly one argument
if [ "$#" -ne 1 ]; then
echo "Usage: $0 <branch-name>"
exit 1
fi

# Call the function with the input branch name
get_xetabase_branch "$1"
2 changes: 1 addition & 1 deletion cellbase-app/app/cloud/docker/cellbase-builder/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,4 @@ RUN cd /opt/ensembl && \
git clone https://github.com/Ensembl/ensembl-compara.git && \
git clone https://github.com/Ensembl/ensembl-io.git

ENV PERL5LIB=$PERL5LIB:/opt/ensembl/bioperl-live:/opt/ensembl/ensembl/modules:/opt/ensembl/ensembl-variation/modules:/opt/ensembl/ensembl-funcgen/modules:/opt/ensembl/ensembl-compara/modules:/opt/ensembl/lib/perl/5.18.2:/opt/cellbase/scripts/ensembl-scripts
ENV PERL5LIB=$PERL5LIB:/opt/ensembl/bioperl-live:/opt/ensembl/ensembl/modules:/opt/ensembl/ensembl-variation/modules:/opt/ensembl/ensembl-funcgen/modules:/opt/ensembl/ensembl-compara/modules:/opt/ensembl/lib/perl/5.18.2:/opt/cellbase
8 changes: 4 additions & 4 deletions cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm
Original file line number Diff line number Diff line change
Expand Up @@ -134,10 +134,10 @@ our $ENSEMBL_GENOMES_PORT = "4157";
our $ENSEMBL_GENOMES_USER = "anonymous";

## Vertebrates
our $HOMO_SAPIENS_CORE = "homo_sapiens_core_110_38";
our $HOMO_SAPIENS_VARIATION = "homo_sapiens_variation_110_38";
our $HOMO_SAPIENS_FUNCTIONAL = "homo_sapiens_funcgen_110_38";
our $HOMO_SAPIENS_COMPARA = "homo_sapiens_compara_110_38";
our $HOMO_SAPIENS_CORE = "homo_sapiens_core_104_38";
our $HOMO_SAPIENS_VARIATION = "homo_sapiens_variation_104_38";
our $HOMO_SAPIENS_FUNCTIONAL = "homo_sapiens_funcgen_104_38";
our $HOMO_SAPIENS_COMPARA = "homo_sapiens_compara_104_38";
#our $HOMO_SAPIENS_CORE = "homo_sapiens_core_78_38";
#our $HOMO_SAPIENS_VARIATION = "homo_sapiens_variation_78_38";
#our $HOMO_SAPIENS_FUNCTIONAL = "homo_sapiens_funcgen_78_38";
Expand Down
2 changes: 1 addition & 1 deletion cellbase-app/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<parent>
<groupId>org.opencb.cellbase</groupId>
<artifactId>cellbase</artifactId>
<version>6.2.1-SNAPSHOT</version>
<version>6.3.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

Expand Down
2 changes: 1 addition & 1 deletion cellbase-client/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<parent>
<groupId>org.opencb.cellbase</groupId>
<artifactId>cellbase</artifactId>
<version>6.2.1-SNAPSHOT</version>
<version>6.3.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

Expand Down
2 changes: 1 addition & 1 deletion cellbase-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<parent>
<groupId>org.opencb.cellbase</groupId>
<artifactId>cellbase</artifactId>
<version>6.2.1-SNAPSHOT</version>
<version>6.3.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ public class DownloadProperties {
private EnsemblProperties ensembl;
private EnsemblProperties ensemblGenomes;
private URLProperties hgnc;
private URLProperties cancerHotspot;
private URLProperties refSeq;
private URLProperties refSeqFasta;
private URLProperties refSeqProteinFasta;
Expand Down Expand Up @@ -72,7 +71,6 @@ public class DownloadProperties {
private URLProperties hpoObo;
private URLProperties goObo;
private URLProperties doidObo;
private URLProperties mondoObo;
private URLProperties goAnnotation;
private URLProperties revel;
private URLProperties pubmed;
Expand Down Expand Up @@ -529,24 +527,6 @@ public DownloadProperties setHgnc(URLProperties hgnc) {
return this;
}

public URLProperties getCancerHotspot() {
return cancerHotspot;
}

public DownloadProperties setCancerHotspot(URLProperties cancerHotspot) {
this.cancerHotspot = cancerHotspot;
return this;
}

public URLProperties getMondoObo() {
return mondoObo;
}

public DownloadProperties setMondoObo(URLProperties mondoObo) {
this.mondoObo = mondoObo;
return this;
}

public static class EnsemblProperties {

private DatabaseCredentials database;
Expand Down
83 changes: 37 additions & 46 deletions cellbase-core/src/main/resources/configuration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,7 @@ download:
url:
host: ftp://ftp.ensemblgenomes.org/pub
hgnc:
host: https://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/archive/monthly/tsv/hgnc_complete_set_2023-11-01.txt
version: 2023-11-01
cancerHotspot:
host: https://www.cancerhotspots.org/files/hotspots_v2.xls
version: "v2"
host: https://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/archive/monthly/tsv/hgnc_complete_set_2022-01-01.txt
refSeq:
host: https://ftp.ncbi.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.gtf.gz
refSeqFasta:
Expand All @@ -77,15 +73,12 @@ download:
host: https://ftp.ncbi.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_rna.fna.gz
maneSelect:
# host: https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/release_0.93/MANE.GRCh38.v0.93.summary.txt.gz
# host: https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/release_1.0/MANE.GRCh38.v1.0.summary.txt.gz
host: https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/release_1.1/MANE.GRCh38.v1.1.summary.txt.gz
version: "1.1"
host: https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/release_1.0/MANE.GRCh38.v1.0.summary.txt.gz
version: 0.93
lrg:
host: http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_transcripts_xrefs.txt
version: "2021-03-30"
geneUniprotXref:
host: http://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/
version: "2023-11-08"
geneExpressionAtlas:
host: ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/gxa/allgenes_updown_in_organism_part_2.0.14.tab.gz
mirbase:
Expand All @@ -95,35 +88,31 @@ download:
targetScan:
host: http://hgdownload.cse.ucsc.edu/goldenPath/
miRTarBase:
host: https://mirtarbase.cuhk.edu.cn/~miRTarBase/miRTarBase_2022/cache/download/9.0/hsa_MTI.xlsx
version: "9.0"

## Protein Data
host: https://mirtarbase.cuhk.edu.cn/~miRTarBase/miRTarBase_2022/cache/download/8.0/hsa_MTI.xlsx
uniprot:
host: https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.xml.gz
version: "2023-11-08"
host: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.xml.gz
uniprotRelNotes:
host: https://ftp.uniprot.org/pub/databases/uniprot/relnotes.txt
version: "2023-11-08"
host: ftp://ftp.uniprot.org/pub/databases/uniprot/relnotes.txt
intact:
host: ftp://ftp.ebi.ac.uk/pub/databases/intact/current/psimitab/intact.txt
interpro:
host: https://ftp.ebi.ac.uk/pub/databases/interpro/current_release/protein2ipr.dat.gz
version: "2023-11-08"
host: ftp://ftp.ebi.ac.uk/pub/databases/interpro/current/protein2ipr.dat.gz
interproRelNotes:
host: https://ftp.ebi.ac.uk/pub/databases/interpro/current_release/release_notes.txt
intact:
host: https://ftp.ebi.ac.uk/pub/databases/intact/current/psimitab/intact.txt
version: "2023-10-07"

## Conservation Scores
host: ftp://ftp.ebi.ac.uk/pub/databases/interpro/current/release_notes.txt
conservation:
host: https://hgdownload.cse.ucsc.edu/goldenPath/
version: "2022-08-30"
gerp:
host: http://ftp.ensembl.org/pub/release-110/compara/conservation_scores/91_mammals.gerp_conservation_score/gerp_conservation_scores.homo_sapiens.GRCh38.bw
version: "2023-05-17"
host: http://ftp.ensembl.org/pub/release-104/compara/conservation_scores/90_mammals.gerp_conservation_score/gerp_conservation_scores.homo_sapiens.GRCh38.bw
clinvar:
# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2021-07.xml.gz
# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2022-02.xml.gz
<<<<<<< HEAD
host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2022-11.xml.gz
clinvarVariation:
# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2021-07.xml.gz
# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2022-02.xml.gz
host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2022-11.xml.gz
=======
# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2022-11.xml.gz
host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/RCV_xml_old_format/ClinVarFullRelease_2024-05.xml.gz
version: 2024-05
Expand All @@ -133,12 +122,14 @@ download:
# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2022-11.xml.gz
host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/VCV_xml_old_format/ClinVarVariationRelease_2024-05.xml.gz
version: 2024-05
<<<<<<< HEAD
=======
>>>>>>> release-6.2.x
>>>>>>> release-6.x.x
clinvarSummary:
host: http://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variant_summary.txt.gz
version: "2023-12-01"
clinvarVariationAllele:
host: http://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variation_allele.txt.gz
version: "2023-12-01"
clinvarEfoTerms:
host: ftp://ftp.ebi.ac.uk/pub/databases/eva/ClinVar/2015/ClinVar_Traits_EFO_Names_260615.csv
dbSNP:
Expand All @@ -159,43 +150,43 @@ download:
genomicSuperDups:
host: http://hgdownload.cse.ucsc.edu/goldenPath
gwasCatalog:
<<<<<<< HEAD
=======
<<<<<<< HEAD
host: http://resources.opencb.org/opencb/cellbase/data/gwas/gwas_catalog_v1.0.2-associations_e106_r2022-05-17.tsv
version: "1.0.2 associations_e106_r2022-05-17"
=======
>>>>>>> release-6.x.x
#host: http://resources.opencb.org/opencb/cellbase/data/gwas/gwas_catalog_v1.0.2-associations_e106_r2022-05-17.tsv
host: "https://ftp.ebi.ac.uk/pub/databases/gwas/releases/2024/05/20/gwas-catalog-associations_ontology-annotated.tsv"
#version: "1.0.2 associations_e106_r2022-05-17"
version: "2024-05-20"
<<<<<<< HEAD
=======
>>>>>>> release-6.2.x
>>>>>>> release-6.x.x
hpo:
## Downlaod manually from here now: https://hpo.jax.org/app/data/annotations
host: https://ci.monarchinitiative.org/view/hpo/job/hpo.annotations/lastSuccessfulBuild/artifact/rare-diseases/util/annotation/phenotype_to_genes.txt
disgenet:
host: https://www.disgenet.org/static/disgenet_ap1/files/downloads
files:
- all_gene_disease_associations.tsv.gz
- readme.txt
dgidb:
host: https://old.dgidb.org/data/monthly_tsvs/2022-Feb/interactions.tsv
version: "2022-02-01"
host: https://dgidb.org/data/monthly_tsvs/2021-Jan/interactions.tsv
cadd:
## Nacho: Move to https://krishna.gs.washington.edu/download/CADD/v1.7-pre/GRCh38/whole_genome_SNVs.tsv.gz ASAP!
# host: https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh38/whole_genome_SNVs.tsv.gz
host: https://krishna.gs.washington.edu/download/CADD/v1.7-pre/GRCh38/whole_genome_SNVs.tsv.gz
version: "1.7-pre"
host: https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh38/whole_genome_SNVs.tsv.gz
reactome:
host: http://www.reactome.org/download/current/biopax.zip
gnomadConstraints:
host: https://storage.googleapis.com/gcp-public-data--gnomad/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_transcript.txt.bgz
version: "2.1.1"
version: 2.1.1
hpoObo:
host: http://purl.obolibrary.org/obo/hp.obo
version: "2023-12-01"
goObo:
host: http://purl.obolibrary.org/obo/go/go-basic.obo
version: "2023-12-01"
doidObo:
host: http://purl.obolibrary.org/obo/doid.obo
version: "2023-12-01"
mondoObo:
host: http://purl.obolibrary.org/obo/mondo.obo
version: "2023-12-01"
goAnnotation:
host: http://geneontology.org/gene-associations/goa_human.gaf.gz
revel:
Expand All @@ -222,7 +213,7 @@ species:
- id: hsapiens
scientificName: Homo sapiens
assemblies:
- ensemblVersion: '110_38'
- ensemblVersion: '104_38'
name: GRCh38
- ensemblVersion: '82_37'
name: GRCh37
Expand Down
6 changes: 3 additions & 3 deletions cellbase-lib/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<parent>
<groupId>org.opencb.cellbase</groupId>
<artifactId>cellbase</artifactId>
<version>6.2.1-SNAPSHOT</version>
<version>6.3.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

Expand Down Expand Up @@ -137,10 +137,10 @@
<groupId>com.github.samtools</groupId>
<artifactId>htsjdk</artifactId>
</dependency>
<!-- <dependency>
<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-mapper-asl</artifactId>
</dependency>-->
</dependency>
<dependency>
<groupId>io.jsonwebtoken</groupId>
<artifactId>jjwt-api</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ public class EtlCommons {
public static final String HPO_FILE = "hp.obo";
public static final String GO_FILE = "go-basic.obo";
public static final String DOID_FILE = "doid.obo";
public static final String MONDO_FILE = "mondo.obo";
public static final String PFM_DATA = "regulatory_pfm";

public static final String REGULATORY_REGION_DATA = "regulatory_region";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ public GeneBuilder(Path geneDirectoryPath, Path genomeSequenceFastaFile, Species
boolean flexibleGTFParsing, CellBaseSerializer serializer) throws CellBaseException {
this(null, geneDirectoryPath.resolve("description.txt"),
geneDirectoryPath.resolve("xrefs.txt"),
geneDirectoryPath.resolve("hgnc_complete_set_2023-11-01.txt"),
geneDirectoryPath.resolve("MANE.GRCh38.v1.1.summary.txt.gz"),
geneDirectoryPath.resolve("hgnc_complete_set_2022-01-01.txt"),
geneDirectoryPath.resolve("MANE.GRCh38.v1.0.summary.txt.gz"),
geneDirectoryPath.resolve("list_LRGs_transcripts_xrefs.txt"),
geneDirectoryPath.resolve("idmapping_selected.tab.gz"),
geneDirectoryPath.getParent().resolve("regulation/motif_features.gff.gz"),
Expand Down
Loading

0 comments on commit fdf6520

Please sign in to comment.