diff --git a/.github/workflows/pull-request-approved.yml b/.github/workflows/pull-request-approved.yml index eb410c9cba..ba378d3abb 100644 --- a/.github/workflows/pull-request-approved.yml +++ b/.github/workflows/pull-request-approved.yml @@ -1,15 +1,40 @@ name: Pull request approve workflow +run-name: 'Pull request approve workflow ${{ github.event.pull_request.head.ref }} -> ${{ github.event.pull_request.base.ref }} by @${{ github.actor }}' on: pull_request_review: types: [ submitted ] jobs: - build: - uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@develop + calculate-xetabase-branch: + name: Calculate Xetabase branch + runs-on: ubuntu-22.04 + outputs: + xetabase_branch: ${{ steps.get_xetabase_branch.outputs.xetabase_branch }} + steps: + - name: Clone java-common-libs + uses: actions/checkout@v4 + with: + fetch-depth: '10' + ## This is important to avoid the error in the next step: "fatal: repository 'https://github.com/zetta-genomics/opencga-enterprise.git/' not found" + persist-credentials: false + - id: get_xetabase_branch + name: "Get current branch for Xetabase from target branch" + run: | + chmod +x ./.github/workflows/scripts/get-xetabase-branch.sh + echo "github.event.pull_request.base.ref: ${{ github.event.pull_request.base.ref }}" + echo "github.event.pull_request.head.ref: ${{ github.event.pull_request.head.ref }}" + xetabase_branch=$(./.github/workflows/scripts/get-xetabase-branch.sh ${{ github.event.pull_request.base.ref }}) + echo "__Xetabase ref:__ \"${xetabase_branch}\"" | tee -a ${GITHUB_STEP_SUMMARY} + echo "xetabase_branch=${xetabase_branch}" >> $GITHUB_OUTPUT + env: + ZETTA_REPO_ACCESS_TOKEN: ${{ secrets.ZETTA_REPO_ACCESS_TOKEN }} test: - name: "Test analysis" - uses: ./.github/workflows/test-analysis.yml - needs: build - secrets: inherit + name: "Run all tests before merging" + needs: calculate-xetabase-branch + uses: opencb/java-common-libs/.github/workflows/test-xetabase-workflow.yml@develop + with: + branch: ${{ needs.calculate-xetabase-branch.outputs.xetabase_branch }} + task: ${{ github.event.pull_request.head.ref }} + secrets: inherit \ No newline at end of file diff --git a/.github/workflows/scripts/get-xetabase-branch.sh b/.github/workflows/scripts/get-xetabase-branch.sh new file mode 100644 index 0000000000..fd9626a79a --- /dev/null +++ b/.github/workflows/scripts/get-xetabase-branch.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +# Function to calculate the corresponding branch of Xetabase project +get_xetabase_branch() { + # Input parameter (branch name) + input_branch="$1" + + # If the branch begins with 'TASK' and exists in the opencga-enterprise repository, I return it + if [[ $input_branch == TASK* ]]; then + if [ "$(git ls-remote "https://$ZETTA_REPO_ACCESS_TOKEN@github.com/zetta-genomics/opencga-enterprise.git" "$input_branch" )" ] ; then + echo $input_branch; + return 0; + fi + fi + + # Check if the branch name is "develop" in that case return the same branch name + if [[ "$input_branch" == "develop" ]]; then + echo "develop" + return 0 + fi + + # Check if the branch name starts with "release-" and follows the patterns "release-a.x.x" or "release-a.b.x" + if [[ "$input_branch" =~ ^release-([0-9]+)\.x\.x$ ]] || [[ "$input_branch" =~ ^release-([0-9]+)\.([0-9]+)\.x$ ]]; then + # Extract the MAJOR part of the branch name + MAJOR=${BASH_REMATCH[1]} + # Calculate the XETABASE_MAJOR by subtracting 4 from MAJOR of cellbase + XETABASE_MAJOR=$((MAJOR - 4)) + # Check if the XETABASE_MAJOR is negative + if (( XETABASE_MAJOR < 0 )); then + echo "Error: 'MAJOR' digit after subtraction results in a negative number." + return 1 + fi + # Construct and echo the new branch name + echo "release-$XETABASE_MAJOR.${input_branch#release-$MAJOR.}" + return 0 + fi + + # If the branch name does not match any of the expected patterns + echo "Error: The branch name is not correct." + return 1 +} + +# Check if the script receives exactly one argument +if [ "$#" -ne 1 ]; then + echo "Usage: $0 " + exit 1 +fi + +# Call the function with the input branch name +get_xetabase_branch "$1" diff --git a/cellbase-app/app/cloud/docker/cellbase-builder/Dockerfile b/cellbase-app/app/cloud/docker/cellbase-builder/Dockerfile index 6e1657d1bf..17d5accff4 100644 --- a/cellbase-app/app/cloud/docker/cellbase-builder/Dockerfile +++ b/cellbase-app/app/cloud/docker/cellbase-builder/Dockerfile @@ -28,4 +28,4 @@ RUN cd /opt/ensembl && \ git clone https://github.com/Ensembl/ensembl-compara.git && \ git clone https://github.com/Ensembl/ensembl-io.git -ENV PERL5LIB=$PERL5LIB:/opt/ensembl/bioperl-live:/opt/ensembl/ensembl/modules:/opt/ensembl/ensembl-variation/modules:/opt/ensembl/ensembl-funcgen/modules:/opt/ensembl/ensembl-compara/modules:/opt/ensembl/lib/perl/5.18.2:/opt/cellbase/scripts/ensembl-scripts +ENV PERL5LIB=$PERL5LIB:/opt/ensembl/bioperl-live:/opt/ensembl/ensembl/modules:/opt/ensembl/ensembl-variation/modules:/opt/ensembl/ensembl-funcgen/modules:/opt/ensembl/ensembl-compara/modules:/opt/ensembl/lib/perl/5.18.2:/opt/cellbase diff --git a/cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm b/cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm index 70865465e9..aa22cf10b1 100755 --- a/cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm +++ b/cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm @@ -134,10 +134,10 @@ our $ENSEMBL_GENOMES_PORT = "4157"; our $ENSEMBL_GENOMES_USER = "anonymous"; ## Vertebrates -our $HOMO_SAPIENS_CORE = "homo_sapiens_core_110_38"; -our $HOMO_SAPIENS_VARIATION = "homo_sapiens_variation_110_38"; -our $HOMO_SAPIENS_FUNCTIONAL = "homo_sapiens_funcgen_110_38"; -our $HOMO_SAPIENS_COMPARA = "homo_sapiens_compara_110_38"; +our $HOMO_SAPIENS_CORE = "homo_sapiens_core_104_38"; +our $HOMO_SAPIENS_VARIATION = "homo_sapiens_variation_104_38"; +our $HOMO_SAPIENS_FUNCTIONAL = "homo_sapiens_funcgen_104_38"; +our $HOMO_SAPIENS_COMPARA = "homo_sapiens_compara_104_38"; #our $HOMO_SAPIENS_CORE = "homo_sapiens_core_78_38"; #our $HOMO_SAPIENS_VARIATION = "homo_sapiens_variation_78_38"; #our $HOMO_SAPIENS_FUNCTIONAL = "homo_sapiens_funcgen_78_38"; diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index 441dc47bff..aed90e9897 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.2.1-SNAPSHOT + 6.3.0-SNAPSHOT ../pom.xml diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index a843ef0685..7424c21bbb 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.2.1-SNAPSHOT + 6.3.0-SNAPSHOT ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index db167df9bb..7c74e13d92 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.2.1-SNAPSHOT + 6.3.0-SNAPSHOT ../pom.xml diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java index 507e85a75f..9a097fd202 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java @@ -26,7 +26,6 @@ public class DownloadProperties { private EnsemblProperties ensembl; private EnsemblProperties ensemblGenomes; private URLProperties hgnc; - private URLProperties cancerHotspot; private URLProperties refSeq; private URLProperties refSeqFasta; private URLProperties refSeqProteinFasta; @@ -72,7 +71,6 @@ public class DownloadProperties { private URLProperties hpoObo; private URLProperties goObo; private URLProperties doidObo; - private URLProperties mondoObo; private URLProperties goAnnotation; private URLProperties revel; private URLProperties pubmed; @@ -529,24 +527,6 @@ public DownloadProperties setHgnc(URLProperties hgnc) { return this; } - public URLProperties getCancerHotspot() { - return cancerHotspot; - } - - public DownloadProperties setCancerHotspot(URLProperties cancerHotspot) { - this.cancerHotspot = cancerHotspot; - return this; - } - - public URLProperties getMondoObo() { - return mondoObo; - } - - public DownloadProperties setMondoObo(URLProperties mondoObo) { - this.mondoObo = mondoObo; - return this; - } - public static class EnsemblProperties { private DatabaseCredentials database; diff --git a/cellbase-core/src/main/resources/configuration.yml b/cellbase-core/src/main/resources/configuration.yml index 7a6605dacd..ea500a24a0 100644 --- a/cellbase-core/src/main/resources/configuration.yml +++ b/cellbase-core/src/main/resources/configuration.yml @@ -62,11 +62,7 @@ download: url: host: ftp://ftp.ensemblgenomes.org/pub hgnc: - host: https://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/archive/monthly/tsv/hgnc_complete_set_2023-11-01.txt - version: 2023-11-01 - cancerHotspot: - host: https://www.cancerhotspots.org/files/hotspots_v2.xls - version: "v2" + host: https://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/archive/monthly/tsv/hgnc_complete_set_2022-01-01.txt refSeq: host: https://ftp.ncbi.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.gtf.gz refSeqFasta: @@ -77,15 +73,12 @@ download: host: https://ftp.ncbi.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_rna.fna.gz maneSelect: # host: https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/release_0.93/MANE.GRCh38.v0.93.summary.txt.gz -# host: https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/release_1.0/MANE.GRCh38.v1.0.summary.txt.gz - host: https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/release_1.1/MANE.GRCh38.v1.1.summary.txt.gz - version: "1.1" + host: https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/release_1.0/MANE.GRCh38.v1.0.summary.txt.gz + version: 0.93 lrg: host: http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_transcripts_xrefs.txt - version: "2021-03-30" geneUniprotXref: host: http://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/ - version: "2023-11-08" geneExpressionAtlas: host: ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/gxa/allgenes_updown_in_organism_part_2.0.14.tab.gz mirbase: @@ -95,35 +88,31 @@ download: targetScan: host: http://hgdownload.cse.ucsc.edu/goldenPath/ miRTarBase: - host: https://mirtarbase.cuhk.edu.cn/~miRTarBase/miRTarBase_2022/cache/download/9.0/hsa_MTI.xlsx - version: "9.0" - - ## Protein Data + host: https://mirtarbase.cuhk.edu.cn/~miRTarBase/miRTarBase_2022/cache/download/8.0/hsa_MTI.xlsx uniprot: - host: https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.xml.gz - version: "2023-11-08" + host: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.xml.gz uniprotRelNotes: - host: https://ftp.uniprot.org/pub/databases/uniprot/relnotes.txt - version: "2023-11-08" + host: ftp://ftp.uniprot.org/pub/databases/uniprot/relnotes.txt + intact: + host: ftp://ftp.ebi.ac.uk/pub/databases/intact/current/psimitab/intact.txt interpro: - host: https://ftp.ebi.ac.uk/pub/databases/interpro/current_release/protein2ipr.dat.gz - version: "2023-11-08" + host: ftp://ftp.ebi.ac.uk/pub/databases/interpro/current/protein2ipr.dat.gz interproRelNotes: - host: https://ftp.ebi.ac.uk/pub/databases/interpro/current_release/release_notes.txt - intact: - host: https://ftp.ebi.ac.uk/pub/databases/intact/current/psimitab/intact.txt - version: "2023-10-07" - - ## Conservation Scores + host: ftp://ftp.ebi.ac.uk/pub/databases/interpro/current/release_notes.txt conservation: host: https://hgdownload.cse.ucsc.edu/goldenPath/ - version: "2022-08-30" gerp: - host: http://ftp.ensembl.org/pub/release-110/compara/conservation_scores/91_mammals.gerp_conservation_score/gerp_conservation_scores.homo_sapiens.GRCh38.bw - version: "2023-05-17" + host: http://ftp.ensembl.org/pub/release-104/compara/conservation_scores/90_mammals.gerp_conservation_score/gerp_conservation_scores.homo_sapiens.GRCh38.bw clinvar: # host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2021-07.xml.gz # host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2022-02.xml.gz +<<<<<<< HEAD + host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2022-11.xml.gz + clinvarVariation: +# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2021-07.xml.gz +# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2022-02.xml.gz + host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2022-11.xml.gz +======= # host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2022-11.xml.gz host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/RCV_xml_old_format/ClinVarFullRelease_2024-05.xml.gz version: 2024-05 @@ -133,12 +122,14 @@ download: # host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2022-11.xml.gz host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/VCV_xml_old_format/ClinVarVariationRelease_2024-05.xml.gz version: 2024-05 +<<<<<<< HEAD +======= +>>>>>>> release-6.2.x +>>>>>>> release-6.x.x clinvarSummary: host: http://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variant_summary.txt.gz - version: "2023-12-01" clinvarVariationAllele: host: http://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variation_allele.txt.gz - version: "2023-12-01" clinvarEfoTerms: host: ftp://ftp.ebi.ac.uk/pub/databases/eva/ClinVar/2015/ClinVar_Traits_EFO_Names_260615.csv dbSNP: @@ -159,12 +150,22 @@ download: genomicSuperDups: host: http://hgdownload.cse.ucsc.edu/goldenPath gwasCatalog: +<<<<<<< HEAD +======= +<<<<<<< HEAD + host: http://resources.opencb.org/opencb/cellbase/data/gwas/gwas_catalog_v1.0.2-associations_e106_r2022-05-17.tsv + version: "1.0.2 associations_e106_r2022-05-17" +======= +>>>>>>> release-6.x.x #host: http://resources.opencb.org/opencb/cellbase/data/gwas/gwas_catalog_v1.0.2-associations_e106_r2022-05-17.tsv host: "https://ftp.ebi.ac.uk/pub/databases/gwas/releases/2024/05/20/gwas-catalog-associations_ontology-annotated.tsv" #version: "1.0.2 associations_e106_r2022-05-17" version: "2024-05-20" +<<<<<<< HEAD +======= +>>>>>>> release-6.2.x +>>>>>>> release-6.x.x hpo: - ## Downlaod manually from here now: https://hpo.jax.org/app/data/annotations host: https://ci.monarchinitiative.org/view/hpo/job/hpo.annotations/lastSuccessfulBuild/artifact/rare-diseases/util/annotation/phenotype_to_genes.txt disgenet: host: https://www.disgenet.org/static/disgenet_ap1/files/downloads @@ -172,30 +173,20 @@ download: - all_gene_disease_associations.tsv.gz - readme.txt dgidb: - host: https://old.dgidb.org/data/monthly_tsvs/2022-Feb/interactions.tsv - version: "2022-02-01" + host: https://dgidb.org/data/monthly_tsvs/2021-Jan/interactions.tsv cadd: - ## Nacho: Move to https://krishna.gs.washington.edu/download/CADD/v1.7-pre/GRCh38/whole_genome_SNVs.tsv.gz ASAP! -# host: https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh38/whole_genome_SNVs.tsv.gz - host: https://krishna.gs.washington.edu/download/CADD/v1.7-pre/GRCh38/whole_genome_SNVs.tsv.gz - version: "1.7-pre" + host: https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh38/whole_genome_SNVs.tsv.gz reactome: host: http://www.reactome.org/download/current/biopax.zip gnomadConstraints: host: https://storage.googleapis.com/gcp-public-data--gnomad/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_transcript.txt.bgz - version: "2.1.1" + version: 2.1.1 hpoObo: host: http://purl.obolibrary.org/obo/hp.obo - version: "2023-12-01" goObo: host: http://purl.obolibrary.org/obo/go/go-basic.obo - version: "2023-12-01" doidObo: host: http://purl.obolibrary.org/obo/doid.obo - version: "2023-12-01" - mondoObo: - host: http://purl.obolibrary.org/obo/mondo.obo - version: "2023-12-01" goAnnotation: host: http://geneontology.org/gene-associations/goa_human.gaf.gz revel: @@ -222,7 +213,7 @@ species: - id: hsapiens scientificName: Homo sapiens assemblies: - - ensemblVersion: '110_38' + - ensemblVersion: '104_38' name: GRCh38 - ensemblVersion: '82_37' name: GRCh37 diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index 28ccca9267..a181ccf4a9 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.2.1-SNAPSHOT + 6.3.0-SNAPSHOT ../pom.xml @@ -137,10 +137,10 @@ com.github.samtools htsjdk - + io.jsonwebtoken jjwt-api diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java index 02c1d72697..c2ded07a26 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java @@ -78,7 +78,6 @@ public class EtlCommons { public static final String HPO_FILE = "hp.obo"; public static final String GO_FILE = "go-basic.obo"; public static final String DOID_FILE = "doid.obo"; - public static final String MONDO_FILE = "mondo.obo"; public static final String PFM_DATA = "regulatory_pfm"; public static final String REGULATORY_REGION_DATA = "regulatory_region"; diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java index cd0863a259..563f76dea7 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java @@ -90,8 +90,8 @@ public GeneBuilder(Path geneDirectoryPath, Path genomeSequenceFastaFile, Species boolean flexibleGTFParsing, CellBaseSerializer serializer) throws CellBaseException { this(null, geneDirectoryPath.resolve("description.txt"), geneDirectoryPath.resolve("xrefs.txt"), - geneDirectoryPath.resolve("hgnc_complete_set_2023-11-01.txt"), - geneDirectoryPath.resolve("MANE.GRCh38.v1.1.summary.txt.gz"), + geneDirectoryPath.resolve("hgnc_complete_set_2022-01-01.txt"), + geneDirectoryPath.resolve("MANE.GRCh38.v1.0.summary.txt.gz"), geneDirectoryPath.resolve("list_LRGs_transcripts_xrefs.txt"), geneDirectoryPath.resolve("idmapping_selected.tab.gz"), geneDirectoryPath.getParent().resolve("regulation/motif_features.gff.gz"), diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/OntologyBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/OntologyBuilder.java index 1eabf8975a..8873dd7f93 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/OntologyBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/OntologyBuilder.java @@ -32,14 +32,12 @@ public class OntologyBuilder extends CellBaseBuilder { private Path hpoFile; private Path goFile; private Path doidFile; - private Path mondoFile; public OntologyBuilder(Path oboDirectoryPath, CellBaseSerializer serializer) { super(serializer); hpoFile = oboDirectoryPath.resolve(EtlCommons.HPO_FILE); goFile = oboDirectoryPath.resolve(EtlCommons.GO_FILE); doidFile = oboDirectoryPath.resolve(EtlCommons.DOID_FILE); - mondoFile = oboDirectoryPath.resolve(EtlCommons.MONDO_FILE); } @Override @@ -66,13 +64,6 @@ public void parse() throws Exception { serializer.serialize(term); } - bufferedReader = FileUtils.newBufferedReader(mondoFile); - terms = parser.parseOBO(bufferedReader, "Mondo Ontology"); - for (OntologyTerm term : terms) { - term.setSource("MONDO"); - serializer.serialize(term); - } - serializer.close(); } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GeneDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GeneDownloadManager.java index 260ff75427..9d2685eadf 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GeneDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GeneDownloadManager.java @@ -82,8 +82,6 @@ public List download() throws IOException, InterruptedException { downloadFiles.addAll(downloadRefSeq(refseqFolder)); downloadFiles.add(downloadMane(geneFolder)); downloadFiles.add(downloadLrg(geneFolder)); - downloadFiles.add(downloadHgnc(geneFolder)); - downloadFiles.add(downloadCancerHotspot(geneFolder)); downloadFiles.add(downloadDrugData(geneFolder)); downloadFiles.addAll(downloadGeneUniprotXref(geneFolder)); downloadFiles.add(downloadGeneExpressionAtlas(geneFolder)); @@ -210,30 +208,6 @@ private DownloadFile downloadLrg(Path geneFolder) throws IOException, Interrupte return null; } - private DownloadFile downloadHgnc(Path geneFolder) throws IOException, InterruptedException { - if (speciesConfiguration.getScientificName().equals("Homo sapiens")) { - logger.info("Downloading HGNC ..."); - String url = configuration.getDownload().getHgnc().getHost(); - saveVersionData(EtlCommons.GENE_DATA, "HGNC_GENE", configuration.getDownload().getHgnc().getVersion(), - getTimeStamp(), Collections.singletonList(url), geneFolder.resolve("hgncVersion.json")); - String[] array = url.split("/"); - return downloadFile(url, geneFolder.resolve(array[array.length - 1]).toString()); - } - return null; - } - - private DownloadFile downloadCancerHotspot(Path geneFolder) throws IOException, InterruptedException { - if (speciesConfiguration.getScientificName().equals("Homo sapiens")) { - logger.info("Downloading Cancer Hotspot ..."); - String url = configuration.getDownload().getCancerHotspot().getHost(); - saveVersionData(EtlCommons.GENE_DATA, "CANCER_HOTSPOT", configuration.getDownload().getHgnc().getVersion(), - getTimeStamp(), Collections.singletonList(url), geneFolder.resolve("cancerHotspotVersion.json")); - String[] array = url.split("/"); - return downloadFile(url, geneFolder.resolve(array[array.length - 1]).toString()); - } - return null; - } - private DownloadFile downloadGO(Path geneFolder) throws IOException, InterruptedException { if (speciesConfiguration.getScientificName().equals("Homo sapiens")) { logger.info("Downloading go annotation..."); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GenomeDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GenomeDownloadManager.java index 0ba9f39db4..5a0609867f 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GenomeDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GenomeDownloadManager.java @@ -47,11 +47,11 @@ public GenomeDownloadManager(String species, String assembly, Path targetDirecto public List download() throws IOException, InterruptedException { List downloadFiles = new ArrayList<>(); downloadFiles.addAll(downloadReferenceGenome()); - downloadFiles.addAll(downloadConservation()); - downloadFiles.addAll(downloadRepeats()); +// downloadFiles.addAll(downloadConservation()); +// downloadFiles.addAll(downloadRepeats()); // cytobands -// runGenomeInfo(); + runGenomeInfo(); return downloadFiles; } @@ -115,16 +115,16 @@ public List downloadConservation() throws IOException, Interrupted List phastconsUrls = new ArrayList<>(chromosomes.length); List phyloPUrls = new ArrayList<>(chromosomes.length); for (String chromosome : chromosomes) { - String phastConsUrl = url + "/phastCons470way/hg38.470way.phastCons/chr" + chromosome - + ".phastCons470way.wigFix.gz"; + String phastConsUrl = url + "/phastCons100way/hg38.100way.phastCons/chr" + chromosome + + ".phastCons100way.wigFix.gz"; downloadFiles.add(downloadFile(phastConsUrl, conservationFolder.resolve("phastCons") - .resolve("chr" + chromosome + ".phastCons470way.wigFix.gz").toString())); + .resolve("chr" + chromosome + ".phastCons100way.wigFix.gz").toString())); phastconsUrls.add(phastConsUrl); - String phyloPUrl = url + "/phyloP470way/hg38.470way.phyloP/chr" + chromosome - + ".phyloP470way.wigFix.gz"; + String phyloPUrl = url + "/phyloP100way/hg38.100way.phyloP100way/chr" + chromosome + + ".phyloP100way.wigFix.gz"; downloadFiles.add(downloadFile(phyloPUrl, conservationFolder.resolve("phylop") - .resolve("chr" + chromosome + ".phyloP470way.wigFix.gz").toString())); + .resolve("chr" + chromosome + ".phyloP100way.wigFix.gz").toString())); phyloPUrls.add(phyloPUrl); } String gerpUrl = configuration.getDownload().getGerp().getHost(); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/OntologyDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/OntologyDownloadManager.java index 522be7b27d..0776354e80 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/OntologyDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/OntologyDownloadManager.java @@ -36,7 +36,7 @@ public OntologyDownloadManager(String species, String assembly, Path targetDirec public List download() throws IOException, InterruptedException { - logger.info("Downloading OBO files ..."); + logger.info("Downloading obo files ..."); List downloadFiles = new ArrayList<>(); Path oboFolder = downloadFolder.resolve("ontology"); @@ -44,22 +44,20 @@ public List download() throws IOException, InterruptedException { String url = configuration.getDownload().getHpoObo().getHost(); downloadFiles.add(downloadFile(url, oboFolder.resolve("hp.obo").toString())); + saveVersionData(EtlCommons.OBO_DATA, "HPO", getTimeStamp(), getTimeStamp(), Collections.singletonList(url), buildFolder.resolve(EtlCommons.HPO_VERSION_FILE)); url = configuration.getDownload().getGoObo().getHost(); downloadFiles.add(downloadFile(url, oboFolder.resolve("go-basic.obo").toString())); + saveVersionData(EtlCommons.OBO_DATA, "GO", getTimeStamp(), getTimeStamp(), Collections.singletonList(url), buildFolder.resolve(EtlCommons.GO_VERSION_FILE)); url = configuration.getDownload().getDoidObo().getHost(); downloadFiles.add(downloadFile(url, oboFolder.resolve("doid.obo").toString())); - saveVersionData(EtlCommons.OBO_DATA, "DO", getTimeStamp(), getTimeStamp(), - Collections.singletonList(url), buildFolder.resolve(EtlCommons.DO_VERSION_FILE)); - url = configuration.getDownload().getMondoObo().getHost(); - downloadFiles.add(downloadFile(url, oboFolder.resolve("mondo.obo").toString())); - saveVersionData(EtlCommons.OBO_DATA, "MONDO", getTimeStamp(), getTimeStamp(), + saveVersionData(EtlCommons.OBO_DATA, "DO", getTimeStamp(), getTimeStamp(), Collections.singletonList(url), buildFolder.resolve(EtlCommons.DO_VERSION_FILE)); return downloadFiles; diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ProteinDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ProteinDownloadManager.java index 5a722ed448..08f28cfdad 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ProteinDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ProteinDownloadManager.java @@ -22,6 +22,7 @@ import org.opencb.commons.utils.FileUtils; import java.io.BufferedReader; +import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintWriter; import java.nio.file.Files; @@ -33,8 +34,6 @@ public class ProteinDownloadManager extends AbstractDownloadManager { private static final String UNIPROT_NAME = "UniProt"; - private static final String INTERPRO_NAME = "InterPro"; - private static final String INTACT_NAME = "IntAct"; public ProteinDownloadManager(String species, String assembly, Path targetDirectory, CellBaseConfiguration configuration) throws IOException, CellBaseException { @@ -57,7 +56,6 @@ public List download() throws IOException, InterruptedException { Files.createDirectories(proteinFolder); List downloadFiles = new ArrayList<>(); - // Uniprot String url = configuration.getDownload().getUniprot().getHost(); downloadFiles.add(downloadFile(url, proteinFolder.resolve("uniprot_sprot.xml.gz").toString())); Files.createDirectories(proteinFolder.resolve("uniprot_chunks")); @@ -65,25 +63,23 @@ public List download() throws IOException, InterruptedException { String relNotesUrl = configuration.getDownload().getUniprotRelNotes().getHost(); downloadFiles.add(downloadFile(relNotesUrl, proteinFolder.resolve("uniprotRelnotes.txt").toString())); + saveVersionData(EtlCommons.PROTEIN_DATA, UNIPROT_NAME, getLine(proteinFolder.resolve("uniprotRelnotes.txt"), 1), getTimeStamp(), Collections.singletonList(url), proteinFolder.resolve("uniprotVersion.json")); - // Interpro - String interproUrl = configuration.getDownload().getInterpro().getHost(); - downloadFiles.add(downloadFile(interproUrl, proteinFolder.resolve("protein2ipr.dat.gz").toString())); - - relNotesUrl = configuration.getDownload().getInterproRelNotes().getHost(); - downloadFiles.add(downloadFile(relNotesUrl, proteinFolder.resolve("interproRelnotes.txt").toString())); - saveVersionData(EtlCommons.PROTEIN_DATA, INTERPRO_NAME, getLine(proteinFolder.resolve("interproRelnotes.txt"), 5), - getTimeStamp(), Collections.singletonList(interproUrl), proteinFolder.resolve("interproVersion.json")); - - // Intact - String intactUrl = configuration.getDownload().getIntact().getHost(); - downloadFiles.add(downloadFile(intactUrl, proteinFolder.resolve("intact.txt").toString())); - saveVersionData(EtlCommons.PROTEIN_DATA, INTACT_NAME, configuration.getDownload().getIntact().getVersion(), - getTimeStamp(), Collections.singletonList(intactUrl), proteinFolder.resolve("intactVersion.json")); - return downloadFiles; + +// url = configuration.getDownload().getIntact().getHost(); +// downloadFile(url, proteinFolder.resolve("intact.txt").toString()); +// saveVersionData(EtlCommons.PROTEIN_DATA, INTACT_NAME, null, getTimeStamp(), Collections.singletonList(url), +// proteinFolder.resolve("intactVersion.json")); +// +// url = configuration.getDownload().getInterpro().getHost(); +// downloadFile(url, proteinFolder.resolve("protein2ipr.dat.gz").toString()); +// relNotesUrl = configuration.getDownload().getInterproRelNotes().getHost(); +// downloadFile(relNotesUrl, proteinFolder.resolve("interproRelnotes.txt").toString()); +// saveVersionData(EtlCommons.PROTEIN_DATA, INTERPRO_NAME, getLine(proteinFolder.resolve("interproRelnotes.txt"), 5), +// getTimeStamp(), Collections.singletonList(url), proteinFolder.resolve("interproVersion.json")); } private void splitUniprot(Path uniprotFilePath, Path splitOutdirPath) throws IOException { @@ -100,7 +96,7 @@ private void splitUniprot(Path uniprotFilePath, Path splitOutdirPath) throws IOE inEntry = true; beforeEntry = false; if (count % 10000 == 0) { - pw = new PrintWriter(Files.newOutputStream(splitOutdirPath.resolve("chunk_" + chunk + ".xml").toFile().toPath())); + pw = new PrintWriter(new FileOutputStream(splitOutdirPath.resolve("chunk_" + chunk + ".xml").toFile())); pw.println(header.toString().trim()); } count++; diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java index 51152e478d..1abb352fbe 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RegulationDownloadManager.java @@ -64,8 +64,8 @@ public List download() throws IOException, InterruptedException, N List downloadFiles = new ArrayList<>(); downloadFiles.addAll(downloadRegulatoryaAndMotifFeatures()); - downloadFiles.add(downloadMiRTarBase()); downloadFiles.add(downloadMirna()); + downloadFiles.add(downloadMiRTarBase()); return downloadFiles; } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/MetaMongoDBAdaptor.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/MetaMongoDBAdaptor.java index 5b0fac80e6..4413f4bf48 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/MetaMongoDBAdaptor.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/MetaMongoDBAdaptor.java @@ -16,8 +16,6 @@ package org.opencb.cellbase.lib.impl.core; - -import com.fasterxml.jackson.databind.ObjectMapper; import com.mongodb.ReadPreference; import com.mongodb.WriteConcern; import com.mongodb.client.model.Filters; @@ -25,6 +23,7 @@ import org.bson.BsonDocument; import org.bson.Document; import org.bson.conversions.Bson; +import org.codehaus.jackson.map.ObjectMapper; import org.opencb.cellbase.core.api.key.ApiKeyStats; import org.opencb.cellbase.core.api.query.AbstractQuery; import org.opencb.cellbase.core.api.query.ProjectionQueryOptions; diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index 304cca8aa2..fe4509c6fc 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.2.1-SNAPSHOT + 6.3.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 1c619a886b..35498a9a40 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.2.1-SNAPSHOT + 6.3.0-SNAPSHOT pom CellBase project @@ -23,20 +23,19 @@ ${project.version} - 5.2.1-SNAPSHOT - 3.2.1-SNAPSHOT - 0.1.0 - 9.4.51.v20230217 - - 2.14.3 - 3.14.0 - 1.7.36 + 5.3.0-SNAPSHOT + 3.3.0-SNAPSHOT + 0.1.0 + 2.11.4 + 1.9.13 2.30.1 + 1.7.32 2.17.2 1.5.2 5.5.2 0.8.8 + 9.4.17.v20190418 0.11.5 1.6.5 3.1.0 @@ -52,6 +51,7 @@ 1.48.0 2.4 2.4 + 3.12.0 2.1.6 4.4 1.69 @@ -413,11 +413,11 @@ swagger-annotations ${swagger-annotations.version} - + io.jsonwebtoken jjwt-jackson