Skip to content

Commit

Permalink
Merge branch 'TASK-5564' into TASK-5387
Browse files Browse the repository at this point in the history
  • Loading branch information
jtarraga committed Aug 13, 2024
2 parents 6eac380 + a8d6368 commit 2950c0e
Show file tree
Hide file tree
Showing 136 changed files with 5,365 additions and 3,607 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/manual-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ jobs:
name: JUnit Test
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
ref: ${{ github.event.inputs.branch }}
fetch-depth: '0'
- name: Set up JDK 11
uses: actions/setup-java@v3
uses: actions/setup-java@v4
with:
distribution: 'temurin'
java-version: '11'
Expand Down
15 changes: 15 additions & 0 deletions .github/workflows/pull-request-approved.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
name: Pull request approve workflow

# The `pull_request_review` event with activity type `submitted` fires for
# every submitted review (comment, changes requested, approval). The job-level
# condition below restricts the pipeline to approvals only.
on:
  pull_request_review:
    types: [ submitted ]

jobs:
  build:
    # Run only when the submitted review is an approval.
    if: github.event.review.state == 'approved'
    uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@develop

  test:
    name: "Test analysis"
    uses: ./.github/workflows/test-analysis.yml
    # Depends on `build`, so it is skipped whenever `build` is skipped.
    needs: build
    secrets: inherit
2 changes: 1 addition & 1 deletion .github/workflows/task.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,5 @@ jobs:
uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@develop
needs: test
with:
cli: python3 ./build/cloud/docker/docker-build.py push --images base --tag ${{ github.ref_name }}
cli: python3 ./build/cloud/docker/docker-build.py push --images base,builder --tag ${{ github.ref_name }}
secrets: inherit
4 changes: 2 additions & 2 deletions .github/workflows/test-analysis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ jobs:
name: Test and push Sonar analysis
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: '0'
- name: Set up JDK 11
uses: actions/setup-java@v3
uses: actions/setup-java@v4
with:
distribution: 'temurin'
java-version: '11'
Expand Down
10 changes: 7 additions & 3 deletions cellbase-app/app/cloud/docker/cellbase-builder/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ LABEL org.label-schema.vendor="OpenCB" \
## We need to be root to install dependencies
USER root
RUN apt-get update -y && \
apt-get install -y git default-mysql-client libjson-perl libdbi-perl libdbd-mysql-perl libdbd-mysql-perl libtry-tiny-perl && \
apt-get install -y git default-mysql-client libjson-perl libdbi-perl libdbd-mysql-perl libdbd-mysql-perl libtry-tiny-perl libxml-simple-perl liblog-log4perl-perl libxml-parser-perl libxml-dom-perl && \
mkdir /opt/ensembl && chown cellbase:cellbase /opt/ensembl && \
rm -rf /var/lib/apt/lists/*

Expand All @@ -26,6 +26,10 @@ RUN cd /opt/ensembl && \
git clone https://github.com/Ensembl/ensembl-variation.git && \
git clone https://github.com/Ensembl/ensembl-funcgen.git && \
git clone https://github.com/Ensembl/ensembl-compara.git && \
git clone https://github.com/Ensembl/ensembl-io.git
git clone https://github.com/Ensembl/ensembl-io.git && \
git clone --branch cvs/release-0_7 https://github.com/biomart/biomart-perl

ENV PERL5LIB=$PERL5LIB:/opt/ensembl/bioperl-live:/opt/ensembl/ensembl/modules:/opt/ensembl/ensembl-variation/modules:/opt/ensembl/ensembl-funcgen/modules:/opt/ensembl/ensembl-compara/modules:/opt/ensembl/lib/perl/5.18.2:/opt/cellbase/scripts/ensembl-scripts
## Give write permissions so that the script ensembl_canonical.pl can create a sub-folder for caching purposes
RUN chmod -R 777 /opt/cellbase/scripts/ensembl-scripts/

ENV PERL5LIB=$PERL5LIB:/opt/ensembl/bioperl-live:/opt/ensembl/ensembl/modules:/opt/ensembl/ensembl-variation/modules:/opt/ensembl/ensembl-funcgen/modules:/opt/ensembl/ensembl-compara/modules:/opt/ensembl/lib/perl/5.18.2:/opt/cellbase/scripts/ensembl-scripts:/opt/ensembl/biomart-perl/lib
6 changes: 3 additions & 3 deletions cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm
Original file line number Diff line number Diff line change
Expand Up @@ -141,9 +141,9 @@ our $HOMO_SAPIENS_COMPARA = "homo_sapiens_compara_111_38";
#our $HOMO_SAPIENS_CORE = "homo_sapiens_core_78_38";
#our $HOMO_SAPIENS_VARIATION = "homo_sapiens_variation_78_38";
#our $HOMO_SAPIENS_FUNCTIONAL = "homo_sapiens_funcgen_78_38";
our $MUS_MUSCULUS_CORE = "mus_musculus_core_78_38";
our $MUS_MUSCULUS_VARIATION = "mus_musculus_variation_78_38";
our $MUS_MUSCULUS_FUNCTIONAL = "mus_musculus_funcgen_78_38";
our $MUS_MUSCULUS_CORE = "mus_musculus_core_111_39";
our $MUS_MUSCULUS_VARIATION = "mus_musculus_variation_111_39";
our $MUS_MUSCULUS_FUNCTIONAL = "mus_musculus_funcgen_111_39";
our $RATTUS_NORVEGICUS_CORE = "rattus_norvegicus_core_78_5";
our $RATTUS_NORVEGICUS_VARIATION = "rattus_norvegicus_variation_78_5";
our $RATTUS_NORVEGICUS_FUNCTIONAL = "rattus_norvegicus_funcgen_78_5";
Expand Down
61 changes: 61 additions & 0 deletions cellbase-app/app/scripts/ensembl-scripts/ensembl_canonical.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/env perl

# Queries a BioMart registry for the gene ID, transcript ID and canonical flag
# of every transcript in the "<species>_gene_ensembl" dataset and writes the
# result, formatted as TSV, to "<outdir>/ensembl_canonical.txt".
#
# Options:
#   --species  BioMart species prefix used to build the dataset name
#              (default: 'hsapiens')
#   --outdir   directory where ensembl_canonical.txt is created (default: "./")

use strict;
use warnings;
use Getopt::Long;
use Data::Dumper;
use JSON;
use DB_CONFIG;

use BioMart::Initializer;
use BioMart::Query;
use BioMart::QueryRunner;

## Default values
my $species = 'hsapiens';
my $outdir = "./";

## Parsing command line
GetOptions ('species=s' => \$species, 'outdir=s' => \$outdir);


my $confFile = "/opt/cellbase/scripts/ensembl-scripts/martURLLocation.xml";

# NB: change action to 'clean' if you wish to start a fresh configuration
# and to 'cached' if you want to skip configuration step on subsequent runs from the same registry
my $action='clean';
my $initializer = BioMart::Initializer->new('registryFile'=>$confFile, 'action'=>$action);
my $registry = $initializer->getRegistry;

my $query = BioMart::Query->new('registry'=>$registry,'virtualSchemaName'=>'default');

$query->setDataset($species."_gene_ensembl");

$query->addAttribute("ensembl_gene_id");
$query->addAttribute("ensembl_transcript_id");
$query->addAttribute("transcript_is_canonical");

$query->formatter("TSV");

# Open the file for writing
open(my $fh, '>', "$outdir/ensembl_canonical.txt") or die "Cannot open ensembl_canonical.txt file: $!";

# Save the original stdout by duplicating its file descriptor. Copying the
# glob (my $saved = *STDOUT) would only alias STDOUT itself, so after the
# redirection below the "saved" handle would point at the output file too and
# the real STDOUT could never be restored.
open(my $original_stdout, '>&', \*STDOUT) or die "Can't save STDOUT: $!";
open(STDOUT, '>&', $fh) or die "Can't redirect STDOUT: $!";

my $query_runner = BioMart::QueryRunner->new();

# to obtain unique rows only
$query_runner->uniqueRowsOnly(1);
$query_runner->execute($query);
#$query_runner->printHeader();
#print ENSEMBL_CANONICAL $query_runner->printResults();
# Call printResults which prints to STDOUT (now redirected to the file)
$query_runner->printResults();
#$query_runner->printFooter();

# Restore the original stdout. Re-opening STDOUT implicitly closes the
# redirected handle first, which flushes any buffered results to the file.
open(STDOUT, '>&', $original_stdout) or die "Can't restore STDOUT: $!";

# The saved duplicate is no longer needed once STDOUT has been restored
close($original_stdout) or die "Failed to close saved STDOUT: $!";

# Close the filehandle
close($fh) or die "Failed to close file: $!";
8 changes: 5 additions & 3 deletions cellbase-app/app/scripts/ensembl-scripts/gene_extra_info.pl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
####################################################################
## Parsing command line options ####################################
####################################################################
# USAGE: ./gene_extra_info.pl --species "Homo sapiens" --outdir ../../appl_db/ird_v1/hsa ...
##docker run -it --mount type=bind,source=/tmp,target=/tmp opencb/cellbase-builder:6.2.0-SNAPSHOT /opt/cellbase/scripts/ensembl-scripts/gene_extra_info.pl -s "Mus musculus" -o /tmp

# USAGE: ./gene_extra_info.pl --species "Homo sapiens" --assembly "GRCh38" --outdir ../../appl_db/ird_v1/hsa ...

## Parsing command line
GetOptions ('species=s' => \$species, 'assembly=s' => \$assembly, 'outdir=s' => \$outdir, 'phylo=s' => \$phylo,
Expand Down Expand Up @@ -50,8 +52,8 @@

if ($phylo eq "" || $phylo eq "vertebrate") {
print ("In vertebrates section\n");
if ($species eq "Homo sapiens" && $assembly eq "GRCh38") {
print ("Human selected, assembly ".$assembly." selected, connecting to port ".$ENSEMBL_PORT."\n");
if ($species eq "Homo sapiens" || $species eq "Mus musculus") {
print ($species." selected, assembly ".$assembly." selected, connecting to port ".$ENSEMBL_PORT."\n");
Bio::EnsEMBL::Registry->load_registry_from_db(
-host => $ENSEMBL_HOST,
-user => $ENSEMBL_USER,
Expand Down
32 changes: 12 additions & 20 deletions cellbase-app/app/scripts/ensembl-scripts/genome_info.pl
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
####################################################################
## Parsing command line options ####################################
####################################################################
# USAGE: ./genome_info.pl --species "Homo sapiens" --outfile ../../appl_db/ird_v1/hsa ...
##docker run -it --mount type=bind,source=/tmp,target=/tmp opencb/cellbase-builder:6.2.0-SNAPSHOT /opt/cellbase/scripts/ensembl-scripts/genome_info.pl --species "Mus musculus" --assembly GRCm39 --outfile /tmp

# USAGE: ./genome_info.pl --species "Homo sapiens" --assembly GRCh38 --outfile ../../appl_db/ird_v1/hsa ...

## Parsing command line
GetOptions ('species=s' => \$species, 'assembly=s' => \$assembly, 'o|outfile=s' => \$outfile, 'phylo=s' => \$phylo,
Expand All @@ -29,7 +31,6 @@

if ($outfile eq "") {
$outfile = "/ensembl-data/genome_info.json";
# $outfile = "/ensembl-data/$species.json";
}

####################################################################
Expand All @@ -42,17 +43,13 @@
# Bio::EnsEMBL::Registry->load_all("$ENSEMBL_REGISTRY");
if($phylo eq "" || $phylo eq "vertebrate") {
print ("In vertebrates section\n");
if ($species eq "Homo sapiens" && $assembly eq "GRCh38") {
print ("Human selected, assembly ".$assembly." selected, connecting to port ".$ENSEMBL_PORT."\n");
Bio::EnsEMBL::Registry->load_registry_from_db(
-host => $ENSEMBL_HOST,
-user => $ENSEMBL_USER,
-port => $ENSEMBL_PORT,
-verbose => $verbose
);
} else {
print ("Human selected, assembly ".$assembly." no supported\n");
}
print ("Species: ".$species.", assembly ".$assembly.", connecting to: ".$ENSEMBL_HOST.":".$ENSEMBL_PORT."\n");
Bio::EnsEMBL::Registry->load_registry_from_db(
-host => $ENSEMBL_HOST,
-user => $ENSEMBL_USER,
-port => $ENSEMBL_PORT,
-verbose => $verbose
);
} else {
print ("In no-vertebrates section\n");
Bio::EnsEMBL::Registry->load_registry_from_db(
Expand All @@ -64,7 +61,6 @@

my $slice_adaptor = Bio::EnsEMBL::Registry->get_adaptor($species, "core", "Slice");
my $karyotype_adaptor = Bio::EnsEMBL::Registry->get_adaptor($species, "core", "KaryotypeBand");
# my $gene_adaptor = Bio::EnsEMBL::Registry->get_adaptor($species, "core", "Gene");
####################################################################

my %info_stats = ();
Expand All @@ -81,12 +77,10 @@
$chromosome{'start'} = int($chrom->start());
$chromosome{'end'} = int($chrom->end());
$chromosome{'size'} = int($chrom->seq_region_length());
# $chromosome{'numberGenes'} = scalar @{$chrom->get_all_Genes()};
$chromosome{'isCircular'} = $chrom->is_circular();

my @cytobands = ();
foreach my $cyto(@{$karyotype_adaptor->fetch_all_by_chr_name($chrom->seq_region_name)}) {
# print $cytoband->name."\n";
my %cytoband = ();
$cytoband{'name'} = $cyto->name();
$cytoband{'start'} = int($cyto->start());
Expand All @@ -96,7 +90,7 @@
push(@cytobands, \%cytoband);
}

## check if any cytoband has been added
## Check if any cytoband has been added
## If not, a single cytoband covering the whole chromosome is added.
if(@cytobands == 0) {
my %cytoband = ();
Expand All @@ -110,7 +104,6 @@
$chromosome{'cytobands'} = \@cytobands;

push(@chromosomes, \%chromosome);
# push(@chrom_ids, $chrom->seq_region_name);
}
$info_stats{'chromosomes'} = \@chromosomes;

Expand All @@ -124,7 +117,6 @@
$supercontig{'start'} = int($supercon->start());
$supercontig{'end'} = int($supercon->end());
$supercontig{'size'} = int($supercon->seq_region_length());
# $supercontig{'numberGenes'} = scalar @{$supercon->get_all_Genes()};
$supercontig{'isCircular'} = $supercon->is_circular();

## A single cytoband covering the whole chromosome is added.
Expand All @@ -151,7 +143,7 @@

sub print_parameters {
print "Parameters: ";
print "species: $species, outfile: $outfile, ";
print "species: $species, assembly: $assembly, outfile: $outfile, ";
print "ensembl-registry: $ENSEMBL_REGISTRY, ";
print "ensembl-host: $ENSEMBL_HOST, ensembl-port: $ENSEMBL_PORT, ";
print "ensembl-user: $ENSEMBL_USER, verbose: $verbose, help: $help";
Expand Down
19 changes: 19 additions & 0 deletions cellbase-app/app/scripts/ensembl-scripts/martURLLocation.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<!--
~ Copyright 2015-2020 OpenCB
~
~ Licensed under the Apache License, Version 2.0 (the "License");
~ you may not use this file except in compliance with the License.
~ You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->

<!--
  BioMart registry with a single mart location: the "ensembl_mart_111"
  database ("Ensembl Genes 111"), served by www.ensembl.org on port 80 under
  /biomart/martservice and exposed as ENSEMBL_MART_ENSEMBL in the "default"
  virtual schema.
  NOTE(review): ensembl_canonical.pl passes a file named martURLLocation.xml
  to BioMart::Initializer as its registryFile; presumably this is that file
  once installed under /opt/cellbase/scripts/ensembl-scripts - confirm.
-->
<MartRegistry>
<MartURLLocation database="ensembl_mart_111" default="1" displayName="Ensembl Genes 111" host="www.ensembl.org" includeDatasets="" martUser="" name="ENSEMBL_MART_ENSEMBL" path="/biomart/martservice" port="80" serverVirtualSchema="default" visible="1" />
</MartRegistry>
60 changes: 0 additions & 60 deletions cellbase-app/app/scripts/mirtarbase/fix-gene-symbol.sh

This file was deleted.

2 changes: 1 addition & 1 deletion cellbase-app/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<parent>
<groupId>org.opencb.cellbase</groupId>
<artifactId>cellbase</artifactId>
<version>6.2.0-SNAPSHOT</version>
<version>6.3.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ public class CommonCommandOptions {
description = "Set the logging level, accepted values are: debug, info, warn, error and fatal")
public String logLevel = "info";

@Parameter(names = {"-C", "--config"}, arity = 1,
description = "Path to CellBase configuration.yml file")
@Deprecated
@Parameter(names = {"-C", "--config"}, arity = 1, hidden = true, description = "Path to CellBase configuration.yml file")
public String conf;
}

Expand Down
Loading

0 comments on commit 2950c0e

Please sign in to comment.