diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index 8afe18d..e246853 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -8,9 +8,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Check out code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
       - name: hadolint
         uses: reviewdog/action-hadolint@v1
         with:
           reporter: github-pr-review # Default is github-pr-check
           fail_on_error: true
+          hadolint_ignore: SC2086
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index e724064..0bab6ff 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -8,9 +8,9 @@ jobs:
     name: Lint
     steps:
       - name: Check out source repository
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
       - name: Set up Python environment
-        uses: actions/setup-python@v3
+        uses: actions/setup-python@v5
         with:
           python-version: "3.10"
       - name: flake8 Lint
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 24c342c..61a9f33 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 
 - Added Docker build push action to push images to GH container registry
+- Added Python environment with basic dependencies for MiX3R (jupyter, numpy, pandas, scipy, matplotlib, numba, ++)
 
 ### Updated
 
diff --git a/docker/dockerfiles/ldpred2/Dockerfile b/docker/dockerfiles/ldpred2/Dockerfile
index d3869af..ac7fad9 100644
--- a/docker/dockerfiles/ldpred2/Dockerfile
+++ b/docker/dockerfiles/ldpred2/Dockerfile
@@ -16,6 +16,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     update-ca-certificates
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
+    bzip2=1.0.8-5build1 \
     cmake=3.22.1-1ubuntu1.22.04.2 \
     curl=7.81.0-1ubuntu1.16 \
     libatlas-base-dev=3.10.3-12ubuntu1 \
@@ -27,18 +28,17 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     node-shasum=1.0.2-3 \
     patch=2.7.6-7build2 \
     unzip=6.0-26ubuntu3.2 \
-    wget=1.21.2-2ubuntu1 \
     zlib1g-dev=1:1.2.11.dfsg-2ubuntu9.2 \
     && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
 
 # R dependencies
-RUN R -e "install.packages(c('devtools', 'argparser', 'bigsnpr', 'data.table', 'optparse', 'stringr', 'R.utils', 'tidyverse'), dependencies=c('Depends', 'Imports', 'LinkingTo'))"
+RUN R -e "install.packages(c('devtools', 'argparser', 'bigsnpr', 'data.table', 'essentials', 'eulerr', 'IRkernel', 'optparse', 'stringr', 'R.utils', 'tidyverse'), dependencies=c('Depends', 'Imports', 'LinkingTo'))"
 
 # install plink 1.9
-RUN wget -q --no-check-certificate https://github.com/chrchang/plink-ng/archive/refs/tags/v2.00a4.5.zip && \
+RUN curl -sSL https://github.com/chrchang/plink-ng/archive/refs/tags/v2.00a4.5.zip -o v2.00a4.5.zip && \
     unzip v2.00a4.5.zip
 
 WORKDIR /tmp/plink-ng-2.00a4.5/1.9
@@ -56,7 +56,7 @@ RUN rm -rf v2.00a4.5.zip plink-ng-2.00a4.5
 
 # install PRSice-2
 COPY patches/*.diff /tmp/patches/
-RUN wget -q --no-check-certificate https://github.com/choishingwan/PRSice/archive/refs/tags/2.3.5.zip && \
+RUN curl -sSL https://github.com/choishingwan/PRSice/archive/refs/tags/2.3.5.zip -o 2.3.5.zip && \
     unzip 2.3.5.zip
 
 WORKDIR /tmp/PRSice-2.3.5
@@ -78,7 +78,29 @@ RUN cmake ../ && \
 WORKDIR /tmp
 RUN rm -rf 2.3.5.zip PRSice-2.3.5
 
+# install Miniforge (conda/mamba) into /usr/local and the Python environment
+WORKDIR /tmp/miniforge
+RUN curl -sSL "https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-$("uname")-$("uname" "-m").sh" -o /tmp/miniforge.sh \
+    && mkdir /root/.conda \
+    && bash /tmp/miniforge.sh -bfp /usr/local \
+    && rm -rf /tmp/miniforge.sh
+
+RUN mamba install \
+    python=3.11 \
+    conda-pack==0.7.1 \
+    numpy==1.26.4 \
+    scipy==1.13.0 \
+    numba==0.59.1 \
+    pandas==2.2.2 \
+    matplotlib-base==3.8.4 \
+    seaborn==0.13.2 \
+    notebook==7.1.3 \
+    --yes && \
+    mamba install icc_rt==2020.2 --channel numba --yes
+
 # clean up
-RUN rm -rf /tmp/*
+RUN mamba clean -a -y && \
+    pip cache purge && \
+    rm -rf /tmp/*
 
-WORKDIR /
\ No newline at end of file
+WORKDIR /
diff --git a/scripts/LDpred2/run_ldpred2_w_docker.sh b/scripts/LDpred2/run_ldpred2_w_docker.sh
new file mode 100644
index 0000000..ade4956
--- /dev/null
+++ b/scripts/LDpred2/run_ldpred2_w_docker.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+# pull docker image and tag it as "ldpred2:latest"
+docker pull --platform=linux/amd64 ghcr.io/comorment/ldpred2:latest
+docker image tag ghcr.io/comorment/ldpred2:latest ldpred2:latest
+
+# NOTE: run this script with bash, not zsh (which is the default shell in some systems - e.g. macOS)
+
+# input/output files
+export fileGeno=/REF/examples/prsice2/EUR.bed
+export fileGenoRDS=EUR.rds
+export fileSumstats=/REF/examples/prsice2/Height.gwas.txt.gz
+export fileOut=Height
+
+# set environment variables. Replace $REPOS with
+# the full path to the folder containing cloned "containers" and "ldpred2_ref" repositories
+export REPOS=~/Repositories
+export REFERENCE=$REPOS/containers/reference  # clone of https://github.com/comorment/containers
+export LDPRED2_REF=$REPOS/ldpred2_ref  # clone of https://github.com/comorment/ldpred2_ref
+export CONTAINER="ldpred2:latest"
+export RSCRIPT="docker run --platform=linux/amd64 --mount type=bind,source=${PWD},target=/home --mount type=bind,source=${REFERENCE},target=/REF --mount type=bind,source=${LDPRED2_REF},target=/ldpred2_ref -w=/home/ ${CONTAINER} Rscript"
+
+# run tasks
+# $RSCRIPT /home/createBackingFile.R --file-input $fileGeno --file-output /home/$fileGenoRDS
+$RSCRIPT createBackingFile.R --file-input $fileGeno --file-output $fileGenoRDS
+
+# impute
+$RSCRIPT imputeGenotypes.R --impute-simple mean0 --geno-file-rds $fileGenoRDS
+
+# Generate PGS using LDPRED2-auto
+$RSCRIPT ldpred2.R \
+    --ldpred-mode auto \
+    --col-stat OR \
+    --col-stat-se SE \
+    --stat-type OR \
+    --geno-file-rds $fileGenoRDS \
+    --sumstats $fileSumstats \
+    --out $fileOut.auto
+
diff --git a/scripts/LDpred2/score.png b/scripts/LDpred2/score.png
deleted file mode 100644
index 4daea00..0000000
Binary files a/scripts/LDpred2/score.png and /dev/null differ
diff --git a/tests/extras/r.R b/tests/extras/r.R
index 9ee6e95..906e34f 100644
--- a/tests/extras/r.R
+++ b/tests/extras/r.R
@@ -11,8 +11,11 @@ libraries_to_check <- c(
   "devtools",
   "data.table",
   "dplyr",
+  "essentials",
+  "eulerr",
   "ggplot2",
   "grDevices",
+  "IRkernel",
   "methods",
   "optparse",
   "R.utils",
diff --git a/tests/test_ldpred2_standalone.py b/tests/test_ldpred2_standalone.py
index de2acf8..f635e18 100644
--- a/tests/test_ldpred2_standalone.py
+++ b/tests/test_ldpred2_standalone.py
@@ -67,6 +67,22 @@ def test_ldpred2_R_packages():
     assert out.returncode == 0
 
 
+def test_ldpred2_python3_packages():
+    packages = [
+        'notebook',
+        'numba',
+        'numpy',
+        'matplotlib',
+        'pandas',
+        'scipy',
+        'seaborn',
+    ]
+    for pkg in packages:
+        call = f'{PREFIX} python3 -c "import {pkg}"'
+        out = subprocess.run(call, shell=True, check=True)
+        assert out.returncode == 0
+
+
 def test_ldpred2_bin_prsice():
     call = f'{PREFIX} PRSice --version'
     out = subprocess.run(call.split(' '), check=True)