From 30e890b15f57cb9415874f13197567dc325f75db Mon Sep 17 00:00:00 2001 From: Espen Hagen <2492641+espenhgn@users.noreply.github.com> Date: Wed, 14 Aug 2024 16:45:27 +0200 Subject: [PATCH] fix `GLIBCXX_3.4.30 not found`; conda env; fix broken tests (#268) * fixed broken test test_gwas_metal, add conda env * Fixed broken tests for `GLIBCXX_3.4.30' not found * bump curl version * whitespace removed * LDAK 5.2 -> 6 * bump libssl-dev version * bump libcurl4-gnutls-dev * LDAK git-sha * add additional py/R packages/modules * test build * pin version * add wget --referer arg for king download * add `WSpiller/MVMR`, `noahlorinczcomi/MRBEE` R packages * king issues * missing comma * R... * add packages to test * remove SINGULARITY_LD_LIBRARY_PATH export --- CHANGELOG.md | 6 ++++++ INSTALL.md | 13 ++++++++++++- docker/dockerfiles/python3/Dockerfile | 23 ++++++++++++++++------- docker/scripts/R/cran.R | 7 ++++--- docker/scripts/R/github.R | 4 +++- docker/scripts/apt_get_essential.sh | 4 ++-- docker/scripts/install_bcftools.sh | 8 ++++---- docker/scripts/install_htslib.sh | 2 +- docker/scripts/install_king.sh | 2 +- docker/scripts/install_ldak.sh | 6 +++--- docs/singularity/gwas.md | 2 +- environment.yml | 12 ++++++++++++ tests/extras/r.R | 3 +++ tests/test_gwas.py | 4 ++-- tests/test_python3.py | 23 +++++++++++++++++++---- 15 files changed, 89 insertions(+), 30 deletions(-) create mode 100644 environment.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 875fe92e..41172c90 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,9 @@ If MD5 sum is not listed for a certain release then it means that the container ### Added +* Add Conda environment file for project dependencies +* Add Python packages `scikit-survival, pandas-plink, numba, xmltodict, pyliftover, configparser, intervaltree` to `python3.sif` container +* Add `Haplin`, `WSpiller/MVMR`, `noahlorinczcomi/MRBEE` R packages to `r.sif` container * Add container build and push actions for all containers: * Action should 
trigger builds on pushes and pull requests targeting the main branch. * Should build and push Docker and Singularity images for new tags with `v*.*.*` pattern in main branch. @@ -32,6 +35,7 @@ If MD5 sum is not listed for a certain release then it means that the container ### Updated +* Update LDAK binary to version 6 in gwas.sif (from 5.2) * Rebuilt `gwas.sif` container with md5sum checksum: ``` @@ -49,6 +53,8 @@ If MD5 sum is not listed for a certain release then it means that the container ### Fixed +* Fixed broken unit test `tests/test_gwas.py::test_gwas_metal` with Apptainer "sandbox" mode +* Workaround for pandas import before scipy in python codes via `export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH` * Fixed brittle tests if `TMPDIR` is not `/tmp` ### Removed diff --git a/INSTALL.md b/INSTALL.md index d3d808dd..e3e79eb9 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -7,6 +7,17 @@ In order to set up these resources, some software may be required - [Git LFS](https://git-lfs.com) - [ORAS CLI](https://oras.land) +## Conda environment (optional) + +The above and other miscellaneous dependencies may be installed in a [Conda](https://conda.io) environment using the provided [``environment.yml``](https://github.com/comorment/containers/blob/main/environment.yml) file as (``conda`` may be replaced by ``mamba`` executable): + +```bash +conda env create -f environment.yml # create cosgap environment +conda activate cosgap +# do this and that... +conda deactivate +``` + ## Clone the repository To download all files of the last revision of this project, issue: @@ -21,7 +32,7 @@ git lfs pull # pull "large" files ## Update the Singularity Image Files (containers) We are presently migrating container builds as distributed here to the [GitHub Container Registry](https://ghcr.io). -Future image build artifacts (Singularity and Docker) will be listed under [Packages](https://github.com/orgs/comorment/packages?repo_name=containers) from hereon. 
+Current and future image build artifacts (Singularity and Docker) are listed under [Packages](https://github.com/orgs/comorment/packages?repo_name=containers). To obtain updated versions of the Singularity Image Format (.sif) container files provided here, issue: diff --git a/docker/dockerfiles/python3/Dockerfile b/docker/dockerfiles/python3/Dockerfile index b5541848..0c9f0dc0 100644 --- a/docker/dockerfiles/python3/Dockerfile +++ b/docker/dockerfiles/python3/Dockerfile @@ -19,25 +19,33 @@ RUN bash install_mambaforge.sh && \ # keep the list of packages sorted alphabetically # https://www.online-utility.org/text/sort.jsp RUN mamba install python=3.10.6 \ + configparser=7.0.0 \ "h5py=3.7.0=nompi*" \ jupyterlab=3.4.8 \ lifelines=0.27.0 \ + intervaltree=3.1.0 \ matplotlib-venn=0.11.5 \ matplotlib=3.6.0 \ more-itertools=9.0.0 \ numdifftools=0.9.39 \ + numba=0.60.0 \ numpy=1.23.3 \ pandas=1.5.0 \ + pandas-plink=2.2.9 \ psutil=5.9.3 \ - pyreadstat=1.1.9 \ + pyreadstat=1.2.7 \ pyyaml=6.0 \ scikit-learn=1.1.2 \ + scikit-survival=0.19.0.post1 \ scipy=1.9.1 \ seaborn=0.12.0 \ semantic_version=2.10.0 \ statsmodels=0.13.2 \ xlrd=2.0.1 \ - --yes + xmltodict=0.13.0 \ + --yes && \ + mamba clean -a -y + # pip install stuff in env. RUN pip install --no-cache-dir cbgen==1.0.2 && \ @@ -47,7 +55,9 @@ RUN pip install --no-cache-dir cbgen==1.0.2 && \ pip install --no-cache-dir fastlmm==0.6.3 && \ pip install --no-cache-dir plinkio==0.9.8 && \ pip install --no-cache-dir LDpred==1.0.11 && \ - pip install --no-cache-dir pycap==2.1.0 + pip install --no-cache-dir pycap==2.1.0 && \ + pip install --no-cache-dir pyliftover==0.4.1 && \ + pip cache purge # Plink (as python_convert depends on plink) WORKDIR /tmp/plink @@ -65,9 +75,8 @@ WORKDIR /tools/ukb RUN git clone https://github.com/precimed/ukb.git . 
&& \ git reset --hard dc57e0d8380cd9b2eca479dc6f181d76ca5a429a -# cleanup for smaller image size -RUN mamba clean -a -y -RUN pip cache purge - WORKDIR /tools + +# https://github.com/comorment/containers/issues/267: +ENV LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH diff --git a/docker/scripts/R/cran.R b/docker/scripts/R/cran.R index 6a416069..2b804abf 100644 --- a/docker/scripts/R/cran.R +++ b/docker/scripts/R/cran.R @@ -21,8 +21,11 @@ packages <- list( 'data.table' = '1.14.6', dendextend = '1.16.0', dplyr = '1.1.0', + fastICA = '1.2-3', + fcfdr = '1.0.0', flextable = '0.8.5', fmsb = '0.7.5', + foreign = '0.8-84', GCPBayes = '4.0.0', geepack = '1.3.9', ggalluvial = '0.12.4', @@ -36,9 +39,7 @@ packages <- list( glue = '1.6.2', gplots = '3.1.3', gtsummary = '1.7.0', - fastICA = '1.2-3', - fcfdr = '1.0.0', - foreign = '0.8-84', + Haplin = '7.3.0', homologene = '1.4.68.19.3.27', imputeMissings = '0.0.3', jtools = '2.2.1', diff --git a/docker/scripts/R/github.R b/docker/scripts/R/github.R index 68d09575..cd9db9c3 100644 --- a/docker/scripts/R/github.R +++ b/docker/scripts/R/github.R @@ -24,9 +24,11 @@ packages <- list( 'gqi/MRMix' = '56afdb2bc96760842405396f5d3f02e60e305039', 'stephenslab/susieR' = 'c0314a6113dfb54ea3817103ee01c027849f0eb7', 'MRCIEU/gwasglue' = 'c2d5660eed389e1a9b3e04406b88731d642243f1', + 'noahlorinczcomi/MRBEE' = '96971e346099b89585a6eff4a6f22bbcf25d6ca8', 'norment/normentR' = 'dfa1fbae9587db6c3613b0405df4f9cfa98ee0e1', 'psychgen/phenotools' = 'b744d927a1302d85152917f3802a2212093d588a', - 'wouterpeyrot/CCGWAS' = 'ce9764da946189623a0164f156ad119773bc32f5' + 'wouterpeyrot/CCGWAS' = 'ce9764da946189623a0164f156ad119773bc32f5', + 'WSpiller/MVMR' = '6adf8839a33fbe225c0161c564a517dfd61cee32' ) # install package from GitHub and quit with error if installation fails diff --git a/docker/scripts/apt_get_essential.sh b/docker/scripts/apt_get_essential.sh index a8fe2c11..a46735a0 100644 --- a/docker/scripts/apt_get_essential.sh +++ 
b/docker/scripts/apt_get_essential.sh @@ -12,14 +12,14 @@ apt-get update && apt-get install -y --no-install-recommends \ build-essential=12.8ubuntu1 \ bzip2=1.0.8-2 \ cmake=3.16.3-1ubuntu1.20.04.1 \ - curl=7.68.0-1ubuntu2.22 \ + curl=7.68.0-1ubuntu2.23 \ dos2unix=7.4.0-2 \ gdb=9.1-0ubuntu1 \ gfortran=4:9.3.0-1ubuntu2 \ git=1:2.25.1-1ubuntu3.13 \ less=551-1ubuntu0.3 \ libatlas-base-dev=3.10.3-8ubuntu7 \ - libcurl4-openssl-dev=7.68.0-1ubuntu2.22 \ + libcurl4-openssl-dev=7.68.0-1ubuntu2.23 \ libgomp1=10.5.0-1ubuntu1~20.04 \ libgsl-dev=2.5+dfsg-6+deb10u1build0.20.04.1 \ libnss3=2:3.98-0ubuntu0.20.04.2 \ diff --git a/docker/scripts/install_bcftools.sh b/docker/scripts/install_bcftools.sh index f1eebe9f..ced83ffb 100644 --- a/docker/scripts/install_bcftools.sh +++ b/docker/scripts/install_bcftools.sh @@ -3,10 +3,10 @@ set -euo pipefail # deps apt-get update && apt-get install -y --no-install-recommends \ - libcurl4-gnutls-dev=7.68.0-1ubuntu2.22 \ - libperl-dev=5.30.0-9ubuntu0.5 \ - && \ - apt-get clean && \ + libcurl4-gnutls-dev=7.68.0-1ubuntu2.23 \ + libperl-dev=5.30.0-9ubuntu0.5 + +apt-get clean && \ rm -rf /var/lib/apt/lists/* # bcftools diff --git a/docker/scripts/install_htslib.sh b/docker/scripts/install_htslib.sh index 5d7f01dd..54e4dceb 100644 --- a/docker/scripts/install_htslib.sh +++ b/docker/scripts/install_htslib.sh @@ -5,7 +5,7 @@ set -euo pipefail apt-get update && apt-get install --no-install-recommends \ libbz2-dev=1.0.8-2 \ liblzma-dev=5.2.4-1ubuntu1.1 \ - libssl-dev=1.1.1f-1ubuntu2.22 \ + libssl-dev=1.1.1f-1ubuntu2.23 \ -y apt-get clean && \ diff --git a/docker/scripts/install_king.sh b/docker/scripts/install_king.sh index ad87bcbe..9e39522f 100644 --- a/docker/scripts/install_king.sh +++ b/docker/scripts/install_king.sh @@ -3,7 +3,7 @@ set -euo pipefail # king VERSION="232" -wget --no-check-certificate https://www.kingrelatedness.com/executables/Linux-king$VERSION.tar.gz && \ +wget --debug --no-check-certificate --user-agent="Mozilla/5.0 (X11; Linux 
x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/83.0.4103.61 Chrome/83.0.4103.61 Safari/537.36" https://www.kingrelatedness.com/executables/Linux-king$VERSION.tar.gz && \ tar -xvzf Linux-king$VERSION.tar.gz && \ rm -rf Linux-king$VERSION.tar.gz diff --git a/docker/scripts/install_ldak.sh b/docker/scripts/install_ldak.sh index af078cdf..a88415ca 100644 --- a/docker/scripts/install_ldak.sh +++ b/docker/scripts/install_ldak.sh @@ -1,6 +1,6 @@ #!/bin/sh set -euo pipefail -wget --no-check-certificate https://dougspeed.com/wp-content/uploads/ldak5.2.linux_.zip -unzip ldak5.2.linux_.zip -cp ldak5.2.linux /bin/ldak +wget https://github.com/dougspeed/LDAK/raw/4ee871be17d8ea406494211638a5ead677e7dd47/ldak6.linux +chmod +x ldak6.linux +cp ldak6.linux /bin/ldak diff --git a/docs/singularity/gwas.md b/docs/singularity/gwas.md index d03f98d3..de4e485b 100644 --- a/docs/singularity/gwas.md +++ b/docs/singularity/gwas.md @@ -44,7 +44,7 @@ List of software included in the container: | GWAMA[^gwama] | 2.2.2 | [BSD-3-Clause](https://opensource.org/licenses/BSD-3-Clause) | HTSlib[^htslib] | 1.19.1 | [MIT/Expat/Modified-BSD](https://github.com/samtools/htslib/blob/develop/LICENSE) | king[^king] | 2.3.2 | [permissive](https://www.kingrelatedness.com/Download.shtml) - | ldak[^ldak] | 5.2 | [GPLv3](https://www.gnu.org/licenses/gpl-3.0.html) + | ldak[^ldak] | 6 | [GPLv3](https://www.gnu.org/licenses/gpl-3.0.html) | liftOver[^liftover] | latest | [permissive](https://genome-store.ucsc.edu) | metal[^metal] | 2020-05-05 | - | minimac4[^minimac4] | v4.1.6 | [GPLv3](https://www.gnu.org/licenses/gpl-3.0.html) diff --git a/environment.yml b/environment.yml new file mode 100644 index 00000000..438de077 --- /dev/null +++ b/environment.yml @@ -0,0 +1,12 @@ +name: cosgap +channels: + - conda-forge + - defaults +dependencies: + - apptainer=1.3.2 + - git=2.46.0 + - git-lfs=3.5.1 + - oras=1.2.0 + - pytest=8.3.2 + - python=3.11.9 + diff --git a/tests/extras/r.R b/tests/extras/r.R index 
570e0827..c5d40faf 100644 --- a/tests/extras/r.R +++ b/tests/extras/r.R @@ -86,6 +86,7 @@ libraries_to_check <- c( "gwasglue2", "gwasvcf", "haven", + "Haplin", "homologene", "ieugwasr", "imputeMissings", @@ -99,6 +100,8 @@ libraries_to_check <- c( "MatrixEQTL", "mgcv", "miniCRAN", + "MRBEE", + "MVMR", "modelr", "moments", "MplusAutomation", diff --git a/tests/test_gwas.py b/tests/test_gwas.py index 35b93fed..003bdb39 100644 --- a/tests/test_gwas.py +++ b/tests/test_gwas.py @@ -125,9 +125,9 @@ def test_gwas_metal(): os.system( f'tar -xvf {cwd}/tests/extras/GlucoseExample.tar.gz -C {d} ' + '--strip-components=1') - print(os.listdir(d)) + os.chdir(d) # test must be run in temporary directory call = \ - f'singularity run --home={d}:/home/ {cwd}/{pth} metal metal.txt' + f'singularity run {cwd}/{pth} metal metal.txt' out = subprocess.run(call.split(' '), capture_output=True, check=False) assert out.returncode == 0 # software may not crash on error, checking captured output diff --git a/tests/test_python3.py b/tests/test_python3.py index f395bbb2..bb950469 100644 --- a/tests/test_python3.py +++ b/tests/test_python3.py @@ -7,7 +7,6 @@ import os import subprocess - pth = os.path.join('singularity', 'python3.sif') @@ -50,24 +49,40 @@ def test_python3_ukb(): def test_python3_packages(): packages = [ + 'configparser', 'h5py', + 'intervaltree', 'ldpred', 'lifelines', 'matplotlib', 'matplotlib_venn', + 'numba', 'numdifftools', 'numpy', 'pandas', + 'pandas_plink', 'plinkio', - 'redcap', # pycap + 'pyliftover', 'pyreadstat', - 'yaml', # pyyaml + 'redcap', # pycap 'scipy', 'seaborn', 'semantic_version', + 'sklearn', + 'sksurv', 'statsmodels', - 'xlrd'] + 'xlrd', + 'xmltodict', + 'yaml', # pyyaml + ] for pkg in packages: call = f'singularity run {pth} python -c "import {pkg}"' out = subprocess.run(call.split(' ')) assert out.returncode == 0 + + +def test_python3_import_pandas_scipy_stats(): + pwd = os.getcwd() + call = f'singularity run --home={pwd} {pth} python -c "import pandas as 
pd; from scipy import *"' + out = subprocess.run(call.split(' ')) + assert out.returncode == 0