Skip to content

Commit

Permalink
Merge pull request #70 from cancerit/develop
Browse files Browse the repository at this point in the history
merge 3.5.1 into main
  • Loading branch information
blex-max authored Oct 18, 2023
2 parents 68fab43 + 667da4f commit a05afe2
Show file tree
Hide file tree
Showing 8 changed files with 217 additions and 51 deletions.
14 changes: 14 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
# CHANGES

## 3.5.1
* Dockerfile updated to avoid snv_merge_and_vaf_calc.R bug and to improve reproducibility
* Small change to hardcoded overlapping_mask value in indel caller

## 3.5.0
* Minor bug in the indel pipeline fixed (was using bitwise OR instead of logical OR)
* New quality metrics added to the indel calls such that SNVs and indels come out with the same metrics

## 3.4.0

* Fixed faulty PART step that was dropping last genomic intervals for targeted experiments
* Refactored how indels get merged to avoid errors when scaling up calculations
* Updated htslibs, samtools, bcftools and libdeflate

## 3.3.0

* Added new INFO fileds to the final vcf
Expand Down
72 changes: 51 additions & 21 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,27 +1,43 @@
FROM ubuntu:22.04 as builder
FROM ubuntu:18.04 as builder

USER root

# ALL tool versions used by opt-build.sh
ENV VER_SAMTOOLS="1.14"
ENV VER_HTSLIB="1.14"
ENV VER_BCFTOOLS="1.14"
ENV VER_SAMTOOLS="1.18"
ENV VER_HTSLIB="1.18"
ENV VER_BCFTOOLS="1.18"
ENV VER_VERIFYBAMID="2.0.1"
ENV VER_LIBDEFLATE="v1.12"
ENV VER_LIBDEFLATE="v1.18"

ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get -yq update
RUN apt-get install -yq --no-install-recommends locales
RUN apt-get install -yq --no-install-recommends g++
RUN apt-get install -yq --no-install-recommends ca-certificates
RUN apt-get install -yq --no-install-recommends cmake
RUN apt-get install -yq --no-install-recommends wget

# install latest cmake so opt-build.sh works - the initial installs will also help install R
RUN apt-get install -yq --no-install-recommends software-properties-common lsb-release
RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null
RUN apt-add-repository "deb https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main"
RUN apt-get install -yq --no-install-recommends cmake=3.25.2-0kitware1ubuntu18.04.1

RUN apt-get install -yq --no-install-recommends make
RUN apt-get install -yq --no-install-recommends bzip2
RUN apt-get install -yq --no-install-recommends gcc
RUN apt-get install -yq --no-install-recommends pkg-config
RUN apt-get install -yq --no-install-recommends wget
RUN apt-get install -yq --no-install-recommends locales
RUN apt-get install -yq --no-install-recommends r-base

# if ubuntu 18.04
RUN apt install -yq --no-install-recommends dirmngr
RUN wget -qO- https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc | tee -a /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc
RUN add-apt-repository "deb https://cloud.r-project.org/bin/linux/ubuntu $(lsb_release -cs)-cran40/"
RUN apt-get install -yq --no-install-recommends r-base-core=4.1.3-1.1804.0
RUN apt-mark hold r-base-core
RUN apt-get install -yq --no-install-recommends r-cran-mass=7.3-51.5-2bionic0 r-cran-class=7.3-16-1bionic0 r-cran-nnet=7.3-13-1bionic0
RUN apt-get install -yq --no-install-recommends r-recommended=4.1.3-1.1804.0
RUN apt-get install -yq --no-install-recommends r-base=4.1.3-1.1804.0
RUN apt-mark hold r-base r-recommended
# if ubuntu 22.04
# RUN apt-get install -yq --no-install-recommends r-base=4.1.2-1ubuntu2

RUN apt-get install -yq --no-install-recommends zlib1g-dev
RUN apt-get install -yq --no-install-recommends libbz2-dev
RUN apt-get install -yq --no-install-recommends liblzma-dev
Expand Down Expand Up @@ -57,11 +73,11 @@ RUN bash build/opt-build.sh $OPT
COPY . .
RUN bash build/opt-build-local.sh $OPT

FROM ubuntu:22.04
FROM ubuntu:18.04

LABEL maintainer="[email protected]" \
uk.ac.sanger.cgp="Cancer, Ageing and Somatic Mutation, Wellcome Trust Sanger Institute" \
version="1.0.0" \
version="1.0.1" \
description="nanoseq docker"

ENV DEBIAN_FRONTEND=noninteractive
Expand All @@ -70,19 +86,21 @@ RUN apt-get install -yq --no-install-recommends \
apt-transport-https \
locales \
curl \
wget \
make \
g++ \
gcc \
gfortran \
libblas-dev \
liblapack-dev \
ca-certificates \
time \
zlib1g \
libz-dev \
python3 \
r-base \
r-cran-ggplot2 \
r-cran-data.table \
r-cran-epitools \
r-cran-gridextra \
r-cran-seqinr \
libxml2 \
libgsl27 \
libperl5.34 \
libgsl23 \
libperl5.26 \
libcapture-tiny-perl \
libfile-which-perl \
libpng16-16 \
Expand All @@ -92,6 +110,18 @@ unattended-upgrade -d -v && \
apt-get remove -yq unattended-upgrades && \
apt-get autoremove -yq

RUN apt install -yq --no-install-recommends software-properties-common dirmngr
RUN wget -qO- https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc | tee -a /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc
RUN add-apt-repository "deb https://cloud.r-project.org/bin/linux/ubuntu $(lsb_release -cs)-cran40/"
RUN apt-get install -yq --no-install-recommends r-base-core=4.1.3-1.1804.0
RUN apt-mark hold r-base-core
RUN apt-get install -yq --no-install-recommends r-cran-mass=7.3-51.5-2bionic0 r-cran-class=7.3-16-1bionic0 r-cran-nnet=7.3-13-1bionic0
RUN apt-get install -yq --no-install-recommends r-recommended=4.1.3-1.1804.0
RUN apt-get install -yq --no-install-recommends r-base=4.1.3-1.1804.0
RUN apt-mark hold r-base r-recommended
ADD build/libInstall2.R build/
RUN Rscript build/libInstall2.R

RUN locale-gen en_US.UTF-8
RUN update-locale LANG=en_US.UTF-8

Expand Down
31 changes: 28 additions & 3 deletions R/snv_merge_and_vaf_calc.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ if (length(grep("\\.gz", muts_vcf)) > 0) {
} else {
num_snvs = system(paste("grep -cv \"^#\" ", muts_vcf, "|| true"), intern = TRUE)
}
num_snvs = as.integer(num_snvs)

num_indels = 0
if (indel_vcf != '-') {
Expand All @@ -96,6 +97,7 @@ if (indel_vcf != '-') {
num_indels = system(paste("grep -cv \"^#\" ", indel_vcf, "|| true"), intern = TRUE)
}
}
num_indels = as.integer(num_indels)

col_t = c("character","numeric","character","character","character","character","character")
if (num_snvs == 0) {
Expand Down Expand Up @@ -245,8 +247,8 @@ if (num_snvs > 0) {
snvs_new2[new_row, "BEND"] = paste(snvs_tmp[, "BEND"], collapse = ",")
snvs_new2[new_row, "TRI"] = snvs_tmp[1, "TRI"]
snvs_new2[new_row, "QPOS"] = paste(snvs_tmp[, "QPOS"], collapse = ",")
snvs_new2[new_row, "DEPTH_FWD"] = median(snvs_tmp[, "DEPTH_FWD"])
snvs_new2[new_row, "DEPTH_REV"] = median(snvs_tmp[, "DEPTH_REV"])
snvs_new2[new_row, "DEPTH_FWD"] = median(as.numeric(snvs_tmp[, "DEPTH_FWD"]))
snvs_new2[new_row, "DEPTH_REV"] = median(as.numeric(snvs_tmp[, "DEPTH_REV"]))
snvs_new2[new_row, "DEPTH_NORM_FWD"] = snvs_tmp[1, "DEPTH_NORM_FWD"]
snvs_new2[new_row, "DEPTH_NORM_REV"] = snvs_tmp[1, "DEPTH_NORM_REV"]
snvs_new2[new_row, "TIMES_CALLED"] = freq
Expand Down Expand Up @@ -370,13 +372,26 @@ if (num_indels > 0) {
indels_new[new_row, "MQ"] = indels_tmp[1, "MQ"]
indels_new[new_row, "DP4"] = indels_tmp[1, "DP4"]
indels_new[new_row, "mut_id"] = indels_tmp[1, "mut_id"]

indels_new[new_row, "BBEG"] = paste(indels_tmp[, "BBEG"], collapse = ",")
indels_new[new_row, "BEND"] = paste(indels_tmp[, "BEND"], collapse = ",")
indels_new[new_row, "QPOS"] = paste(indels_tmp[, "QPOS"], collapse = ",")
indels_new[new_row, "DEPTH_FWD"] = median(as.numeric(indels_tmp[, "DEPTH_FWD"]))
indels_new[new_row, "DEPTH_REV"] = median(as.numeric(indels_tmp[, "DEPTH_REV"]))
indels_new[new_row, "DEPTH_NORM_FWD"] = indels_tmp[1, "DEPTH_NORM_FWD"]
indels_new[new_row, "DEPTH_NORM_REV"] = indels_tmp[1, "DEPTH_NORM_REV"]
indels_new[new_row, "DPLX_ASXS"] = paste(indels_tmp[, "DPLX_ASXS"], collapse = ",")
indels_new[new_row, "DPLX_CLIP"] = paste(indels_tmp[, "DPLX_CLIP"], collapse = ",")
indels_new[new_row, "DPLX_NM"] = paste(indels_tmp[, "DPLX_NM"], collapse = ",")
indels_new[new_row, "BULK_ASXS"] = paste(indels_tmp[, "BULK_ASXS"], collapse = ",")
indels_new[new_row, "BULK_NM"] = paste(indels_tmp[, "BULK_NM"], collapse = ",")
}
}

indels_new = indels_new[order(indels_new$chr, indels_new$pos),]

# drop some columns:
indels_new = indels_new[, c("chr", "pos", "kk", "ref", "mut", "qual", "filter", "rb_id", "TYPE", "TIMES_CALLED", "SEQ")]
indels_new = indels_new[, c("chr", "pos", "kk", "ref", "mut", "qual", "filter", "rb_id", "TYPE", "TIMES_CALLED", "SEQ","BBEG","BEND","QPOS","DEPTH_FWD","DEPTH_REV","DEPTH_NORM_FWD","DEPTH_NORM_REV","DPLX_ASXS","DPLX_CLIP","DPLX_NM","BULK_ASXS","BULK_NM")]

##########################################################################################
# Calculate VAFs for indels
Expand Down Expand Up @@ -468,13 +483,23 @@ if (num_indels > 0) {
indels_final$INFO = paste(indels_final$INFO, rep("DUPLEX_VAF=", nrow(indels_final)), indels_final$DUPLEX_VAF, ";", sep = "")
indels_final$INFO = paste(indels_final$INFO, rep("BAM_VAF=", nrow(indels_final)), indels_final$BAM_VAF, ";", sep = "")
indels_final$INFO = paste(indels_final$INFO, rep("BAM_VAF_BQ10=", nrow(indels_final)), indels_final$BAM_VAF_BQ10, ";", sep = "")
indels_final$INFO = paste(indels_final$INFO, rep("DEPTH_NORM_FWD=", nrow(indels_final)), indels_final$DEPTH_NORM_FWD, ";", sep = "")
indels_final$INFO = paste(indels_final$INFO, rep("DEPTH_NORM_REV=", nrow(indels_final)), indels_final$DEPTH_NORM_REV, ";", sep = "")
indels_final$INFO = paste(indels_final$INFO, rep("DEPTH_FWD=", nrow(indels_final)), indels_final$DEPTH_FWD, ";", sep = "")
indels_final$INFO = paste(indels_final$INFO, rep("DEPTH_REV=", nrow(indels_final)), indels_final$DEPTH_REV, ";", sep = "")
indels_final$INFO = paste(indels_final$INFO, rep("SEQ=", nrow(indels_final)), indels_final$SEQ, ";", sep = "")
indels_final$INFO = paste(indels_final$INFO, rep("DUPLEX_COV=", nrow(indels_final)), indels_final$DUPLEX_COV, ";", sep = "")
indels_final$INFO = paste(indels_final$INFO, rep("BAM_MUT=", nrow(indels_final)), indels_final$BAM_MUT, ";", sep = "")
indels_final$INFO = paste(indels_final$INFO, rep("BAM_COV=", nrow(indels_final)), indels_final$BAM_COV, ";", sep = "")
indels_final$INFO = paste(indels_final$INFO, rep("BAM_MUT_BQ10=", nrow(indels_final)), indels_final$BAM_MUT_BQ10, ";", sep = "")
indels_final$INFO = paste(indels_final$INFO, rep("BAM_COV_BQ10=", nrow(indels_final)), indels_final$BAM_COV_BQ10, ";", sep = "")
indels_final$INFO = paste(indels_final$INFO, rep("RB=", nrow(indels_final)), indels_final$rb_id, "", sep = "")
indels_final$INFO = paste(indels_final$INFO, rep("QPOS=", nrow(indels_final)), indels_final$QPOS, ";", sep = "")
indels_final$INFO = paste(indels_final$INFO, rep("DPLX_ASXS=", nrow(indels_final)), indels_final$DPLX_ASXS, ";", sep = "")
indels_final$INFO = paste(indels_final$INFO, rep("DPLX_CLIP=", nrow(indels_final)), indels_final$DPLX_CLIP, ";", sep = "")
indels_final$INFO = paste(indels_final$INFO, rep("DPLX_NM=", nrow(indels_final)), indels_final$DPLX_NM, ";", sep = "")
indels_final$INFO = paste(indels_final$INFO, rep("BULK_ASXS=", nrow(indels_final)), indels_final$BULK_ASXS, ";", sep = "")
indels_final$INFO = paste(indels_final$INFO, rep("BULK_NM=", nrow(indels_final)), indels_final$BULK_NM, "", sep = "")
}

# Describe in header:
Expand Down
35 changes: 35 additions & 0 deletions build/libInstall2.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/usr/bin/env Rscript

########## LICENCE ##########
# Copyright (c) 2022 Genome Research Ltd
#
# Author: CASM/Cancer IT <[email protected]>
#
# This file is part of NanoSeq.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# 1. The usage of a range of years within a copyright statement contained within
# this distribution should be interpreted as being equivalent to a list of years
# including the first and last year specified and all consecutive years between
# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007-
# 2009, 2011-2012’ should be interpreted as being identical to a statement that
# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright
# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being
# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008,
# 2009, 2010, 2011, 2012’.
###########################

#install R packages
install.packages(c("ggplot2", "data.table", "epitools", "gridExtra", "seqinr"))
6 changes: 4 additions & 2 deletions build/opt-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,10 @@ else
get_distro "libdeflate" "https://github.com/ebiggers/libdeflate/archive/$VER_LIBDEFLATE.tar.gz"
tar --strip-components 1 -C libdeflate -zxf libdeflate.tar.gz
cd libdeflate
make -j$CPU CFLAGS="-fPIC -O3" libdeflate.a
PREFIX=$INST_PATH make install
cmake -B build
cmake --build build
cmake --install build
cmake --install build --prefix $INST_PATH
cd $SETUP_DIR
rm -r libdeflate.tar.gz
touch $SETUP_DIR/libdeflate.success
Expand Down
17 changes: 14 additions & 3 deletions perl/indelCaller_step1.pl
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,9 @@
my $bulk_rev = $bulkReverseTotal;
next if($bulk_fwd + $bulk_rev < $bulk_min_cov); # Bulk minimum coverage
next if($dplxCLIP > $max_clip);
next if($dplxNM > 20); # made very liberal to allow long indels. Check the impact!
next if($dplxASXS < $min_asxs | $bulkASXS < $min_asxs ); #fa8: fixed bug, we needed to check AS-XS for the bulk too
next if($dplxNM > 20); # made very liberal to allow long indels. Check the impact! --> it seems is working, good
next if($dplxASXS < $min_asxs || $bulkASXS < $min_asxs ); #fa8: fixed bug, we needed to check AS-XS for the bulk too
# Fixed but: from bitwise OR to logical OR (ainsss)

if($r1 >= $min_size_subfam && $r2 >= $min_size_subfam) {
my $bulktotal = $bulkForwardTotal+$bulkReverseTotal;
Expand Down Expand Up @@ -152,7 +153,17 @@
$signature_trinuc = &reverse_signature($signature_trinuc);
}
$site_tags .= "$signature_trinuc;SW=$shearwater;cSNP=$commonSNP";

$site_tags .= ";BBEG=$dplxBreakpointBeg";
$site_tags .= ";BEND=$dplxBreakpointEnd";
$site_tags .= ";DEPTH_FWD=$r1";
$site_tags .= ";DEPTH_REV=$r2";
$site_tags .= ";DEPTH_NORM_FWD=$bulkForwardTotal";
$site_tags .= ";DEPTH_NORM_REV=$bulkReverseTotal";
$site_tags .= ";DPLX_ASXS=$dplxASXS";
$site_tags .= ";DPLX_CLIP=$dplxCLIP";
$site_tags .= ";DPLX_NM=$dplxNM";
$site_tags .= ";BULK_ASXS=$bulkASXS";
$site_tags .= ";BULK_NM=$bulkNM";
# If seen in the bulk, flag it:
if($bulkForwardIndel+$bulkReverseIndel > $max_vaf * $bulktotal) {
$site_tags .= ";BULK_SEEN($dplxForwardIndel+$dplxReverseIndel/$bulktotal)";
Expand Down
Loading

0 comments on commit a05afe2

Please sign in to comment.