diff --git a/.circleci/config.yml b/.circleci/config.yml
index fbc8d7b6..6da56726 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -2,7 +2,7 @@ version: 2
jobs:
build:
docker:
- - image: rocker/verse:4.2
+ - image: rocker/verse:4.3
environment:
_R_CHECK_FORCE_SUGGESTS_: false
steps:
diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml
index 8d242c1b..8c7b2c23 100644
--- a/.github/workflows/docker.yaml
+++ b/.github/workflows/docker.yaml
@@ -4,6 +4,7 @@ name: Publish Docker image
on:
release:
types: [published]
+ workflow_dispatch:
# push:
# branches: [main, master]
@@ -16,22 +17,22 @@ jobs:
uses: actions/checkout@v3
- name: Log in to Docker Hub
- uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
+ uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Extract metadata (tags, labels) for Docker
id: meta
- uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38
+ uses: docker/metadata-action@v5
with:
images: pkharchenkolab/numbat-rbase
- name: Build and push Docker image
- uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc
+ uses: docker/build-push-action@v5
with:
context: .
file: docker/Dockerfile
push: true
tags: ${{ steps.meta.outputs.tags }}
- labels: ${{ steps.meta.outputs.labels }}
\ No newline at end of file
+ labels: ${{ steps.meta.outputs.labels }}
diff --git a/DESCRIPTION b/DESCRIPTION
index 317e621c..93103446 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
Package: numbat
Title: Haplotype-Aware CNV Analysis from scRNA-Seq
URL: https://github.com/kharchenkolab/numbat/, https://kharchenkolab.github.io/numbat/
-Version: 1.3.4
+Version: 1.4.0
Authors@R: c(person("Teng","Gao", email="tgaoteng@gmail.com", role=c("cre", "aut")), person("Ruslan", "Soldatov", email="soldatr@mskcc.org", role="aut"), person("Hirak", "Sarkar", email="hirak_sarkar@hms.harvard.edu", role="aut"), person("Evan", "Biederstedt", email="evan.biederstedt@gmail.com", role="aut"), person("Peter", "Kharchenko", email = "peter_kharchenko@hms.harvard.edu", role = "aut"))
Description: A computational method that infers copy number variations (CNVs) in cancer scRNA-seq data and reconstructs the tumor phylogeny. 'numbat' integrates signals from gene expression, allelic ratio, and population haplotype structures to accurately infer allele-specific CNVs in single cells and reconstruct their lineage relationship. 'numbat' can be used to: 1. detect allele-specific copy number variations from single-cells; 2. differentiate tumor versus normal cells in the tumor microenvironment; 3. infer the clonal architecture and evolutionary history of profiled tumors. 'numbat' does not require tumor/normal-paired DNA or genotype data, but operates solely on the donor scRNA-data data (for example, 10x Cell Ranger output). Additional examples and documentations are available at . For details on the method please see Gao et al. Nature Biotechnology (2022) .
License: MIT + file LICENSE
@@ -21,6 +21,7 @@ Imports:
ggraph,
ggtree,
glue,
+ hahmmr,
igraph,
IRanges,
logger,
diff --git a/NAMESPACE b/NAMESPACE
index e968827c..d075cc39 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -32,6 +32,16 @@ importFrom(data.table,fread)
importFrom(data.table,fwrite)
importFrom(ggtree,"%<+%")
importFrom(grDevices,colorRampPalette)
+importFrom(hahmmr,dbbinom)
+importFrom(hahmmr,dpoilog)
+importFrom(hahmmr,fit_lnpois_cpp)
+importFrom(hahmmr,forward_back_allele)
+importFrom(hahmmr,l_bbinom)
+importFrom(hahmmr,l_lnpois)
+importFrom(hahmmr,likelihood_allele)
+importFrom(hahmmr,logSumExp)
+importFrom(hahmmr,run_allele_hmm_s5)
+importFrom(hahmmr,run_joint_hmm_s15)
importFrom(igraph,"E<-")
importFrom(igraph,"V<-")
importFrom(igraph,E)
@@ -74,4 +84,4 @@ importFrom(stats,setNames)
importFrom(stats,start)
importFrom(stats,t.test)
importFrom(utils,combn)
-useDynLib(numbat)
+useDynLib(numbat, .registration=TRUE)
diff --git a/NEWS.md b/NEWS.md
index 16a7411a..541f956a 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,13 @@
+# numbat 1.4.0 - 02/23/2023
+
+* Integration with hahmmr
+
+* Better input checking for pileup_and_phase
+
+* Fix compatibility with igraph v2.0+ and tidygraph v1.3+ (#150)
+
+* Fix multiallelic CNV state probability reporting (#146)
+
# numbat 1.3.3 - 08/15/2023
* Fix plotting issue #135
diff --git a/R/RcppExports.R b/R/RcppExports.R
index de4ce30d..354af170 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -1,43 +1,7 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
-cppdbbinom <- function(x, size, alpha, beta, log_prob = FALSE) {
- .Call('_numbat_cppdbbinom', PACKAGE = 'numbat', x, size, alpha, beta, log_prob)
-}
-
-cpp_dgpois <- function(x, alpha, beta, log_prob = FALSE) {
- .Call('_numbat_cpp_dgpois', PACKAGE = 'numbat', x, alpha, beta, log_prob)
-}
-
-logSumExp <- function(x) {
- .Call('_numbat_logSumExp', PACKAGE = 'numbat', x)
-}
-
-likelihood_compute <- function(logphi, logprob, logPi, n, m) {
- .Call('_numbat_likelihood_compute', PACKAGE = 'numbat', logphi, logprob, logPi, n, m)
-}
-
-forward_backward_compute <- function(logphi, logprob, logPi, n, m) {
- .Call('_numbat_forward_backward_compute', PACKAGE = 'numbat', logphi, logprob, logPi, n, m)
-}
-
-viterbi_compute <- function(log_delta, logprob, logPi, n, m, nu, z) {
- .Call('_numbat_viterbi_compute', PACKAGE = 'numbat', log_delta, logprob, logPi, n, m, nu, z)
-}
-
roman2int_internal <- function(letters, nchar) {
- .Call('_numbat_roman2int_internal', PACKAGE = 'numbat', letters, nchar)
-}
-
-fit_lnpois_cpp <- function(Y_obs, lambda_ref, d) {
- .Call('_numbat_fit_lnpois_cpp', PACKAGE = 'numbat', Y_obs, lambda_ref, d)
-}
-
-poilog1 <- function(x, my, sig) {
- .Call('_numbat_poilog1', PACKAGE = 'numbat', x, my, sig)
-}
-
-l_lnpois_cpp <- function(Y_obs, lambda_ref, d, mu, sig, phi = 1.0) {
- .Call('_numbat_l_lnpois_cpp', PACKAGE = 'numbat', Y_obs, lambda_ref, d, mu, sig, phi)
+ .Call(`_numbat_roman2int_internal`, letters, nchar)
}
diff --git a/R/diagnostics.R b/R/diagnostics.R
index 0e7cd58a..ea127ba2 100644
--- a/R/diagnostics.R
+++ b/R/diagnostics.R
@@ -149,6 +149,12 @@ check_exp_ref = function(lambdas_ref) {
if (!is.matrix(lambdas_ref)) {
lambdas_ref = as.matrix(lambdas_ref) %>% magrittr::set_colnames('ref')
}
+
+ if (any(is.na(lambdas_ref))) {
+ msg = "The reference expression matrix 'lambdas_ref' should not contain any NA values."
+ log_error(msg)
+ stop(msg)
+ }
# check if all entries in the reference profile are integers
if (all(lambdas_ref == as.integer(lambdas_ref))) {
diff --git a/R/genotyping.R b/R/genotyping.R
index 1f773b89..9827ab2b 100644
--- a/R/genotyping.R
+++ b/R/genotyping.R
@@ -211,9 +211,6 @@ preprocess_allele = function(
# phased VCF
vcf_phased = vcf_phased %>%
- mutate(INFO = str_remove_all(INFO, '[:alpha:]|=')) %>%
- tidyr::separate(col = 'INFO', into = c('AD', 'DP', 'OTH'), sep = ';') %>%
- mutate_at(c('AD', 'DP', 'OTH'), as.integer) %>%
mutate(snp_id = paste(CHROM, POS, REF, ALT, sep = '_')) %>%
mutate(GT = get(sample))
diff --git a/R/hmm.R b/R/hmm.R
index 596c1f16..6cdd5d22 100644
--- a/R/hmm.R
+++ b/R/hmm.R
@@ -1,62 +1,8 @@
-############ Probability distributions ############
-
-#'@useDynLib numbat
#'@import Rcpp
NULL
-## refer to https://github.com/evanbiederstedt/poilogcpp
-
-#' Returns the density for the Poisson lognormal distribution with parameters mu and sig
-#'
-#' @param x vector of integers, the observations
-#' @param mu mean of lognormal distribution
-#' @param sig standard deviation of lognormal distribution
-#' @param log boolean Return the log density if TRUE (default=FALSE)
-#' @return NULL
-#' @keywords internal
-dpoilog <- function(x, mu, sig, log=FALSE){
- if (!(length(x) == length(mu) & length(x) == length(sig))) stop('dpoilog: All parameters must be same length')
- if (any((x[x!=0]/trunc(x[x!=0]))!=1)) stop('dpoilog: all x must be integers')
- if (any(x<0)) stop('dpoilog: one or several values of x are negative')
- if (!all(is.finite(c(mu,sig)))) {
- stop('dpoilog: all parameters should be finite')
- }
- if (any(is.na(c(x,mu,sig)))) stop('dpoilog: Parameters cannot be NA')
- if (any(sig<=0)) {
- stop(c('dpoilog: sig is not larger than 0', unique(sig)))
- }
-
- p = poilog1(x=as.integer(x), my=as.double(mu), sig=as.double(sig^2))
-
- p[p == 0] = 1e-15
-
- if (log) {
- return(log(p))
- } else {
- return(p)
- }
-}
-
-#' Beta-binomial distribution density function
-#' A distribution is beta-binomial if p, the probability of success,
-#' in a binomial distribution has a beta distribution with shape
-#' parameters alpha > 0 and beta > 0
-#' For more details, see extraDistr::dbbinom
-#'
-#' @param x vector of quantiles
-#' @param size number of trials (zero or more)
-#' @param alpha numeric (default=1)
-#' @param beta numeric (default=1)
-#' @param log boolean (default=FALSE)
-#' @return density values returned as numeric vector
-#' @keywords internal
-dbbinom <- function(x, size, alpha = 1, beta = 1, log = FALSE) {
- cppdbbinom(x, size, alpha, beta, log[1L])
-}
-
############ Allele HMMs ############
-
#' Get an allele HMM
#' @param pAD integer vector Paternal allele counts
#' @param DP integer vector Total alelle counts
@@ -100,162 +46,6 @@ get_allele_hmm = function(pAD, DP, p_s, theta, gamma = 20) {
return(hmm)
}
-#' Viterbi algorithm for allele HMM
-#' @param hmm HMM object; expect variables x (allele depth), d (total depth),
-#' logPi (log transition prob matrix), delta (prior for each state),
-#' alpha (alpha for each state), beta (beta for each state),
-#' states (states), p_s (phase switch probs)
-#' @keywords internal
-viterbi_allele <- function(hmm) {
-
- N <- hmm$N
- M <- hmm$M
- nu <- matrix(NA, nrow = N, ncol = M)
- z <- rep(NA, N)
-
- logprob = sapply(1:M, function(m) {
-
- l_x = dbbinom(x = hmm$x, size = hmm$d, alpha = hmm$alpha[,m], beta = hmm$beta[,m], log = TRUE)
-
- l_x[is.na(l_x)] = 0
-
- return(l_x)
-
- })
-
- z = viterbi_compute(log(hmm$delta), logprob, hmm$logPi, N, M, nu, z)
-
- return(z)
-}
-
-#' Forward-backward algorithm for allele HMM
-#' @param hmm HMM object; expect variables x (allele depth), d (total depth),
-#' logPi (log transition prob matrix), delta (prior for each state),
-#' alpha (alpha for each state), beta (beta for each state),
-#' states (states), p_s (phase switch probs)
-#' @keywords internal
-forward_back_allele = function(hmm) {
-
- # case of one-data point
- if (hmm$N == 1) {
- return(NA)
- }
-
- N <- hmm$N
- M <- hmm$M
-
- logprob = sapply(1:M, function(m) {
-
- l_x = dbbinom(x = hmm$x, size = hmm$d, alpha = hmm$alpha[,m], beta = hmm$beta[,m], log = TRUE)
-
- l_x[is.na(l_x)] = 0
-
- return(l_x)
-
- })
-
- logphi <- log(hmm$delta)
-
- marginals = forward_backward_compute(logphi, logprob, hmm$logPi, N, M)
-
- colnames(marginals) = hmm$states
-
- return(marginals)
-}
-
-#' Only compute total log likelihood from an allele HMM
-#' @param hmm HMM object; expect variables x (allele depth), d (total depth),
-#' logPi (log transition prob matrix), delta (prior for each state),
-#' alpha (alpha for each state), beta (beta for each state),
-#' states (states), p_s (phase switch probs)
-#' @keywords internal
-likelihood_allele = function(hmm) {
-
- N <- hmm$N
- M <- hmm$M
-
- logprob = sapply(1:M, function(m) {
-
- l_x = dbbinom(x = hmm$x, size = hmm$d, alpha = hmm$alpha[,m], beta = hmm$beta[,m], log = TRUE)
-
- l_x[is.na(l_x)] = 0
-
- return(l_x)
-
- })
-
- logphi <- log(hmm$delta)
-
- LL <- likelihood_compute(logphi, logprob, hmm$logPi, N, M)
-
- return(LL)
-}
-
-#' allele-only HMM
-#' @param pAD integer vector Paternal allele counts
-#' @param DP integer vector Total alelle counts
-#' @param p_s numeric vector Phase switch probabilities
-#' @param t numeric Transition probability between copy number states
-#' @param theta_min numeric Minimum haplotype frequency deviation threshold
-#' @param gamma numeric Overdispersion in the allele-specific expression
-#' @return character vector Decoded states
-#' @keywords internal
-run_allele_hmm = function(pAD, DP, p_s, t = 1e-5, theta_min = 0.08, gamma = 20, prior = NULL) {
-
- gamma = unique(gamma)
-
- if (length(gamma) > 1) {
- stop('More than one gamma parameter')
- }
-
- # states
- states = c("neu", "theta_1_up", "theta_1_down", "theta_2_up", "theta_2_down")
-
- N = length(pAD)
- M = 5
-
- # transition matrices
- calc_trans_mat = function(p_s, t) {
- matrix(
- c(1-t, t/4, t/4, t/4, t/4,
- t/2, (1-t)*(1-p_s), (1-t)*p_s, t/4, t/4,
- t/2, (1-t)*p_s, (1-t)*(1-p_s), t/4, t/4,
- t/2, t/4, t/4, (1-t)*(1-p_s), (1-t)*p_s,
- t/2, t/4, t/4, (1-t)*p_s, (1-t)*(1-p_s)),
- ncol = 5,
- byrow = TRUE
- )
- }
-
- Pi = sapply(p_s, function(p_s) {calc_trans_mat(p_s, t)}) %>%
- array(dim = c(M, M, N))
-
- # intitial probabilities
- if (is.null(prior)) {
- prior = rep(1/5, 5)
- }
-
- theta_1 = theta_min
- theta_2 = 0.4
- alphas = gamma * c(0.5, 0.5 + theta_1, 0.5 - theta_1, 0.5 + theta_2, 0.5 - theta_2)
- betas = gamma * c(0.5, 0.5 - theta_1, 0.5 + theta_1, 0.5 - theta_2, 0.5 + theta_2)
-
- hmm = list(
- x = pAD,
- logPi = log(Pi),
- delta = prior,
- alpha = matrix(rep(alphas, N), ncol = M, byrow = TRUE),
- beta = matrix(rep(betas, N), ncol = M, byrow = TRUE),
- d = DP,
- N = N,
- M = M,
- states = states
- )
-
- mpc = states[viterbi_allele(hmm)]
-
- return(mpc)
-}
#' Calculate allele likelihoods
#' @param pAD integer vector Paternal allele counts
@@ -270,260 +60,6 @@ calc_allele_lik = function (pAD, DP, p_s, theta, gamma = 20) {
return(LL)
}
-############ Joint HMM ############
-
-
-#' Run joint HMM on a pseudobulk profile
-#' @param pAD integer vector Paternal allele counts
-#' @param DP integer vector Total alelle counts
-#' @param p_s numeric vector Phase switch probabilities
-#' @param theta_min numeric Minimum haplotype imbalance threshold
-#' @param gamma numeric Overdispersion in the allele-specific expression
-#' @param Y_obs numeric vector Observed gene counts
-#' @param lambda_ref numeric vector Reference expression rates
-#' @param d_total integer Total library size for expression counts
-#' @param phi_del numeric Expected fold change for deletion
-#' @param phi_amp numeric Expected fold change for amplification
-#' @param phi_bamp numeric Expected fold change for balanced amplification
-#' @param phi_bdel numeric Expected fold change for balanced deletion
-#' @param mu numeric Global expression bias
-#' @param sig numeric Global expression variance
-#' @param t numeric Transition probability between copy number states
-#' @param exp_only logical Whether to only use expression data
-#' @param allele_only logical Whether to only use allele data
-#' @keywords internal
-run_joint_hmm = function(
- pAD, DP, p_s, Y_obs = 0, lambda_ref = 0, d_total = 0, theta_min = 0.08, theta_neu = 0,
- bal_cnv = TRUE, phi_del = 2^(-0.25), phi_amp = 2^(0.25), phi_bamp = phi_amp, phi_bdel = phi_del,
- alpha = 1, beta = 1,
- mu = 0, sig = 1,
- t = 1e-5, gamma = 18,
- prior = NULL, exp_only = FALSE, allele_only = FALSE,
- classify_allele = FALSE, phasing = TRUE, debug = FALSE
-) {
-
- # states
- states = c(
- "1" = "neu", "2" = "del_1_up", "3" = "del_1_down", "4" = "del_2_up", "5" = "del_2_down",
- "6" = "loh_1_up", "7" = "loh_1_down", "8" = "loh_2_up", "9" = "loh_2_down",
- "10" = "amp_1_up", "11" = "amp_1_down", "12" = "amp_2_up", "13" = "amp_2_down",
- "14" = "bamp", "15" = "bdel"
- )
-
- states_cn = str_remove(states, '_up|_down')
- states_phase = str_extract(states, 'up|down')
-
- # relative abundance of states
- w = c('neu' = 1, 'del_1' = 1, 'del_2' = 1e-10, 'loh_1' = 1, 'loh_2' = 1e-10, 'amp_1' = 1, 'amp_2' = 1e-10, 'bamp' = 1e-4, 'bdel' = 1e-10)
-
- # intitial probabilities
- if (is.null(prior)) {
- # encourage CNV from telomeres
- prior = sapply(1:length(states), function(to){
- get_trans_probs(
- t = min(t * 100, 1), p_s = 0, w,
- cn_from = 'neu', phase_from = NA,
- cn_to = states_cn[to], phase_to = states_phase[to])
- })
- }
-
- # to do: renormalize the probabilities after deleting states
- states_index = 1:length(states)
-
- if (!bal_cnv) {
- states_index = 1:13
- }
-
- if (exp_only) {
- pAD = rep(NA, length(pAD))
- p_s = rep(0, length(p_s))
- }
-
- if (allele_only) {
- states_index = c(1, 6:9)
-
- Y_obs = rep(NA, length(Y_obs))
- }
-
- if (!phasing) {
- states_index = c(1, 6)
-
- p_s = ifelse(is.na(pAD), p_s, 0)
- pAD = ifelse(pAD > (DP - pAD), pAD, DP - pAD)
- theta_neu = 0.1
- theta_min = 0.45
- }
-
- if (classify_allele) {
- states_index = c(6,7)
- }
-
- # transition matrices
- As = calc_trans_mat(t, p_s, w, states_cn, states_phase)
-
- theta_u_1 = 0.5 + theta_min
- theta_d_1 = 0.5 - theta_min
-
- theta_u_2 = 0.9
- theta_d_2 = 0.1
-
- theta_u_neu = 0.5 + theta_neu
- theta_d_neu = 0.5 - theta_neu
-
- # parameters for each state
- alpha_states = gamma * c(theta_u_neu, rep(c(theta_u_1, theta_d_1, theta_u_2, theta_d_2), 3), theta_u_neu, theta_u_neu)
- beta_states = gamma * c(theta_d_neu, rep(c(theta_d_1, theta_u_1, theta_d_2, theta_u_2), 3), theta_d_neu, theta_d_neu)
- phi_states = c(1, rep(phi_del, 2), rep(0.5, 2), rep(1, 4), rep(phi_amp, 2), rep(2.5, 2), phi_bamp, phi_bdel)
-
- # subset for relevant states
- prior = prior[states_index]
- As = As[states_index, states_index,]
- alpha_states = alpha_states[states_index]
- beta_states = beta_states[states_index]
- phi_states = phi_states[states_index]
- states = states[states_index] %>% setNames(1:length(.))
-
- N = length(Y_obs)
-
- if (length(mu) == 1) {
- mu = rep(mu, N)
- sig = rep(sig, N)
- }
-
- if (length(d_total) == 1) {
- d_total = rep(d_total, N)
- }
-
- hmm = list(
- x = matrix(pAD),
- d = matrix(DP),
- y = matrix(Y_obs),
- l = matrix(d_total),
- lambda = matrix(lambda_ref),
- mu = matrix(mu),
- sig = matrix(sig),
- logPi = log(As),
- phi = matrix(phi_states),
- delta = matrix(prior),
- alpha = matrix(alpha_states),
- beta = matrix(beta_states),
- states = matrix(states),
- p_s = p_s
- )
-
- MPC = states[viterbi_joint(hmm)]
-
- return(MPC)
-}
-
-
-#' Calculate the transition matrix for joint HMM
-#' @param t numeric; CNV state transition probability
-#' @param p_s numeric; phase switch probability
-#' @param w numeric; relative abundance of states
-#' @param states_cn character; CNV states
-#' @param states_phase character; haplotype phase states
-#' @return array; transition matrix
-#' @keywords internal
-calc_trans_mat = function(t, p_s, w, states_cn, states_phase) {
-
- sapply(1:length(states_cn), function(from) {
- sapply(1:length(states_cn), function(to) {
- get_trans_probs(t, p_s, w, states_cn[from], states_phase[from], states_cn[to], states_phase[to])
- }) %>% t
- }) %>% t %>%
- array(dim = c(length(states_cn), length(states_cn), length(p_s)))
-
-}
-
-
-#' Helper function to calculate transition porbabilities
-#' cn/phase are sclars, only p_s is vectorized
-#' @param t numeric; CNV state transition probability
-#' @param p_s numeric; phase switch probability
-#' @param w numeric; relative abundance of states
-#' @param cn_from character; originating CNV state
-#' @param phase_from character; originating haplotype phase state
-#' @param cn_to character; destination CNV state
-#' @param phase_to character; destination haplotype phase state
-#' @return numeric; transition probability
-#' @keywords internal
-get_trans_probs = function(t, p_s, w, cn_from, phase_from, cn_to, phase_to) {
-
- if (cn_from == 'neu' & cn_to == 'neu') {
- p_s = rep(0.5, length(p_s))
- }
-
- if (cn_from == cn_to) {
- if (is.na(phase_from) & is.na(phase_to)) {
- p = 1-t
- p = rep(p, length(p_s))
- } else if (phase_from == phase_to) {
- p = (1-t) * (1-p_s)
- } else {
- p = (1-t) * p_s
- }
- } else {
- p = t * w[[cn_to]]/sum(w[names(w)!=cn_from])
- if (!is.na(phase_to)) {
- p = p/2
- }
- p = rep(p, length(p_s))
- }
-
- return(p)
-}
-
-#' Generalized viterbi algorithm for joint HMM
-#' @param hmm HMM object; expect variables x (allele depth), d (total depth), y (expression count), l (cell total library size),
-#' lambda (reference expression rate), mu (global expression mean), sig (global expression standard deviation),
-#' logPi (log transition prob matrix), phi (expression fold change for each state), delta (prior for each state),
-#' alpha (alpha for each state), beta (beta for each state), states (states), p_s (phase switch probs)
-#' @keywords internal
-viterbi_joint <- function(hmm) {
-
- N <- nrow(hmm$x)
- M <- nrow(hmm$logPi[,,1])
- K <- ncol(hmm$x)
- nu <- matrix(NA, nrow = N, ncol = M)
- z <- rep(NA, N)
-
- logprob = sapply(1:M, function(m) {
-
- sapply(
- 1:K,
- function(k) {
-
- l_x = dbbinom(x = hmm$x[,k], size = hmm$d[,k], alpha = hmm$alpha[m,k], beta = hmm$beta[m,k], log = TRUE)
-
- l_x[is.na(l_x)] = 0
-
- if (!is.null(hmm$y)) {
- l_y = rep(0,N)
- valid = !is.na(hmm$y[,k])
-
- l_y[valid] = dpoilog(
- x = hmm$y[valid,k],
- sig = hmm$sig[valid,k],
- mu = hmm$mu[valid,k] + log(hmm$phi[m,k] * hmm$l[valid,k] * hmm$lambda[valid,k]),
- log = TRUE
- )
- } else {
- l_y = 0
- }
-
- return(l_x + l_y)
-
- }
- ) %>% rowSums()
-
- })
-
- z = viterbi_compute(log(hmm$delta), logprob, hmm$logPi, N, M, nu, z)
-
- return(z)
-}
-
############ Clonal deletion HMM ############
#' Viterbi for clonal LOH detection
diff --git a/R/main.R b/R/main.R
index a5a8148c..38f78ffc 100644
--- a/R/main.R
+++ b/R/main.R
@@ -15,9 +15,10 @@
#' @import patchwork
#' @importFrom grDevices colorRampPalette
#' @importFrom stats as.dendrogram as.dist cor cutree dbinom dnbinom dnorm dpois end hclust integrate model.matrix na.omit optim p.adjust pnorm reorder rnorm setNames start t.test as.ts complete.cases is.leaf na.contiguous
+#' @importFrom hahmmr logSumExp dpoilog dbbinom l_lnpois l_bbinom fit_lnpois_cpp likelihood_allele forward_back_allele run_joint_hmm_s15 run_allele_hmm_s5
#' @import tibble
#' @importFrom utils combn
-#' @useDynLib numbat
+#' @useDynLib numbat, .registration=TRUE
NULL
#' Run workflow to decompose tumor subclones
@@ -133,8 +134,9 @@ run_numbat = function(
######### Log parameters #########
log_message(paste('\n',
- glue('Numbat version: ', as.character(utils::packageVersion("numbat"))),
- glue('Scistreer version: ', as.character(utils::packageVersion("scistreer"))),
+ glue('numbat version: ', as.character(utils::packageVersion("numbat"))),
+ glue('scistreer version: ', as.character(utils::packageVersion("scistreer"))),
+ glue('hahmmr version: ', as.character(utils::packageVersion("hahmmr"))),
'Running under parameters:',
glue('t = {t}'),
glue('alpha = {alpha}'),
diff --git a/R/utils.R b/R/utils.R
index 0b792af5..88fe112b 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -558,7 +558,7 @@ analyze_bulk = function(
bulk = bulk %>%
group_by(CHROM) %>%
mutate(state =
- run_joint_hmm(
+ run_joint_hmm_s15(
pAD = pAD,
DP = DP,
p_s = p_s,
@@ -1128,7 +1128,7 @@ find_common_diploid = function(
bulk %>%
group_by(CHROM) %>%
mutate(state =
- run_allele_hmm(
+ run_allele_hmm_s5(
pAD = pAD,
DP = DP,
p_s = p_s,
@@ -1355,17 +1355,6 @@ get_segs_neu = function(bulks) {
return(segs_neu)
}
-#' calculate joint likelihood of allele data
-#' @param AD numeric vector Variant allele depth
-#' @param DP numeric vector Total allele depth
-#' @param alpha numeric Alpha parameter of Beta-Binomial distribution
-#' @param beta numeric Beta parameter of Beta-Binomial distribution
-#' @return numeric Joint log likelihood
-#' @keywords internal
-l_bbinom = function(AD, DP, alpha, beta) {
- sum(dbbinom(AD, DP, alpha, beta, log = TRUE))
-}
-
#' fit a Beta-Binomial model by maximum likelihood
#' @param AD numeric vector Variant allele depth
#' @param DP numeric vector Total allele depth
@@ -1407,63 +1396,6 @@ fit_gamma = function(AD, DP, start = 20) {
return(gamma)
}
-#' Density function for a gamma-poisson distribution
-#' @keywords internal
-dgpois <- function(x, shape, rate, scale = 1/rate, log = FALSE) {
- cpp_dgpois(x, shape, scale, log[1L])
-}
-
-
-#' calculate joint likelihood of a gamma-poisson model
-#' @param Y_obs numeric vector Gene expression counts
-#' @param lambda_ref numeric vector Reference expression levels
-#' @param d numeric Total library size
-#' @param alpha numeric Alpha parameter of Gamma-Poisson distribution
-#' @param beta numeric Beta parameter of Gamma-Poisson distribution
-#' @return numeric Joint log likelihood
-#' @keywords internal
-l_gpois = function(Y_obs, lambda_ref, d, alpha, beta, phi = 1) {
- sum(dgpois(Y_obs, shape = alpha, rate = beta/(phi * d * lambda_ref), log = TRUE))
-}
-
-#' fit a Gamma-Poisson model by maximum likelihood
-#' @param Y_obs numeric vector Gene expression counts
-#' @param lambda_ref numeric vector Reference expression levels
-#' @param d numeric Total library size
-#' @return numeric MLE of alpha and beta
-#' @keywords internal
-fit_gpois = function(Y_obs, lambda_ref, d) {
-
- Y_obs = Y_obs[lambda_ref > 0]
- lambda_ref = lambda_ref[lambda_ref > 0]
-
- fit = stats4::mle(
- minuslogl = function(alpha, beta) {
- -l_gpois(Y_obs, lambda_ref, d, alpha, beta)
- },
- start = c(1, 1),
- lower = c(0.01, 0.01),
- control = list('trace' = FALSE)
- )
-
- alpha = fit@coef[1]
- beta = fit@coef[2]
-
- return(fit)
-}
-
-#' calculate joint likelihood of a PLN model
-#' @param Y_obs numeric vector Gene expression counts
-#' @param lambda_ref numeric vector Reference expression levels
-#' @param d numeric Total library size
-#' @return numeric Joint log likelihood
-#' @keywords internal
-l_lnpois = function(Y_obs, lambda_ref, d, mu, sig, phi = 1) {
- if (any(sig <= 0)) {stop(glue('negative sigma. value: {sig}'))}
- if (length(sig) == 1) {sig = rep(sig, length(Y_obs))}
- sum(log(dpoilog(Y_obs, mu + log(phi * d * lambda_ref), sig)))
-}
-
#' fit a PLN model by maximum likelihood
#' @param Y_obs numeric vector Gene expression counts
#' @param lambda_ref numeric vector Reference expression levels
diff --git a/docker/Dockerfile b/docker/Dockerfile
index e54293e4..c620741f 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,10 +1,9 @@
-FROM r-base:4.2.1
+FROM r-base:4.3.1
-LABEL authors="Evan Biederstedt , Teng Gao " \
+LABEL authors="Evan Biederstedt , Teng Gao " \
version.image="0.1.3" \
version.pagoda2="0.1.3" \
- description="r-base image R 4.2.1 to run numbat with R"
-
+ description="r-base image to run numbat with R"
RUN apt-get update --yes && apt-get install --yes build-essential \
libcurl4-gnutls-dev libxml2-dev libssl-dev libbz2-dev zlib1g-dev \
@@ -14,16 +13,33 @@ RUN apt-get update --yes && apt-get install --yes build-essential \
libglpk-dev git autoconf gettext libtool automake \
samtools sudo
-
RUN cd /usr/bin && \
wget https://github.com/samtools/htslib/releases/download/1.15.1/htslib-1.15.1.tar.bz2 && \
tar -vxjf htslib-1.15.1.tar.bz2 && cd htslib-1.15.1 && make && sudo make install
+RUN R -e 'chooseCRANmirror(ind=42); install.packages("BiocManager")'
+
+RUN R -e 'chooseCRANmirror(ind=42); install.packages("ragg")'
+
+RUN R -e 'chooseCRANmirror(ind=42); install.packages("pkgdown")'
+
+RUN R -e 'chooseCRANmirror(ind=42); install.packages("devtools")'
+
+RUN R -e 'devtools::install_github("YuLab-SMU/ggtree", dependencies=TRUE)'
+
+RUN R -e 'devtools::install_github("kharchenkolab/numbat", dependencies=TRUE)'
+
+RUN git clone https://github.com/kharchenkolab/numbat.git
+
+RUN mkdir -p /tmp && chmod 777 /tmp
+
+RUN chmod 777 /numbat/inst/bin/pileup_and_phase.R
+
RUN mkdir data
# hg 38
-RUN cd /data && wget https://sourceforge.net/projects/cellsnp/files/SNPlist/genome1K.phase3.SNP_AF5e2.chr1toX.hg38.vcf.gz && gzip -d genome1K.phase3.SNP_AF5e2.chr1toX.hg38.vcf.gz
+RUN cd /data && wget -q https://sourceforge.net/projects/cellsnp/files/SNPlist/genome1K.phase3.SNP_AF5e2.chr1toX.hg38.vcf.gz && gzip -d genome1K.phase3.SNP_AF5e2.chr1toX.hg38.vcf.gz
## Note: not run; the user can follow these commands if they want hg19
@@ -32,7 +48,7 @@ RUN cd /data && wget https://sourceforge.net/projects/cellsnp/files/SNPlist/geno
# hg38
-RUN cd /data && wget http://pklab.med.harvard.edu/teng/data/1000G_hg38.zip && unzip 1000G_hg38.zip
+RUN cd /data && wget -q http://pklab.med.harvard.edu/teng/data/1000G_hg38.zip && unzip 1000G_hg38.zip
# hg19
##RUN cd /data && wget http://pklab.med.harvard.edu/teng/data/1000G_hg19.zip && unzip 1000G_hg19.zip
@@ -42,26 +58,7 @@ RUN cd /data && wget http://pklab.med.harvard.edu/teng/data/1000G_hg38.zip && un
RUN git clone https://github.com/single-cell-genetics/cellsnp-lite.git && cd cellsnp-lite && \
autoreconf -iv && ./configure && make && sudo make install
-RUN wget https://storage.googleapis.com/broad-alkesgroup-public/Eagle/downloads/Eagle_v2.4.1.tar.gz && cd .. && tar -xvzf Eagle_v2.4.1.tar.gz && cd /Eagle_v2.4.1 && cp eagle /usr/bin
-
-
-
-RUN R -e 'chooseCRANmirror(ind=42); install.packages("BiocManager")'
-
-RUN R -e 'chooseCRANmirror(ind=42); install.packages("ragg")'
-
-RUN R -e 'chooseCRANmirror(ind=42); install.packages("pkgdown")'
-
-RUN R -e 'chooseCRANmirror(ind=42); install.packages("devtools")'
-
-
-RUN R -e 'devtools::install_github("kharchenkolab/numbat", dependencies=TRUE)'
-
-RUN git clone https://github.com/kharchenkolab/numbat.git
-
-RUN mkdir -p /tmp && chmod 777 /tmp
-
-RUN chmod 777 /numbat/inst/bin/pileup_and_phase.R
+RUN wget -q https://storage.googleapis.com/broad-alkesgroup-public/Eagle/downloads/Eagle_v2.4.1.tar.gz && cd .. && tar -xvzf Eagle_v2.4.1.tar.gz && cd /Eagle_v2.4.1 && cp eagle /usr/bin
WORKDIR /numbat
diff --git a/inst/bin/pileup_and_phase.R b/inst/bin/pileup_and_phase.R
index cbf16d7d..8bcceb0c 100644
--- a/inst/bin/pileup_and_phase.R
+++ b/inst/bin/pileup_and_phase.R
@@ -11,12 +11,12 @@ parser = add_option(parser, '--gmap', default = NULL, type = "character", help =
parser = add_option(parser, '--eagle', type = "character", default = 'eagle', help = "Path to Eagle2 binary file")
parser = add_option(parser, '--snpvcf', default = NULL, type = "character", help = "SNP VCF for pileup")
parser = add_option(parser, '--paneldir', default = NULL, type = "character", help = "Directory to phasing reference panel (BCF files)")
-parser = add_option(parser, '--outdir', default = './pileup_and_phase', type = "character", help = "Output directory")
+parser = add_option(parser, '--outdir', default = NULL, type = "character", help = "Output directory")
parser = add_option(parser, '--ncores', default = 1, type = "integer", help = "Number of cores")
parser = add_option(parser, '--UMItag', default = "Auto", type = "character", help = "UMI tag in bam; should be Auto for 10x and XM for Slide-seq")
parser = add_option(parser, '--cellTAG', default = "CB", type = "character", help = "Cell tag in bam; should be CB for 10x and XC for Slide-seq")
parser = add_option(parser, '--smartseq', default = FALSE, action = 'store_true', help = "Running with SMART-seq mode; Supply a txt file containing directories of BAM files to --bams and a txt file containing cell names to --barcodes (each entry on its own line for both; ordering must match).")
-parser = add_option(parser, '--bulk', default = FALSE, action = 'store_true', help = "Running with bulk RNA-seq mode")
+parser = add_option(parser, '--bulk', default = FALSE, action = 'store_true', help = "Running with bulk RNA-seq mode; supply --bams and --samples but not --barcodes.")
args <- parse_args(parser)
suppressPackageStartupMessages({
@@ -29,9 +29,25 @@ suppressPackageStartupMessages({
library(numbat)
})
-# required args
-if (any(is.null(c(args$bams, args$barcodes, args$gmap, args$snpvcf, args$paneldir)))) {
- stop('Missing one or more required arguments: bams, barcodes, gmap, snpvcf, paneldir')
+if (any(sapply(list(args$bams, args$snpvcf, args$outdir, args$paneldir, args$gmap), is.null))) {
+ stop('Missing one or more always required arguments: --bams, --snpvcf, --outdir, --paneldir, --gmap')
+}
+
+if (args$smartseq) {
+ if (any(sapply(list(args$barcodes), is.null))) {
+ stop('Missing one or more required arguments for smartseq mode: --barcodes')
+ }
+ mode = 'SMART-Seq'
+} else if (args$bulk) {
+ if (any(sapply(list(args$samples), is.null))) {
+ stop('Missing one or more required arguments for bulk mode: --samples')
+ }
+ mode = 'Bulk'
+} else {
+ if (any(sapply(list(args$samples, args$barcodes), is.null))) {
+ stop('Missing one or more required arguments for 10x mode: --samples, --barcodes')
+ }
+ mode = '10X'
}
label = args$label
@@ -52,8 +68,39 @@ cellTAG = args$cellTAG
smartseq = args$smartseq
bulk = args$bulk
genome = ifelse(str_detect(args$gmap, 'hg19'), 'hg19', 'hg38')
+
+message(paste0('Running in ', mode, ' mode'))
message(paste0('Using genome version: ', genome))
+## check if files exist
+for (bam in bams) {
+ if (!file.exists(bam)) {
+ stop('BAM file not found')
+ }
+}
+
+if (!file.exists(snpvcf)) {
+ stop('SNP VCF not found')
+}
+
+
+if (!is.null(barcodes)) {
+ for (barcode in barcodes) {
+ if (!file.exists(barcode)) {
+ stop('Barcode file not found')
+ }
+ }
+}
+
+if (!file.exists(gmap)) {
+ stop('Genetic map not found')
+}
+
+if (!file.exists(paneldir)) {
+ stop('Phasing reference panel not found')
+}
+
+
dir.create(outdir, showWarnings = FALSE, recursive = TRUE)
dir.create(glue('{outdir}/pileup'), showWarnings = FALSE)
dir.create(glue('{outdir}/phasing'), showWarnings = FALSE)
diff --git a/man/calc_trans_mat.Rd b/man/calc_trans_mat.Rd
deleted file mode 100644
index 5e322034..00000000
--- a/man/calc_trans_mat.Rd
+++ /dev/null
@@ -1,26 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/hmm.R
-\name{calc_trans_mat}
-\alias{calc_trans_mat}
-\title{Calculate the transition matrix for joint HMM}
-\usage{
-calc_trans_mat(t, p_s, w, states_cn, states_phase)
-}
-\arguments{
-\item{t}{numeric; CNV state transition probability}
-
-\item{p_s}{numeric; phase switch probability}
-
-\item{w}{numeric; relative abundance of states}
-
-\item{states_cn}{character; CNV states}
-
-\item{states_phase}{character; haplotype phase states}
-}
-\value{
-array; transition matrix
-}
-\description{
-Calculate the transition matrix for joint HMM
-}
-\keyword{internal}
diff --git a/man/dbbinom.Rd b/man/dbbinom.Rd
deleted file mode 100644
index f8266053..00000000
--- a/man/dbbinom.Rd
+++ /dev/null
@@ -1,34 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/hmm.R
-\name{dbbinom}
-\alias{dbbinom}
-\title{Beta-binomial distribution density function
-A distribution is beta-binomial if p, the probability of success,
-in a binomial distribution has a beta distribution with shape
-parameters alpha > 0 and beta > 0
-For more details, see extraDistr::dbbinom}
-\usage{
-dbbinom(x, size, alpha = 1, beta = 1, log = FALSE)
-}
-\arguments{
-\item{x}{vector of quantiles}
-
-\item{size}{number of trials (zero or more)}
-
-\item{alpha}{numeric (default=1)}
-
-\item{beta}{numeric (default=1)}
-
-\item{log}{boolean (default=FALSE)}
-}
-\value{
-density values returned as numeric vector
-}
-\description{
-Beta-binomial distribution density function
-A distribution is beta-binomial if p, the probability of success,
-in a binomial distribution has a beta distribution with shape
-parameters alpha > 0 and beta > 0
-For more details, see extraDistr::dbbinom
-}
-\keyword{internal}
diff --git a/man/dgpois.Rd b/man/dgpois.Rd
deleted file mode 100644
index 5c6a14f5..00000000
--- a/man/dgpois.Rd
+++ /dev/null
@@ -1,12 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils.R
-\name{dgpois}
-\alias{dgpois}
-\title{Density function for a gamma-poisson distribution}
-\usage{
-dgpois(x, shape, rate, scale = 1/rate, log = FALSE)
-}
-\description{
-Density function for a gamma-poisson distribution
-}
-\keyword{internal}
diff --git a/man/dpoilog.Rd b/man/dpoilog.Rd
deleted file mode 100644
index 816b5ec8..00000000
--- a/man/dpoilog.Rd
+++ /dev/null
@@ -1,21 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/hmm.R
-\name{dpoilog}
-\alias{dpoilog}
-\title{Returns the density for the Poisson lognormal distribution with parameters mu and sig}
-\usage{
-dpoilog(x, mu, sig, log = FALSE)
-}
-\arguments{
-\item{x}{vector of integers, the observations}
-
-\item{mu}{mean of lognormal distribution}
-
-\item{sig}{standard deviation of lognormal distribution}
-
-\item{log}{boolean Return the log density if TRUE (default=FALSE)}
-}
-\description{
-Returns the density for the Poisson lognormal distribution with parameters mu and sig
-}
-\keyword{internal}
diff --git a/man/fit_gpois.Rd b/man/fit_gpois.Rd
deleted file mode 100644
index f7688725..00000000
--- a/man/fit_gpois.Rd
+++ /dev/null
@@ -1,22 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils.R
-\name{fit_gpois}
-\alias{fit_gpois}
-\title{fit a Gamma-Poisson model by maximum likelihood}
-\usage{
-fit_gpois(Y_obs, lambda_ref, d)
-}
-\arguments{
-\item{Y_obs}{numeric vector Gene expression counts}
-
-\item{lambda_ref}{numeric vector Reference expression levels}
-
-\item{d}{numeric Total library size}
-}
-\value{
-numeric MLE of alpha and beta
-}
-\description{
-fit a Gamma-Poisson model by maximum likelihood
-}
-\keyword{internal}
diff --git a/man/forward_back_allele.Rd b/man/forward_back_allele.Rd
deleted file mode 100644
index 2f03aa80..00000000
--- a/man/forward_back_allele.Rd
+++ /dev/null
@@ -1,18 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/hmm.R
-\name{forward_back_allele}
-\alias{forward_back_allele}
-\title{Forward-backward algorithm for allele HMM}
-\usage{
-forward_back_allele(hmm)
-}
-\arguments{
-\item{hmm}{HMM object; expect variables x (allele depth), d (total depth),
-logPi (log transition prob matrix), delta (prior for each state),
-alpha (alpha for each state), beta (beta for each state),
-states (states), p_s (phase switch probs)}
-}
-\description{
-Forward-backward algorithm for allele HMM
-}
-\keyword{internal}
diff --git a/man/get_trans_probs.Rd b/man/get_trans_probs.Rd
deleted file mode 100644
index 9e71ef52..00000000
--- a/man/get_trans_probs.Rd
+++ /dev/null
@@ -1,32 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/hmm.R
-\name{get_trans_probs}
-\alias{get_trans_probs}
-\title{Helper function to calculate transition porbabilities
-cn/phase are sclars, only p_s is vectorized}
-\usage{
-get_trans_probs(t, p_s, w, cn_from, phase_from, cn_to, phase_to)
-}
-\arguments{
-\item{t}{numeric; CNV state transition probability}
-
-\item{p_s}{numeric; phase switch probability}
-
-\item{w}{numeric; relative abundance of states}
-
-\item{cn_from}{character; originating CNV state}
-
-\item{phase_from}{character; originating haplotype phase state}
-
-\item{cn_to}{character; destination CNV state}
-
-\item{phase_to}{character; destination haplotype phase state}
-}
-\value{
-numeric; transition probability
-}
-\description{
-Helper function to calculate transition porbabilities
-cn/phase are sclars, only p_s is vectorized
-}
-\keyword{internal}
diff --git a/man/l_bbinom.Rd b/man/l_bbinom.Rd
deleted file mode 100644
index 4325edda..00000000
--- a/man/l_bbinom.Rd
+++ /dev/null
@@ -1,24 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils.R
-\name{l_bbinom}
-\alias{l_bbinom}
-\title{calculate joint likelihood of allele data}
-\usage{
-l_bbinom(AD, DP, alpha, beta)
-}
-\arguments{
-\item{AD}{numeric vector Variant allele depth}
-
-\item{DP}{numeric vector Total allele depth}
-
-\item{alpha}{numeric Alpha parameter of Beta-Binomial distribution}
-
-\item{beta}{numeric Beta parameter of Beta-Binomial distribution}
-}
-\value{
-numeric Joint log likelihood
-}
-\description{
-calculate joint likelihood of allele data
-}
-\keyword{internal}
diff --git a/man/l_gpois.Rd b/man/l_gpois.Rd
deleted file mode 100644
index 72ce8e17..00000000
--- a/man/l_gpois.Rd
+++ /dev/null
@@ -1,26 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils.R
-\name{l_gpois}
-\alias{l_gpois}
-\title{calculate joint likelihood of a gamma-poisson model}
-\usage{
-l_gpois(Y_obs, lambda_ref, d, alpha, beta, phi = 1)
-}
-\arguments{
-\item{Y_obs}{numeric vector Gene expression counts}
-
-\item{lambda_ref}{numeric vector Reference expression levels}
-
-\item{d}{numeric Total library size}
-
-\item{alpha}{numeric Alpha parameter of Gamma-Poisson distribution}
-
-\item{beta}{numeric Beta parameter of Gamma-Poisson distribution}
-}
-\value{
-numeric Joint log likelihood
-}
-\description{
-calculate joint likelihood of a gamma-poisson model
-}
-\keyword{internal}
diff --git a/man/l_lnpois.Rd b/man/l_lnpois.Rd
deleted file mode 100644
index c8018053..00000000
--- a/man/l_lnpois.Rd
+++ /dev/null
@@ -1,22 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils.R
-\name{l_lnpois}
-\alias{l_lnpois}
-\title{calculate joint likelihood of a PLN model}
-\usage{
-l_lnpois(Y_obs, lambda_ref, d, mu, sig, phi = 1)
-}
-\arguments{
-\item{Y_obs}{numeric vector Gene expression counts}
-
-\item{lambda_ref}{numeric vector Reference expression levels}
-
-\item{d}{numeric Total library size}
-}
-\value{
-numeric Joint log likelihood
-}
-\description{
-calculate joint likelihood of a PLN model
-}
-\keyword{internal}
diff --git a/man/likelihood_allele.Rd b/man/likelihood_allele.Rd
deleted file mode 100644
index 93ff3e30..00000000
--- a/man/likelihood_allele.Rd
+++ /dev/null
@@ -1,18 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/hmm.R
-\name{likelihood_allele}
-\alias{likelihood_allele}
-\title{Only compute total log likelihood from an allele HMM}
-\usage{
-likelihood_allele(hmm)
-}
-\arguments{
-\item{hmm}{HMM object; expect variables x (allele depth), d (total depth),
-logPi (log transition prob matrix), delta (prior for each state),
-alpha (alpha for each state), beta (beta for each state),
-states (states), p_s (phase switch probs)}
-}
-\description{
-Only compute total log likelihood from an allele HMM
-}
-\keyword{internal}
diff --git a/man/run_allele_hmm.Rd b/man/run_allele_hmm.Rd
deleted file mode 100644
index 087dd635..00000000
--- a/man/run_allele_hmm.Rd
+++ /dev/null
@@ -1,36 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/hmm.R
-\name{run_allele_hmm}
-\alias{run_allele_hmm}
-\title{allele-only HMM}
-\usage{
-run_allele_hmm(
- pAD,
- DP,
- p_s,
- t = 1e-05,
- theta_min = 0.08,
- gamma = 20,
- prior = NULL
-)
-}
-\arguments{
-\item{pAD}{integer vector Paternal allele counts}
-
-\item{DP}{integer vector Total alelle counts}
-
-\item{p_s}{numeric vector Phase switch probabilities}
-
-\item{t}{numeric Transition probability between copy number states}
-
-\item{theta_min}{numeric Minimum haplotype frequency deviation threshold}
-
-\item{gamma}{numeric Overdispersion in the allele-specific expression}
-}
-\value{
-character vector Decoded states
-}
-\description{
-allele-only HMM
-}
-\keyword{internal}
diff --git a/man/run_joint_hmm.Rd b/man/run_joint_hmm.Rd
deleted file mode 100644
index 44d1521d..00000000
--- a/man/run_joint_hmm.Rd
+++ /dev/null
@@ -1,73 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/hmm.R
-\name{run_joint_hmm}
-\alias{run_joint_hmm}
-\title{Run joint HMM on a pseudobulk profile}
-\usage{
-run_joint_hmm(
- pAD,
- DP,
- p_s,
- Y_obs = 0,
- lambda_ref = 0,
- d_total = 0,
- theta_min = 0.08,
- theta_neu = 0,
- bal_cnv = TRUE,
- phi_del = 2^(-0.25),
- phi_amp = 2^(0.25),
- phi_bamp = phi_amp,
- phi_bdel = phi_del,
- alpha = 1,
- beta = 1,
- mu = 0,
- sig = 1,
- t = 1e-05,
- gamma = 18,
- prior = NULL,
- exp_only = FALSE,
- allele_only = FALSE,
- classify_allele = FALSE,
- phasing = TRUE,
- debug = FALSE
-)
-}
-\arguments{
-\item{pAD}{integer vector Paternal allele counts}
-
-\item{DP}{integer vector Total alelle counts}
-
-\item{p_s}{numeric vector Phase switch probabilities}
-
-\item{Y_obs}{numeric vector Observed gene counts}
-
-\item{lambda_ref}{numeric vector Reference expression rates}
-
-\item{d_total}{integer Total library size for expression counts}
-
-\item{theta_min}{numeric Minimum haplotype imbalance threshold}
-
-\item{phi_del}{numeric Expected fold change for deletion}
-
-\item{phi_amp}{numeric Expected fold change for amplification}
-
-\item{phi_bamp}{numeric Expected fold change for balanced amplification}
-
-\item{phi_bdel}{numeric Expected fold change for balanced deletion}
-
-\item{mu}{numeric Global expression bias}
-
-\item{sig}{numeric Global expression variance}
-
-\item{t}{numeric Transition probability between copy number states}
-
-\item{gamma}{numeric Overdispersion in the allele-specific expression}
-
-\item{exp_only}{logical Whether to only use expression data}
-
-\item{allele_only}{logical Whether to only use allele data}
-}
-\description{
-Run joint HMM on a pseudobulk profile
-}
-\keyword{internal}
diff --git a/man/viterbi_allele.Rd b/man/viterbi_allele.Rd
deleted file mode 100644
index 7ffa6a1d..00000000
--- a/man/viterbi_allele.Rd
+++ /dev/null
@@ -1,18 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/hmm.R
-\name{viterbi_allele}
-\alias{viterbi_allele}
-\title{Viterbi algorithm for allele HMM}
-\usage{
-viterbi_allele(hmm)
-}
-\arguments{
-\item{hmm}{HMM object; expect variables x (allele depth), d (total depth),
-logPi (log transition prob matrix), delta (prior for each state),
-alpha (alpha for each state), beta (beta for each state),
-states (states), p_s (phase switch probs)}
-}
-\description{
-Viterbi algorithm for allele HMM
-}
-\keyword{internal}
diff --git a/man/viterbi_joint.Rd b/man/viterbi_joint.Rd
deleted file mode 100644
index 0ce57e53..00000000
--- a/man/viterbi_joint.Rd
+++ /dev/null
@@ -1,18 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/hmm.R
-\name{viterbi_joint}
-\alias{viterbi_joint}
-\title{Generalized viterbi algorithm for joint HMM}
-\usage{
-viterbi_joint(hmm)
-}
-\arguments{
-\item{hmm}{HMM object; expect variables x (allele depth), d (total depth), y (expression count), l (cell total library size),
-lambda (reference expression rate), mu (global expression mean), sig (global expression standard deviation),
-logPi (log transition prob matrix), phi (expression fold change for each state), delta (prior for each state),
-alpha (alpha for each state), beta (beta for each state), states (states), p_s (phase switch probs)}
-}
-\description{
-Generalized viterbi algorithm for joint HMM
-}
-\keyword{internal}
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index 2db09143..57445a49 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -11,93 +11,6 @@ Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get();
Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
#endif
-// cppdbbinom
-NumericVector cppdbbinom(const NumericVector& x, const NumericVector& size, const NumericVector& alpha, const NumericVector& beta, const bool& log_prob);
-RcppExport SEXP _numbat_cppdbbinom(SEXP xSEXP, SEXP sizeSEXP, SEXP alphaSEXP, SEXP betaSEXP, SEXP log_probSEXP) {
-BEGIN_RCPP
- Rcpp::RObject rcpp_result_gen;
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< const NumericVector& >::type x(xSEXP);
- Rcpp::traits::input_parameter< const NumericVector& >::type size(sizeSEXP);
- Rcpp::traits::input_parameter< const NumericVector& >::type alpha(alphaSEXP);
- Rcpp::traits::input_parameter< const NumericVector& >::type beta(betaSEXP);
- Rcpp::traits::input_parameter< const bool& >::type log_prob(log_probSEXP);
- rcpp_result_gen = Rcpp::wrap(cppdbbinom(x, size, alpha, beta, log_prob));
- return rcpp_result_gen;
-END_RCPP
-}
-// cpp_dgpois
-NumericVector cpp_dgpois(const NumericVector& x, const NumericVector& alpha, const NumericVector& beta, const bool& log_prob);
-RcppExport SEXP _numbat_cpp_dgpois(SEXP xSEXP, SEXP alphaSEXP, SEXP betaSEXP, SEXP log_probSEXP) {
-BEGIN_RCPP
- Rcpp::RObject rcpp_result_gen;
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< const NumericVector& >::type x(xSEXP);
- Rcpp::traits::input_parameter< const NumericVector& >::type alpha(alphaSEXP);
- Rcpp::traits::input_parameter< const NumericVector& >::type beta(betaSEXP);
- Rcpp::traits::input_parameter< const bool& >::type log_prob(log_probSEXP);
- rcpp_result_gen = Rcpp::wrap(cpp_dgpois(x, alpha, beta, log_prob));
- return rcpp_result_gen;
-END_RCPP
-}
-// logSumExp
-double logSumExp(const arma::vec& x);
-RcppExport SEXP _numbat_logSumExp(SEXP xSEXP) {
-BEGIN_RCPP
- Rcpp::RObject rcpp_result_gen;
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< const arma::vec& >::type x(xSEXP);
- rcpp_result_gen = Rcpp::wrap(logSumExp(x));
- return rcpp_result_gen;
-END_RCPP
-}
-// likelihood_compute
-double likelihood_compute(Rcpp::NumericVector logphi, Rcpp::NumericMatrix logprob, arma::cube logPi, int n, int m);
-RcppExport SEXP _numbat_likelihood_compute(SEXP logphiSEXP, SEXP logprobSEXP, SEXP logPiSEXP, SEXP nSEXP, SEXP mSEXP) {
-BEGIN_RCPP
- Rcpp::RObject rcpp_result_gen;
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< Rcpp::NumericVector >::type logphi(logphiSEXP);
- Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type logprob(logprobSEXP);
- Rcpp::traits::input_parameter< arma::cube >::type logPi(logPiSEXP);
- Rcpp::traits::input_parameter< int >::type n(nSEXP);
- Rcpp::traits::input_parameter< int >::type m(mSEXP);
- rcpp_result_gen = Rcpp::wrap(likelihood_compute(logphi, logprob, logPi, n, m));
- return rcpp_result_gen;
-END_RCPP
-}
-// forward_backward_compute
-Rcpp::NumericMatrix forward_backward_compute(Rcpp::NumericVector logphi, Rcpp::NumericMatrix logprob, arma::cube logPi, int n, int m);
-RcppExport SEXP _numbat_forward_backward_compute(SEXP logphiSEXP, SEXP logprobSEXP, SEXP logPiSEXP, SEXP nSEXP, SEXP mSEXP) {
-BEGIN_RCPP
- Rcpp::RObject rcpp_result_gen;
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< Rcpp::NumericVector >::type logphi(logphiSEXP);
- Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type logprob(logprobSEXP);
- Rcpp::traits::input_parameter< arma::cube >::type logPi(logPiSEXP);
- Rcpp::traits::input_parameter< int >::type n(nSEXP);
- Rcpp::traits::input_parameter< int >::type m(mSEXP);
- rcpp_result_gen = Rcpp::wrap(forward_backward_compute(logphi, logprob, logPi, n, m));
- return rcpp_result_gen;
-END_RCPP
-}
-// viterbi_compute
-Rcpp::NumericVector viterbi_compute(Rcpp::NumericVector log_delta, Rcpp::NumericMatrix logprob, arma::cube logPi, int n, int m, Rcpp::NumericMatrix nu, Rcpp::NumericVector z);
-RcppExport SEXP _numbat_viterbi_compute(SEXP log_deltaSEXP, SEXP logprobSEXP, SEXP logPiSEXP, SEXP nSEXP, SEXP mSEXP, SEXP nuSEXP, SEXP zSEXP) {
-BEGIN_RCPP
- Rcpp::RObject rcpp_result_gen;
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< Rcpp::NumericVector >::type log_delta(log_deltaSEXP);
- Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type logprob(logprobSEXP);
- Rcpp::traits::input_parameter< arma::cube >::type logPi(logPiSEXP);
- Rcpp::traits::input_parameter< int >::type n(nSEXP);
- Rcpp::traits::input_parameter< int >::type m(mSEXP);
- Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type nu(nuSEXP);
- Rcpp::traits::input_parameter< Rcpp::NumericVector >::type z(zSEXP);
- rcpp_result_gen = Rcpp::wrap(viterbi_compute(log_delta, logprob, logPi, n, m, nu, z));
- return rcpp_result_gen;
-END_RCPP
-}
// roman2int_internal
int roman2int_internal(Rcpp::StringVector letters, int nchar);
RcppExport SEXP _numbat_roman2int_internal(SEXP lettersSEXP, SEXP ncharSEXP) {
@@ -110,60 +23,9 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
-// fit_lnpois_cpp
-arma::rowvec fit_lnpois_cpp(std::vector Y_obs, std::vector lambda_ref, int d);
-RcppExport SEXP _numbat_fit_lnpois_cpp(SEXP Y_obsSEXP, SEXP lambda_refSEXP, SEXP dSEXP) {
-BEGIN_RCPP
- Rcpp::RObject rcpp_result_gen;
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< std::vector >::type Y_obs(Y_obsSEXP);
- Rcpp::traits::input_parameter< std::vector >::type lambda_ref(lambda_refSEXP);
- Rcpp::traits::input_parameter< int >::type d(dSEXP);
- rcpp_result_gen = Rcpp::wrap(fit_lnpois_cpp(Y_obs, lambda_ref, d));
- return rcpp_result_gen;
-END_RCPP
-}
-// poilog1
-std::vector poilog1(std::vector x, std::vector my, std::vector sig);
-RcppExport SEXP _numbat_poilog1(SEXP xSEXP, SEXP mySEXP, SEXP sigSEXP) {
-BEGIN_RCPP
- Rcpp::RObject rcpp_result_gen;
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< std::vector >::type x(xSEXP);
- Rcpp::traits::input_parameter< std::vector >::type my(mySEXP);
- Rcpp::traits::input_parameter< std::vector >::type sig(sigSEXP);
- rcpp_result_gen = Rcpp::wrap(poilog1(x, my, sig));
- return rcpp_result_gen;
-END_RCPP
-}
-// l_lnpois_cpp
-double l_lnpois_cpp(std::vector Y_obs, std::vector lambda_ref, int d, double mu, double sig, double phi);
-RcppExport SEXP _numbat_l_lnpois_cpp(SEXP Y_obsSEXP, SEXP lambda_refSEXP, SEXP dSEXP, SEXP muSEXP, SEXP sigSEXP, SEXP phiSEXP) {
-BEGIN_RCPP
- Rcpp::RObject rcpp_result_gen;
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< std::vector >::type Y_obs(Y_obsSEXP);
- Rcpp::traits::input_parameter< std::vector >::type lambda_ref(lambda_refSEXP);
- Rcpp::traits::input_parameter< int >::type d(dSEXP);
- Rcpp::traits::input_parameter< double >::type mu(muSEXP);
- Rcpp::traits::input_parameter< double >::type sig(sigSEXP);
- Rcpp::traits::input_parameter< double >::type phi(phiSEXP);
- rcpp_result_gen = Rcpp::wrap(l_lnpois_cpp(Y_obs, lambda_ref, d, mu, sig, phi));
- return rcpp_result_gen;
-END_RCPP
-}
static const R_CallMethodDef CallEntries[] = {
- {"_numbat_cppdbbinom", (DL_FUNC) &_numbat_cppdbbinom, 5},
- {"_numbat_cpp_dgpois", (DL_FUNC) &_numbat_cpp_dgpois, 4},
- {"_numbat_logSumExp", (DL_FUNC) &_numbat_logSumExp, 1},
- {"_numbat_likelihood_compute", (DL_FUNC) &_numbat_likelihood_compute, 5},
- {"_numbat_forward_backward_compute", (DL_FUNC) &_numbat_forward_backward_compute, 5},
- {"_numbat_viterbi_compute", (DL_FUNC) &_numbat_viterbi_compute, 7},
{"_numbat_roman2int_internal", (DL_FUNC) &_numbat_roman2int_internal, 2},
- {"_numbat_fit_lnpois_cpp", (DL_FUNC) &_numbat_fit_lnpois_cpp, 3},
- {"_numbat_poilog1", (DL_FUNC) &_numbat_poilog1, 3},
- {"_numbat_l_lnpois_cpp", (DL_FUNC) &_numbat_l_lnpois_cpp, 6},
{NULL, NULL, 0}
};
diff --git a/src/bbinom.cpp b/src/bbinom.cpp
deleted file mode 100644
index b4007773..00000000
--- a/src/bbinom.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-
-#include
-
-// https://github.com/twolodzko/extraDistr/blob/master/src/beta-binomial-distribution.cpp
-
-using std::pow;
-using std::sqrt;
-using std::abs;
-using std::exp;
-using std::log;
-using std::floor;
-using std::ceil;
-using Rcpp::NumericVector;
-
-
-// MACROS
-
-#define GETV(x, i) x[i % x.length()] // wrapped indexing of vector
-#define GETM(x, i, j) x(i % x.nrow(), j) // wrapped indexing of matrix
-#define VALID_PROB(p) ((p >= 0.0) && (p <= 1.0))
-
-
-bool isInteger(double x, bool warn = true);
-
-
-bool isInteger(double x, bool warn) {
- if (ISNAN(x))
- return false;
- if (((x < 0.0) ? std::ceil(x) : std::floor(x)) != x) {
- if (warn) {
- char msg[55];
- std::snprintf(msg, sizeof(msg), "non-integer: %f", x);
- Rcpp::warning(msg);
- }
- return false;
- }
- return true;
-}
-
-
-inline double logpmf_bbinom(double k, double n, double alpha,
- double beta, bool& throw_warning) {
-#ifdef IEEE_754
- if (ISNAN(k) || ISNAN(n) || ISNAN(alpha) || ISNAN(beta))
- return k+n+alpha+beta;
-#endif
- if (alpha < 0.0 || beta < 0.0 || n < 0.0 || !isInteger(n, false)) {
- throw_warning = true;
- return NAN;
- }
- if (!isInteger(k) || k < 0.0 || k > n)
- return R_NegInf;
- // R::choose(n, k) * R::beta(k+alpha, n-k+beta) / R::beta(alpha, beta);
- return R::lchoose(n, k) + R::lbeta(k+alpha, n-k+beta) - R::lbeta(alpha, beta);
-}
-
-
-// https://github.com/twolodzko/extraDistr/blob/master/src/beta-binomial-distribution.cpp
-
-// [[Rcpp::export]]
-NumericVector cppdbbinom(
- const NumericVector& x,
- const NumericVector& size,
- const NumericVector& alpha,
- const NumericVector& beta,
- const bool& log_prob = false
- ) {
-
- if (std::min({x.length(), size.length(),
- alpha.length(), beta.length()}) < 1) {
- return NumericVector(0);
- }
-
- int Nmax = std::max({
- x.length(),
- size.length(),
- alpha.length(),
- beta.length()
- });
- NumericVector p(Nmax);
-
- bool throw_warning = false;
-
- for (int i = 0; i < Nmax; i++){
- p[i] = logpmf_bbinom(GETV(x, i), GETV(size, i),
- GETV(alpha, i), GETV(beta, i),
- throw_warning);
- }
-
- if (!log_prob){
- p = Rcpp::exp(p);
- }
-
- if (throw_warning){
- Rcpp::warning("NaNs produced");
- }
-
- return p;
-}
-
-
-// https://github.com/cran/extraDistr/blob/19708536de6549404dec52588a36f4b598d0016a/src/gamma-poisson-distribution.cpp
-
-inline double lfactorial(double x) {
- return R::lgammafn(x + 1.0);
-}
-
-inline double logpmf_gpois(double x, double alpha, double beta,
- bool& throw_warning) {
-#ifdef IEEE_754
- if (ISNAN(x) || ISNAN(alpha) || ISNAN(beta))
- return x+alpha+beta;
-#endif
- if (alpha <= 0.0 || beta <= 0.0) {
- throw_warning = true;
- return NAN;
- }
- if (!isInteger(x) || x < 0.0 || !R_FINITE(x))
- return R_NegInf;
- // p = beta/(1.0+beta);
- double p = exp( log(beta) - log1p(beta) );
- return R::lgammafn(alpha+x) - lfactorial(x) - R::lgammafn(alpha) +
- log(p)*x + log(1.0-p)*alpha;
-}
-
-// [[Rcpp::export]]
-NumericVector cpp_dgpois(
- const NumericVector& x,
- const NumericVector& alpha,
- const NumericVector& beta,
- const bool& log_prob = false
- ) {
-
- if (std::min({x.length(), alpha.length(), beta.length()}) < 1) {
- return NumericVector(0);
- }
-
- int Nmax = std::max({
- x.length(),
- alpha.length(),
- beta.length()
- });
- NumericVector p(Nmax);
-
- bool throw_warning = false;
-
- for (int i = 0; i < Nmax; i++)
- p[i] = logpmf_gpois(GETV(x, i), GETV(alpha, i),
- GETV(beta, i), throw_warning);
-
- if (!log_prob)
- p = Rcpp::exp(p);
-
- if (throw_warning)
- Rcpp::warning("NaNs produced");
-
- return p;
-}
-
-
-
diff --git a/src/hmm.cpp b/src/hmm.cpp
deleted file mode 100644
index 7ed5c2cc..00000000
--- a/src/hmm.cpp
+++ /dev/null
@@ -1,194 +0,0 @@
-
-#include
-
-using namespace Rcpp;
-
-
-#ifdef HAVE_LONG_DOUBLE
-# define LDOUBLE long double
-# define EXPL expl
-#else
-# define LDOUBLE double
-# define EXPL exp
-#endif
-
-// [[Rcpp::export]]
-double logSumExp(const arma::vec& x) {
- // https://github.com/helske/seqHMM/blob/master/src/logSumExp.cpp
- unsigned int maxi = x.index_max();
- LDOUBLE maxv = x(maxi);
- if (!(maxv > -arma::datum::inf)) {
- return -arma::datum::inf;
- }
- LDOUBLE cumsum = 0.0;
- for (unsigned int i = 0; i < x.n_elem; i++) {
- if ((i != maxi) & (x(i) > -arma::datum::inf)) {
- cumsum += EXPL(x(i) - maxv);
- }
- }
-
- return maxv + log1p(cumsum);
-}
-
-
-// expensive for loops in likelihood_allele() and forward_back_allele)()
-
-// [[Rcpp::export]]
-double likelihood_compute(Rcpp::NumericVector logphi, Rcpp::NumericMatrix logprob, arma::cube logPi, int n, int m) {
-
- double LL = 0.0;
-
- for (int i = 0; i < n; i++) {
-
- if (i > 0) {
- Rcpp::NumericVector logphi_new(m);
- for (int j = 0; j < m; j++) {
- Rcpp::NumericMatrix subset_logPi = wrap(logPi.slice(i));
- Rcpp::NumericVector logphi_logPi = logphi + subset_logPi(_, j);
- logphi_new[j] = logSumExp(logphi_logPi);
- }
- logphi = logphi_new;
- }
-
- logphi = logphi + logprob(i, _); // Note: logprob(i, _) is Rcpp::NumericVector
-
- double logSumPhi = logSumExp(logphi);
-
- logphi = logphi - logSumPhi;
-
- LL = LL + logSumPhi;
- }
-
- return LL;
-}
-
-
-// [[Rcpp::export]]
-Rcpp::NumericMatrix forward_backward_compute(Rcpp::NumericVector logphi, Rcpp::NumericMatrix logprob, arma::cube logPi, int n, int m) {
-
- const int nrow = n;
- const int ncol = m;
- Rcpp::NumericMatrix logalpha(nrow, ncol); // logalpha <- matrix(as.double(rep(0, m * n)), nrow = n)
-
- double lscale = 0.0;
- double LL = 0.0;
-
- for (int t = 0; t < n; t++) {
-
- if (t > 0) {
- Rcpp::NumericVector logphi_new(m);
- for (int j = 0; j < m; j++) {
- Rcpp::NumericMatrix subset_logPi = wrap(logPi.slice(t));
- Rcpp::NumericVector logphi_logPi = logphi + subset_logPi(_, j);
- logphi_new[j] = logSumExp(logphi_logPi);
- }
- logphi = logphi_new;
- }
-
- logphi = logphi + logprob(t, _); // Note: logprob(i, _) is Rcpp::NumericVector
-
- double logSumPhi = logSumExp(logphi);
-
- logphi = logphi - logSumPhi;
-
- lscale = lscale + logSumPhi;
-
- logalpha(t, _) = logphi + lscale;
-
- }
-
- LL = lscale;
-
- Rcpp::NumericMatrix logbeta(nrow, ncol); // logalpha <- matrix(as.double(rep(0, m * n)), nrow = n)
- Rcpp::NumericVector logphi_(m); // logphi <- log(as.double(rep(1/m, m)))
- for (int i = 0; i < m; i++){
- double mval = static_cast(m);
- logphi_[i] = std::log(1/mval);
- }
-
- double lscale_ = std::log(m);
-
- for (int t = n-2; t>=0; t--) {
-
- // logphi = sapply(1:m, function(i) matrixStats::logSumExp(logphi + logprob[t+1,] + logPi[[t+1]][i,]))
-
- Rcpp::NumericVector logphinew(m);
- for (int j = 0; j < m; j++) {
- Rcpp::NumericMatrix subset_logPi = wrap(logPi.slice(t+1));
- Rcpp::NumericVector logphi_logprob_logPi = logphi_ + logprob(t+1, _) + subset_logPi(j, _);
- logphinew[j] = logSumExp(logphi_logprob_logPi);
- }
- logphi_ = logphinew;
-
- logbeta(t, _) = logphi_ + lscale_;
-
- double logSumPhi = logSumExp(logphi_);
-
- logphi_ = logphi_ - logSumPhi;
-
- lscale_ = lscale_ + logSumPhi;
-
- }
-
- // p_up = exp(logalpha + logbeta - LL)[,1]
-
- // matrix addition
- Rcpp::NumericMatrix expoutput(nrow, ncol);
- for (int i = 0; i < nrow; i++) {
- for (int j = 0; j < ncol; j++) {
- double sum = logalpha(i, j) + logbeta(i, j) - LL;
- expoutput(i, j) = std::exp(sum); // Rcpp::exp() works with vectors
- }
- }
-
- return expoutput;
-}
-
-// [[Rcpp::export]]
-Rcpp::NumericVector viterbi_compute(Rcpp::NumericVector log_delta, Rcpp::NumericMatrix logprob, arma::cube logPi, int n, int m, Rcpp::NumericMatrix nu, Rcpp::NumericVector z) {
-
- const int nrow = n;
- const int ncol = m;
-
- nu(0, _) = log_delta + logprob(0, _); // nu[1, ] <- log(hmm$delta) + logprob[1,]
-
- Rcpp::NumericMatrix matrixnu(ncol, ncol);
- for (int i = 1; i < nrow; i++) {
- Rcpp::NumericVector nu_vec = nu(i - 1, _);
- for (int j = 0; j < ncol; j++) {
- matrixnu(j, _) = rep(nu_vec[j], ncol);
- }
- // nu[i, ] = apply(matrixnu + hmm$logPi[,,i], 2, max)
- // Step 1)
- // Add the two matrices matrixnu + hmm$logPi[,,i]
- Rcpp::NumericMatrix subset_logPi = wrap(logPi.slice(i));
- Rcpp::NumericMatrix sum_matrixnu_logPi(ncol, ncol);
- for (int ii = 0; ii < ncol; ii++) {
- for (int jj = 0; jj < ncol; jj++) {
- sum_matrixnu_logPi(ii, jj) = matrixnu(ii, jj) + subset_logPi(ii, jj);
- }
- }
- // Step 2)
- // Calculate the 'max' for each column, return a NumericVector with 'max' for each column
- Rcpp::NumericVector newnu;
- for (int k = 0; k < ncol; k++) {
- newnu.push_back(max(sum_matrixnu_logPi(_, k)));
- }
- nu(i, _) = newnu; // nu[i, ] = apply(matrixnu + hmm$logPi[,,i], 2, max)
-
- nu(i, _) = nu(i, _) + logprob(i, _); // nu[i, ] = nu[i, ] + logprob[i,]
-
- }
-
- z[n - 1] = which_max(nu(n-1, _)) + 1; // which_max() uses 0-indexing, so add 1
-
- // for (i in seq(N - 1, 1, -1)) z[i] <- which.max(hmm$logPi[,,i+1][, z[i+1]] + nu[i, ])
- for (int t = n-2; t>=0; t--) {
- Rcpp::NumericMatrix subset_logPi = wrap(logPi.slice(t+1));
- // note: 'z[t+1] - 1' as we convert R 1-indexing to C++ 0-indexing
- Rcpp::NumericVector logPi_nu = subset_logPi(_, z[t+1] - 1) + nu(t, _);
- z[t] = which_max(logPi_nu) + 1; // which_max() uses 0-indexing, so add 1
- }
-
- return z;
-}
diff --git a/src/mle.cpp b/src/mle.cpp
deleted file mode 100644
index 9c6d0e47..00000000
--- a/src/mle.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-#include // std::pow
-#include
-// [[Rcpp::depends(RcppArmadillo)]]
-#include
-// [[Rcpp::depends(roptim)]]
-using namespace roptim;
-
-double l_lnpois_cpp(std::vector Y_obs, std::vector lambda_ref, int d, double mu, double sig, double phi = 1.0);
-
-class fit_lnpois : public Functor {
-
- public:
-
- const std::vector Y_obs;
-
- const std::vector lambda_ref;
-
- const int d;
-
- // initialize with source and destination
- fit_lnpois(std::vector Y_obs, const std::vector lambda_ref, const int d):
- Y_obs(Y_obs), lambda_ref(lambda_ref), d(d) {}
-
- double operator()(const arma::vec &x) override {
- return -l_lnpois_cpp(Y_obs, lambda_ref, d, x[0], x[1]);
- };
-};
-
-// [[Rcpp::export]]
-arma::rowvec fit_lnpois_cpp(std::vector Y_obs, std::vector lambda_ref, int d) {
-
- fit_lnpois model(Y_obs, lambda_ref, d);
-
- Roptim opt("L-BFGS-B");
- opt.control.trace = 0;
- opt.set_hessian(false);
- arma::vec lower = {-arma::datum::inf, 0.01};
- opt.set_lower(lower);
-
- arma::vec x = {0, 1};
- opt.minimize(model, x);
-
- return opt.par().t();
-}
-
diff --git a/src/poilog.cpp b/src/poilog.cpp
deleted file mode 100644
index 4974f6c6..00000000
--- a/src/poilog.cpp
+++ /dev/null
@@ -1,151 +0,0 @@
-
-#include
-#include
-
-using namespace Rcpp;
-
-// refer to:
-// https://github.com/evanbiederstedt/poilogcpp
-
-// Function declarations
-double poilog(int x, double my, double sig);
-double bipoilog(int x, int y, double my1, double my2, double sig1, double sig2, double ro);
-double maxf(int x, double my, double sig);
-double upper(int x, double m, double my, double sig);
-double lower(int x, double m, double my, double sig);
-double my_f(double z, int x, double my, double sig, double fac);
-void my_f_vec(double *z, int n, void *p);
-std::vector poilog1(std::vector x, std::vector my, std::vector sig);
-
-struct My_fparams {
- int x;
- double sig;
- double my;
- double fac;
-};
-
-// [[Rcpp::export]]
-std::vector poilog1(std::vector x, std::vector my, std::vector sig){
- int nrN = x.size();
- std::vector vect(nrN);
- for (int i = 0; i < nrN; i++) {
- vect[i] = poilog(x[i], my[i], sig[i]);
- }
- return vect;
-}
-
-double maxf(int x, double my, double sig){
- double d,z;
- z=0;
- d=100;
- while (d>0.00001) {
- if (x-1-exp(z)-1/sig*(z-my)>0) {
- z=z+d;
- } else {
- z=z-d;
- }
- d=d/2;
- }
- return(z);
-}
-
-
-double upper(int x, double m, double my, double sig){
- double d,z,mf;
- mf = (x-1)*m-exp(m)-0.5/sig*((m-my)*(m-my));
- z = m+20;
- d = 10;
- while (d>0.000001) {
- if ((x-1)*z-exp(z)-0.5/sig*((z-my)*(z-my))-mf+log(1000000)>0) {
- z=z+d;
- } else {
- z=z-d;
- }
- d=d/2;
- }
- return(z);
-}
-
-
-double lower(int x, double m, double my, double sig) {
- double d,z,mf;
- mf = (x-1)*m-exp(m)-0.5/sig*((m-my)*(m-my));
- z = m-20;
- d = 10;
- while (d>0.000001) {
- if ((x-1)*z-exp(z)-0.5/sig*((z-my)*(z-my))-mf+log(1000000)>0) {
- z=z-d;
- } else {
- z=z+d;
- }
- d=d/2;
- }
- return(z);
-}
-
-
-double my_f(double z, int x, double my, double sig, double fac) {
- return exp(z*x-exp(z)-0.5/sig*((z-my)*(z-my))-fac);
-}
-
-void my_f_vec(double *z, int n, void *p){
- struct My_fparams *params = (struct My_fparams *)p;
- int x = (params->x);
- double sig = (params->sig);
- double my = (params->my);
- double fac = (params->fac);
- for (int i=0;i Y_obs, std::vector lambda_ref, int d, double mu, double sig, double phi = 1.0) {
-
- int n = Y_obs.size();
-
- double l = 0;
- double p = 0;
-
- for (int i = 0; i < n; i++) {
- p = poilog(Y_obs[i], mu + log(phi * d * lambda_ref[i]), std::pow(sig, 2));
- if (p == 0) {p = 1e-15;}
- l += log(p);
- }
-
- return l;
-}
\ No newline at end of file
diff --git a/tests/testthat/test_functions.R b/tests/testthat/test_functions.R
index a6486371..bc050a8e 100644
--- a/tests/testthat/test_functions.R
+++ b/tests/testthat/test_functions.R
@@ -1,73 +1,6 @@
library(numbat)
-test_that("dpoilog works", {
- expect_equal(numbat:::dpoilog(c(1,11),c(1,1),c(1,1)), c(0.175733342664327, 0.0150105250670325))
-})
-
-# test_that("HMM works", {
-
-# message('getting pseudobulk ..')
-
-# bulk = get_bulk(
-# count_mat = count_mat_ATC2,
-# df_allele = df_allele_ATC2,
-# lambdas_ref = ref_hca,
-# gtf = gtf_hg38
-# )
-
-# expect_true(length(bulk) > 0)
-
-# message('running HMM..')
-
-# bulk = analyze_bulk(bulk, t = 1e-5)
-
-# expect_true(length(bulk) > 0)
-
-# })
-
-test_that("logSumExp() works", {
-
- b = numbat:::logSumExp(c(1.2, 3.4))
- expect_equal(b, 3.5050833)
- d = numbat:::logSumExp(c(1.2, 3.4, -5.6, -7.8))
- expect_equal(d, 3.5052067)
-
-})
-
-test_that("Check that likelihood_allele() works as expected", {
-
- LL = numbat:::likelihood_allele(pre_likelihood_hmm)
- expect_equal(as.integer(LL), -736)
-
-})
-
-test_that("Check that forward_backward() works as expected", {
-
- p_major = numbat:::forward_back_allele(pre_likelihood_hmm)[,1]
- expect_equal(is.vector(p_major), TRUE)
- expect_equal(length(p_major), 1042)
- expect_equal(round(p_major[1], 3), 0.963)
- expect_equal(round(p_major[2], 3), 0)
- expect_equal(round(p_major[3], 3), 0)
- expect_equal(round(p_major[10], 3), 0.745)
-
-})
-
-test_that("Check that viterbi_allele() works as expected", {
-
- states = numbat:::viterbi_allele(pre_likelihood_hmm)
- expect_equal(sum(states == 1), 440)
- expect_equal(sum(states == 2), 602)
- expect_equal(states[1], 1)
- expect_equal(states[2], 2)
- expect_equal(states[3], 2)
- expect_equal(states[1024], 1)
-
-})
-
-
-
test_that("Check that roman2int works as expected", {
val.3899 <- 'MMMDCCCXCIX'
val.3900 <- 'MMMCM'
diff --git a/vignettes/numbat.Rmd b/vignettes/numbat.Rmd
index a768435e..d6eec7a6 100644
--- a/vignettes/numbat.Rmd
+++ b/vignettes/numbat.Rmd
@@ -53,7 +53,7 @@ devtools::install_github("https://github.com/kharchenkolab/numbat")
## Docker
We provide a ready-to-run Docker container that includes the Numbat R package and all of its dependencies. You can launch it as follows:
```
-docker run -v /work:/mnt/mydata -it pkharchenkolab/numbat-rbase:main /bin/bash
+docker run -v /work:/mnt/mydata -it pkharchenkolab/numbat-rbase:latest /bin/bash
```
where `/work` is a local data folder you would like to access and write to. Within the container, `cellsnp-lite`, `eagle2` and `samtools` are available in path and the 1000 Genome SNP VCF and phasing panel files (hg38) are stored under `/data`. You can also launch R and run interactive analysis using Numbat (or run an R script).