diff --git a/.circleci/config.yml b/.circleci/config.yml
index fbc8d7b6..6da56726 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -2,7 +2,7 @@ version: 2
 jobs:
   build:
     docker:
-      - image: rocker/verse:4.2
+      - image: rocker/verse:4.3
     environment:
             _R_CHECK_FORCE_SUGGESTS_: false
     steps:
diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml
index 8d242c1b..8c7b2c23 100644
--- a/.github/workflows/docker.yaml
+++ b/.github/workflows/docker.yaml
@@ -4,6 +4,7 @@ name: Publish Docker image
 on:
   release:
     types: [published]
+  workflow_dispatch:
   # push:
   #   branches: [main, master]
 
@@ -16,22 +17,22 @@ jobs:
         uses: actions/checkout@v3
       
       - name: Log in to Docker Hub
-        uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
+        uses: docker/login-action@v3
         with:
           username: ${{ secrets.DOCKER_USERNAME }}
           password: ${{ secrets.DOCKER_PASSWORD }}
       
       - name: Extract metadata (tags, labels) for Docker
         id: meta
-        uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38
+        uses: docker/metadata-action@v5
         with:
           images: pkharchenkolab/numbat-rbase
       
       - name: Build and push Docker image
-        uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc
+        uses: docker/build-push-action@v5
         with:
           context: .
           file: docker/Dockerfile
           push: true
           tags: ${{ steps.meta.outputs.tags }}
-          labels: ${{ steps.meta.outputs.labels }}
\ No newline at end of file
+          labels: ${{ steps.meta.outputs.labels }}
diff --git a/DESCRIPTION b/DESCRIPTION
index 317e621c..93103446 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: numbat
 Title: Haplotype-Aware CNV Analysis from scRNA-Seq
 URL: https://github.com/kharchenkolab/numbat/, https://kharchenkolab.github.io/numbat/
-Version: 1.3.4
+Version: 1.4.0
 Authors@R: c(person("Teng","Gao", email="tgaoteng@gmail.com", role=c("cre", "aut")), person("Ruslan", "Soldatov", email="soldatr@mskcc.org", role="aut"), person("Hirak", "Sarkar", email="hirak_sarkar@hms.harvard.edu", role="aut"), person("Evan", "Biederstedt", email="evan.biederstedt@gmail.com", role="aut"), person("Peter", "Kharchenko", email = "peter_kharchenko@hms.harvard.edu", role = "aut"))
 Description: A computational method that infers copy number variations (CNVs) in cancer scRNA-seq data and reconstructs the tumor phylogeny. 'numbat' integrates signals from gene expression, allelic ratio, and population haplotype structures to accurately infer allele-specific CNVs in single cells and reconstruct their lineage relationship. 'numbat' can be used to: 1. detect allele-specific copy number variations from single-cells; 2. differentiate tumor versus normal cells in the tumor microenvironment; 3. infer the clonal architecture and evolutionary history of profiled tumors. 'numbat' does not require tumor/normal-paired DNA or genotype data, but operates solely on the donor scRNA-data data (for example, 10x Cell Ranger output). Additional examples and documentations are available at <https://kharchenkolab.github.io/numbat/>. For details on the method please see Gao et al. Nature Biotechnology (2022) <doi:10.1038/s41587-022-01468-y>.
 License: MIT + file LICENSE
@@ -21,6 +21,7 @@ Imports:
     ggraph,
     ggtree,
     glue,
+    hahmmr,
     igraph,
     IRanges,
     logger,
diff --git a/NAMESPACE b/NAMESPACE
index e968827c..d075cc39 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -32,6 +32,16 @@ importFrom(data.table,fread)
 importFrom(data.table,fwrite)
 importFrom(ggtree,"%<+%")
 importFrom(grDevices,colorRampPalette)
+importFrom(hahmmr,dbbinom)
+importFrom(hahmmr,dpoilog)
+importFrom(hahmmr,fit_lnpois_cpp)
+importFrom(hahmmr,forward_back_allele)
+importFrom(hahmmr,l_bbinom)
+importFrom(hahmmr,l_lnpois)
+importFrom(hahmmr,likelihood_allele)
+importFrom(hahmmr,logSumExp)
+importFrom(hahmmr,run_allele_hmm_s5)
+importFrom(hahmmr,run_joint_hmm_s15)
 importFrom(igraph,"E<-")
 importFrom(igraph,"V<-")
 importFrom(igraph,E)
@@ -74,4 +84,4 @@ importFrom(stats,setNames)
 importFrom(stats,start)
 importFrom(stats,t.test)
 importFrom(utils,combn)
-useDynLib(numbat)
+useDynLib(numbat, .registration=TRUE)
diff --git a/NEWS.md b/NEWS.md
index 16a7411a..541f956a 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,13 @@
+# numbat 1.4.0 - 02/23/2023
+
+* Integration with hahmmr 
+
+* Better input checking for pileup_and_phase
+
+* Fix compatibility with igraph v2.0+ and tidygraph v1.3+ (#150)
+
+* Fix multiallelic CNV state probability reporting (#146)
+
 # numbat 1.3.3 - 08/15/2023
 
 * Fix plotting issue #135
diff --git a/R/RcppExports.R b/R/RcppExports.R
index de4ce30d..354af170 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -1,43 +1,7 @@
 # Generated by using Rcpp::compileAttributes() -> do not edit by hand
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
-cppdbbinom <- function(x, size, alpha, beta, log_prob = FALSE) {
-    .Call('_numbat_cppdbbinom', PACKAGE = 'numbat', x, size, alpha, beta, log_prob)
-}
-
-cpp_dgpois <- function(x, alpha, beta, log_prob = FALSE) {
-    .Call('_numbat_cpp_dgpois', PACKAGE = 'numbat', x, alpha, beta, log_prob)
-}
-
-logSumExp <- function(x) {
-    .Call('_numbat_logSumExp', PACKAGE = 'numbat', x)
-}
-
-likelihood_compute <- function(logphi, logprob, logPi, n, m) {
-    .Call('_numbat_likelihood_compute', PACKAGE = 'numbat', logphi, logprob, logPi, n, m)
-}
-
-forward_backward_compute <- function(logphi, logprob, logPi, n, m) {
-    .Call('_numbat_forward_backward_compute', PACKAGE = 'numbat', logphi, logprob, logPi, n, m)
-}
-
-viterbi_compute <- function(log_delta, logprob, logPi, n, m, nu, z) {
-    .Call('_numbat_viterbi_compute', PACKAGE = 'numbat', log_delta, logprob, logPi, n, m, nu, z)
-}
-
 roman2int_internal <- function(letters, nchar) {
-    .Call('_numbat_roman2int_internal', PACKAGE = 'numbat', letters, nchar)
-}
-
-fit_lnpois_cpp <- function(Y_obs, lambda_ref, d) {
-    .Call('_numbat_fit_lnpois_cpp', PACKAGE = 'numbat', Y_obs, lambda_ref, d)
-}
-
-poilog1 <- function(x, my, sig) {
-    .Call('_numbat_poilog1', PACKAGE = 'numbat', x, my, sig)
-}
-
-l_lnpois_cpp <- function(Y_obs, lambda_ref, d, mu, sig, phi = 1.0) {
-    .Call('_numbat_l_lnpois_cpp', PACKAGE = 'numbat', Y_obs, lambda_ref, d, mu, sig, phi)
+    .Call(`_numbat_roman2int_internal`, letters, nchar)
 }
 
diff --git a/R/diagnostics.R b/R/diagnostics.R
index 0e7cd58a..ea127ba2 100644
--- a/R/diagnostics.R
+++ b/R/diagnostics.R
@@ -149,6 +149,12 @@ check_exp_ref = function(lambdas_ref) {
     if (!is.matrix(lambdas_ref)) {
         lambdas_ref = as.matrix(lambdas_ref) %>% magrittr::set_colnames('ref')
     }
+
+    if (any(is.na(lambdas_ref))) {
+        msg = "The reference expression matrix 'lambdas_ref' should not contain any NA values."
+        log_error(msg)
+        stop(msg)
+    }
     
     # check if all entries in the reference profile are integers
     if (all(lambdas_ref == as.integer(lambdas_ref))) {
diff --git a/R/genotyping.R b/R/genotyping.R
index 1f773b89..9827ab2b 100644
--- a/R/genotyping.R
+++ b/R/genotyping.R
@@ -211,9 +211,6 @@ preprocess_allele = function(
     
     # phased VCF
     vcf_phased = vcf_phased %>% 
-        mutate(INFO = str_remove_all(INFO, '[:alpha:]|=')) %>%
-        tidyr::separate(col = 'INFO', into = c('AD', 'DP', 'OTH'), sep = ';') %>%
-        mutate_at(c('AD', 'DP', 'OTH'), as.integer) %>%
         mutate(snp_id = paste(CHROM, POS, REF, ALT, sep = '_')) %>%
         mutate(GT = get(sample))
 
diff --git a/R/hmm.R b/R/hmm.R
index 596c1f16..6cdd5d22 100644
--- a/R/hmm.R
+++ b/R/hmm.R
@@ -1,62 +1,8 @@
-############ Probability distributions ############
-
-#'@useDynLib numbat
 #'@import Rcpp
 NULL
 
-## refer to https://github.com/evanbiederstedt/poilogcpp
-
-#' Returns the density for the Poisson lognormal distribution with parameters mu and sig
-#' 
-#' @param x vector of integers, the observations
-#' @param mu mean of lognormal distribution
-#' @param sig standard deviation of lognormal distribution
-#' @param log boolean Return the log density if TRUE (default=FALSE)
-#' @return NULL
-#' @keywords internal
-dpoilog <- function(x, mu, sig, log=FALSE){
-  if (!(length(x) == length(mu) & length(x) == length(sig))) stop('dpoilog: All parameters must be same length') 
-  if (any((x[x!=0]/trunc(x[x!=0]))!=1)) stop('dpoilog: all x must be integers')
-  if (any(x<0)) stop('dpoilog: one or several values of x are negative')
-  if (!all(is.finite(c(mu,sig)))) {
-    stop('dpoilog: all parameters should be finite')
-  }
-  if (any(is.na(c(x,mu,sig)))) stop('dpoilog: Parameters cannot be NA')
-  if (any(sig<=0)) {
-      stop(c('dpoilog: sig is not larger than 0', unique(sig)))
-  }
-
-  p = poilog1(x=as.integer(x), my=as.double(mu), sig=as.double(sig^2))
-
-  p[p == 0] = 1e-15
-
-  if (log) {
-    return(log(p))
-  } else {
-    return(p)
-  }
-}
-
-#' Beta-binomial distribution density function
-#' A distribution is beta-binomial if p, the probability of success, 
-#' in a binomial distribution has a beta distribution with shape 
-#' parameters alpha > 0 and beta > 0
-#' For more details, see extraDistr::dbbinom
-#'
-#' @param x vector of quantiles
-#' @param size number of trials (zero or more)
-#' @param alpha numeric (default=1)
-#' @param beta numeric (default=1)
-#' @param log boolean (default=FALSE)
-#' @return density values returned as numeric vector
-#' @keywords internal
-dbbinom <- function(x, size, alpha = 1, beta = 1, log = FALSE) {
-    cppdbbinom(x, size, alpha, beta, log[1L])
-}
-
 ############ Allele HMMs ############
 
-
 #' Get an allele HMM
 #' @param pAD integer vector Paternal allele counts
 #' @param DP integer vector Total alelle counts
@@ -100,162 +46,6 @@ get_allele_hmm = function(pAD, DP, p_s, theta, gamma = 20) {
     return(hmm)
 }
 
-#' Viterbi algorithm for allele HMM
-#' @param hmm HMM object; expect variables x (allele depth), d (total depth),
-#' logPi (log transition prob matrix), delta (prior for each state), 
-#' alpha (alpha for each state), beta (beta for each state), 
-#' states (states), p_s (phase switch probs)
-#' @keywords internal
-viterbi_allele <- function(hmm) {
-
-    N <- hmm$N
-    M <- hmm$M
-    nu <- matrix(NA, nrow = N, ncol = M)
-    z <- rep(NA, N)
-
-    logprob = sapply(1:M, function(m) {
-
-        l_x = dbbinom(x = hmm$x, size = hmm$d, alpha = hmm$alpha[,m], beta = hmm$beta[,m], log = TRUE)
-
-        l_x[is.na(l_x)] = 0
-
-        return(l_x)
-
-    })
-
-    z = viterbi_compute(log(hmm$delta), logprob, hmm$logPi, N, M, nu, z)
-        
-    return(z)
-}
-
-#' Forward-backward algorithm for allele HMM
-#' @param hmm HMM object; expect variables x (allele depth), d (total depth),
-#' logPi (log transition prob matrix), delta (prior for each state), 
-#' alpha (alpha for each state), beta (beta for each state), 
-#' states (states), p_s (phase switch probs)
-#' @keywords internal
-forward_back_allele = function(hmm) {
-
-    # case of one-data point
-    if (hmm$N == 1) {
-        return(NA)
-    }
-
-    N <- hmm$N
-    M <- hmm$M
-        
-    logprob = sapply(1:M, function(m) {
-
-        l_x = dbbinom(x = hmm$x, size = hmm$d, alpha = hmm$alpha[,m], beta = hmm$beta[,m], log = TRUE)
-
-        l_x[is.na(l_x)] = 0
-
-        return(l_x)
-
-    })
-        
-    logphi <- log(hmm$delta)
-        
-    marginals = forward_backward_compute(logphi, logprob, hmm$logPi, N, M)
-
-    colnames(marginals) = hmm$states
-
-    return(marginals)
-}
-
-#' Only compute total log likelihood from an allele HMM
-#' @param hmm HMM object; expect variables x (allele depth), d (total depth),
-#' logPi (log transition prob matrix), delta (prior for each state), 
-#' alpha (alpha for each state), beta (beta for each state), 
-#' states (states), p_s (phase switch probs)
-#' @keywords internal
-likelihood_allele = function(hmm) {
-        
-    N <- hmm$N
-    M <- hmm$M
-        
-    logprob = sapply(1:M, function(m) {
-
-        l_x = dbbinom(x = hmm$x, size = hmm$d, alpha = hmm$alpha[,m], beta = hmm$beta[,m], log = TRUE)
-
-        l_x[is.na(l_x)] = 0
-
-        return(l_x)
-
-    })
-        
-    logphi <- log(hmm$delta)
-        
-    LL <- likelihood_compute(logphi, logprob, hmm$logPi, N, M)
-
-    return(LL)
-}
-
-#' allele-only HMM
-#' @param pAD integer vector Paternal allele counts
-#' @param DP integer vector Total alelle counts
-#' @param p_s numeric vector Phase switch probabilities
-#' @param t numeric Transition probability between copy number states
-#' @param theta_min numeric Minimum haplotype frequency deviation threshold
-#' @param gamma numeric Overdispersion in the allele-specific expression
-#' @return character vector Decoded states
-#' @keywords internal
-run_allele_hmm = function(pAD, DP, p_s, t = 1e-5, theta_min = 0.08, gamma = 20, prior = NULL) {
-
-    gamma = unique(gamma)
-
-    if (length(gamma) > 1) {
-        stop('More than one gamma parameter')
-    }
-
-    # states
-    states = c("neu", "theta_1_up", "theta_1_down", "theta_2_up", "theta_2_down")
-
-    N = length(pAD)
-    M = 5
-
-    # transition matrices
-    calc_trans_mat = function(p_s, t) {
-        matrix(
-            c(1-t, t/4, t/4, t/4, t/4, 
-             t/2, (1-t)*(1-p_s), (1-t)*p_s, t/4, t/4,
-             t/2, (1-t)*p_s, (1-t)*(1-p_s), t/4, t/4,
-             t/2, t/4, t/4, (1-t)*(1-p_s), (1-t)*p_s,
-             t/2, t/4, t/4, (1-t)*p_s, (1-t)*(1-p_s)),
-            ncol = 5,
-            byrow = TRUE
-        )
-    }
-
-    Pi = sapply(p_s, function(p_s) {calc_trans_mat(p_s, t)}) %>% 
-        array(dim = c(M, M, N))
-
-    # intitial probabilities
-    if (is.null(prior)) {
-        prior = rep(1/5, 5)
-    }
-
-    theta_1 = theta_min
-    theta_2 = 0.4
-    alphas = gamma * c(0.5, 0.5 + theta_1, 0.5 - theta_1, 0.5 + theta_2, 0.5 - theta_2)
-    betas = gamma * c(0.5, 0.5 - theta_1, 0.5 + theta_1, 0.5 - theta_2, 0.5 + theta_2)
-            
-    hmm = list(
-        x = pAD, 
-        logPi = log(Pi), 
-        delta = prior, 
-        alpha = matrix(rep(alphas, N), ncol = M, byrow = TRUE),
-        beta = matrix(rep(betas, N), ncol = M, byrow = TRUE),
-        d = DP,
-        N = N,
-        M = M,
-        states = states
-    )
-    
-    mpc = states[viterbi_allele(hmm)]
-    
-    return(mpc)
-}
 
 #' Calculate allele likelihoods
 #' @param pAD integer vector Paternal allele counts
@@ -270,260 +60,6 @@ calc_allele_lik = function (pAD, DP, p_s, theta, gamma = 20) {
     return(LL)
 }
 
-############ Joint HMM ############
-
-
-#' Run joint HMM on a pseudobulk profile
-#' @param pAD integer vector Paternal allele counts
-#' @param DP integer vector Total alelle counts
-#' @param p_s numeric vector Phase switch probabilities
-#' @param theta_min numeric Minimum haplotype imbalance threshold
-#' @param gamma numeric Overdispersion in the allele-specific expression
-#' @param Y_obs numeric vector Observed gene counts
-#' @param lambda_ref numeric vector Reference expression rates
-#' @param d_total integer Total library size for expression counts
-#' @param phi_del numeric Expected fold change for deletion
-#' @param phi_amp numeric Expected fold change for amplification
-#' @param phi_bamp numeric Expected fold change for balanced amplification
-#' @param phi_bdel numeric Expected fold change for balanced deletion
-#' @param mu numeric Global expression bias
-#' @param sig numeric Global expression variance
-#' @param t numeric Transition probability between copy number states
-#' @param exp_only logical Whether to only use expression data
-#' @param allele_only logical Whether to only use allele data
-#' @keywords internal
-run_joint_hmm = function(
-    pAD, DP, p_s, Y_obs = 0, lambda_ref = 0, d_total = 0, theta_min = 0.08, theta_neu = 0,
-    bal_cnv = TRUE, phi_del = 2^(-0.25), phi_amp = 2^(0.25), phi_bamp = phi_amp, phi_bdel = phi_del, 
-    alpha = 1, beta = 1, 
-    mu = 0, sig = 1,
-    t = 1e-5, gamma = 18,
-    prior = NULL, exp_only = FALSE, allele_only = FALSE,
-    classify_allele = FALSE, phasing = TRUE, debug = FALSE
-) {
-
-    # states
-    states = c(
-        "1" = "neu", "2" = "del_1_up", "3" = "del_1_down", "4" = "del_2_up", "5" = "del_2_down",
-        "6" = "loh_1_up", "7" = "loh_1_down", "8" = "loh_2_up", "9" = "loh_2_down", 
-        "10" = "amp_1_up", "11" = "amp_1_down", "12" = "amp_2_up", "13" = "amp_2_down", 
-        "14" = "bamp", "15" = "bdel"
-    )
-
-    states_cn = str_remove(states, '_up|_down')
-    states_phase = str_extract(states, 'up|down')
-
-    # relative abundance of states
-    w = c('neu' = 1, 'del_1' = 1, 'del_2' = 1e-10, 'loh_1' = 1, 'loh_2' = 1e-10, 'amp_1' = 1, 'amp_2' = 1e-10, 'bamp' = 1e-4, 'bdel' = 1e-10)
-        
-    # intitial probabilities
-    if (is.null(prior)) {
-        # encourage CNV from telomeres
-        prior = sapply(1:length(states), function(to){
-                get_trans_probs(
-                    t = min(t * 100, 1), p_s = 0, w,
-                    cn_from = 'neu', phase_from = NA,
-                    cn_to = states_cn[to], phase_to = states_phase[to])
-            })
-    }
-
-    # to do: renormalize the probabilities after deleting states
-    states_index = 1:length(states)
-
-    if (!bal_cnv) {
-        states_index = 1:13
-    }
-        
-    if (exp_only) {
-        pAD = rep(NA, length(pAD))
-        p_s = rep(0, length(p_s))
-    }
-    
-    if (allele_only) {
-        states_index = c(1, 6:9)
-
-        Y_obs = rep(NA, length(Y_obs))
-    }
-
-    if (!phasing) {
-        states_index = c(1, 6)
-        
-        p_s = ifelse(is.na(pAD), p_s, 0)
-        pAD = ifelse(pAD > (DP - pAD), pAD, DP - pAD)
-        theta_neu = 0.1
-        theta_min = 0.45
-    }
-
-    if (classify_allele) {
-        states_index = c(6,7)
-    }
-    
-    # transition matrices
-    As = calc_trans_mat(t, p_s, w, states_cn, states_phase)
-
-    theta_u_1 = 0.5 + theta_min
-    theta_d_1 = 0.5 - theta_min
-
-    theta_u_2 = 0.9
-    theta_d_2 = 0.1
-
-    theta_u_neu = 0.5 + theta_neu
-    theta_d_neu = 0.5 - theta_neu
-
-    # parameters for each state
-    alpha_states = gamma * c(theta_u_neu, rep(c(theta_u_1, theta_d_1, theta_u_2, theta_d_2), 3), theta_u_neu, theta_u_neu)
-    beta_states = gamma * c(theta_d_neu, rep(c(theta_d_1, theta_u_1, theta_d_2, theta_u_2), 3), theta_d_neu, theta_d_neu)
-    phi_states = c(1, rep(phi_del, 2), rep(0.5, 2), rep(1, 4), rep(phi_amp, 2), rep(2.5, 2), phi_bamp, phi_bdel)
-    
-    # subset for relevant states
-    prior = prior[states_index]
-    As = As[states_index, states_index,]
-    alpha_states = alpha_states[states_index]
-    beta_states = beta_states[states_index]
-    phi_states = phi_states[states_index]
-    states = states[states_index] %>% setNames(1:length(.))
-                
-    N = length(Y_obs)
-
-    if (length(mu) == 1) {
-        mu = rep(mu, N)
-        sig = rep(sig, N)
-    }
-
-    if (length(d_total) == 1) {
-        d_total = rep(d_total, N)
-    }
-
-    hmm = list(
-        x = matrix(pAD), 
-        d = matrix(DP),
-        y = matrix(Y_obs),
-        l = matrix(d_total),
-        lambda = matrix(lambda_ref),
-        mu = matrix(mu),
-        sig = matrix(sig),
-        logPi = log(As), 
-        phi = matrix(phi_states),
-        delta = matrix(prior), 
-        alpha = matrix(alpha_states),
-        beta = matrix(beta_states),
-        states = matrix(states),
-        p_s = p_s
-    )
-
-    MPC = states[viterbi_joint(hmm)]
-        
-    return(MPC)
-}
-
-
-#' Calculate the transition matrix for joint HMM
-#' @param t numeric; CNV state transition probability
-#' @param p_s numeric; phase switch probability
-#' @param w numeric; relative abundance of states
-#' @param states_cn character; CNV states
-#' @param states_phase character; haplotype phase states
-#' @return array; transition matrix
-#' @keywords internal
-calc_trans_mat = function(t, p_s, w, states_cn, states_phase) {
-
-    sapply(1:length(states_cn), function(from) {
-        sapply(1:length(states_cn), function(to) {
-            get_trans_probs(t, p_s, w, states_cn[from], states_phase[from], states_cn[to], states_phase[to])
-        }) %>% t
-    }) %>% t %>%
-    array(dim = c(length(states_cn), length(states_cn), length(p_s)))
-
-}
-
-
-#' Helper function to calculate transition porbabilities
-#' cn/phase are sclars, only p_s is vectorized
-#' @param t numeric; CNV state transition probability
-#' @param p_s numeric; phase switch probability
-#' @param w numeric; relative abundance of states
-#' @param cn_from character; originating CNV state
-#' @param phase_from character; originating haplotype phase state
-#' @param cn_to character; destination CNV state
-#' @param phase_to character; destination haplotype phase state
-#' @return numeric; transition probability
-#' @keywords internal
-get_trans_probs = function(t, p_s, w, cn_from, phase_from, cn_to, phase_to) {
-
-    if (cn_from == 'neu' & cn_to == 'neu') {
-        p_s = rep(0.5, length(p_s))
-    }
-
-    if (cn_from == cn_to) {
-        if (is.na(phase_from) & is.na(phase_to)) {
-            p = 1-t
-            p = rep(p, length(p_s))
-        } else if (phase_from == phase_to) {
-            p = (1-t) * (1-p_s)
-        } else {
-            p = (1-t) * p_s
-        }
-    } else {
-        p = t * w[[cn_to]]/sum(w[names(w)!=cn_from])
-        if (!is.na(phase_to)) {
-            p = p/2
-        }
-        p = rep(p, length(p_s))
-    }
-    
-    return(p)
-}
-
-#' Generalized viterbi algorithm for joint HMM
-#' @param hmm HMM object; expect variables x (allele depth), d (total depth), y (expression count), l (cell total library size),
-#' lambda (reference expression rate), mu (global expression mean), sig (global expression standard deviation), 
-#' logPi (log transition prob matrix), phi (expression fold change for each state), delta (prior for each state), 
-#' alpha (alpha for each state), beta (beta for each state), states (states), p_s (phase switch probs)
-#' @keywords internal
-viterbi_joint <- function(hmm) {
-
-    N <- nrow(hmm$x)
-    M <- nrow(hmm$logPi[,,1])
-    K <- ncol(hmm$x)
-    nu <- matrix(NA, nrow = N, ncol = M)
-    z <- rep(NA, N)
-    
-    logprob = sapply(1:M, function(m) {
-
-        sapply(
-            1:K,
-            function(k) {
-
-                l_x = dbbinom(x = hmm$x[,k], size = hmm$d[,k], alpha = hmm$alpha[m,k], beta = hmm$beta[m,k], log = TRUE)
-
-                l_x[is.na(l_x)] = 0
-
-                if (!is.null(hmm$y)) {
-                    l_y = rep(0,N)
-                    valid = !is.na(hmm$y[,k])
-
-                    l_y[valid] = dpoilog(
-                            x = hmm$y[valid,k],
-                            sig = hmm$sig[valid,k],
-                            mu = hmm$mu[valid,k] + log(hmm$phi[m,k] * hmm$l[valid,k] * hmm$lambda[valid,k]),
-                            log = TRUE
-                        )
-                } else {
-                    l_y = 0
-                }
-
-                return(l_x + l_y)
-
-            }
-        ) %>% rowSums()
-
-    })
-
-    z = viterbi_compute(log(hmm$delta), logprob, hmm$logPi, N, M, nu, z)
-        
-    return(z)
-}
-
 ############ Clonal deletion HMM ############
 
 #' Viterbi for clonal LOH detection
diff --git a/R/main.R b/R/main.R
index a5a8148c..38f78ffc 100644
--- a/R/main.R
+++ b/R/main.R
@@ -15,9 +15,10 @@
 #' @import patchwork
 #' @importFrom grDevices colorRampPalette
 #' @importFrom stats as.dendrogram as.dist cor cutree dbinom dnbinom dnorm dpois end hclust integrate model.matrix na.omit optim p.adjust pnorm reorder rnorm setNames start t.test as.ts complete.cases is.leaf na.contiguous
+#' @importFrom hahmmr logSumExp dpoilog dbbinom l_lnpois l_bbinom fit_lnpois_cpp likelihood_allele forward_back_allele run_joint_hmm_s15 run_allele_hmm_s5
 #' @import tibble
 #' @importFrom utils combn
-#' @useDynLib numbat
+#' @useDynLib numbat, .registration=TRUE
 NULL
 
 #' Run workflow to decompose tumor subclones
@@ -133,8 +134,9 @@ run_numbat = function(
 
     ######### Log parameters #########
     log_message(paste('\n',
-        glue('Numbat version: ', as.character(utils::packageVersion("numbat"))),
-        glue('Scistreer version: ', as.character(utils::packageVersion("scistreer"))),
+        glue('numbat version: ', as.character(utils::packageVersion("numbat"))),
+        glue('scistreer version: ', as.character(utils::packageVersion("scistreer"))),
+        glue('hahmmr version: ', as.character(utils::packageVersion("hahmmr"))),
         'Running under parameters:',
         glue('t = {t}'), 
         glue('alpha = {alpha}'),
diff --git a/R/utils.R b/R/utils.R
index 0b792af5..88fe112b 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -558,7 +558,7 @@ analyze_bulk = function(
         bulk = bulk %>% 
             group_by(CHROM) %>%
             mutate(state = 
-                run_joint_hmm(
+                run_joint_hmm_s15(
                     pAD = pAD,
                     DP = DP, 
                     p_s = p_s,
@@ -1128,7 +1128,7 @@ find_common_diploid = function(
             bulk %>% 
                 group_by(CHROM) %>%
                 mutate(state = 
-                    run_allele_hmm(
+                    run_allele_hmm_s5(
                         pAD = pAD,
                         DP = DP, 
                         p_s = p_s,
@@ -1355,17 +1355,6 @@ get_segs_neu = function(bulks) {
     return(segs_neu)
 }
 
-#' calculate joint likelihood of allele data
-#' @param AD numeric vector Variant allele depth
-#' @param DP numeric vector Total allele depth
-#' @param alpha numeric Alpha parameter of Beta-Binomial distribution
-#' @param beta numeric Beta parameter of Beta-Binomial distribution
-#' @return numeric Joint log likelihood
-#' @keywords internal
-l_bbinom = function(AD, DP, alpha, beta) {
-    sum(dbbinom(AD, DP, alpha, beta, log = TRUE))
-}
-
 #' fit a Beta-Binomial model by maximum likelihood
 #' @param AD numeric vector Variant allele depth
 #' @param DP numeric vector Total allele depth
@@ -1407,63 +1396,6 @@ fit_gamma = function(AD, DP, start = 20) {
     return(gamma)
 }
 
-#' Density function for a gamma-poisson distribution
-#' @keywords internal
-dgpois <- function(x, shape, rate, scale = 1/rate, log = FALSE) {
-  cpp_dgpois(x, shape, scale, log[1L])
-}
-
-
-#' calculate joint likelihood of a gamma-poisson model
-#' @param Y_obs numeric vector Gene expression counts
-#' @param lambda_ref numeric vector Reference expression levels
-#' @param d numeric Total library size
-#' @param alpha numeric Alpha parameter of Gamma-Poisson distribution
-#' @param beta numeric Beta parameter of Gamma-Poisson distribution
-#' @return numeric Joint log likelihood
-#' @keywords internal
-l_gpois = function(Y_obs, lambda_ref, d, alpha, beta, phi = 1) {
-    sum(dgpois(Y_obs, shape = alpha, rate = beta/(phi * d * lambda_ref), log = TRUE))
-}
-
-#' fit a Gamma-Poisson model by maximum likelihood
-#' @param Y_obs numeric vector Gene expression counts
-#' @param lambda_ref numeric vector Reference expression levels
-#' @param d numeric Total library size
-#' @return numeric MLE of alpha and beta
-#' @keywords internal
-fit_gpois = function(Y_obs, lambda_ref, d) {
-
-    Y_obs = Y_obs[lambda_ref > 0]
-    lambda_ref = lambda_ref[lambda_ref > 0]
-    
-    fit = stats4::mle(
-        minuslogl = function(alpha, beta) {
-            -l_gpois(Y_obs, lambda_ref, d, alpha, beta)
-        },
-        start = c(1, 1),
-        lower = c(0.01, 0.01),
-        control = list('trace' = FALSE)
-    )
-
-    alpha = fit@coef[1]
-    beta = fit@coef[2]
-    
-    return(fit)
-}
-
-#' calculate joint likelihood of a PLN model
-#' @param Y_obs numeric vector Gene expression counts
-#' @param lambda_ref numeric vector Reference expression levels
-#' @param d numeric Total library size
-#' @return numeric Joint log likelihood
-#' @keywords internal
-l_lnpois = function(Y_obs, lambda_ref, d, mu, sig, phi = 1) {
-    if (any(sig <= 0)) {stop(glue('negative sigma. value: {sig}'))}
-    if (length(sig) == 1) {sig = rep(sig, length(Y_obs))}
-    sum(log(dpoilog(Y_obs, mu + log(phi * d * lambda_ref), sig)))
-}
-
 #' fit a PLN model by maximum likelihood
 #' @param Y_obs numeric vector Gene expression counts
 #' @param lambda_ref numeric vector Reference expression levels
diff --git a/docker/Dockerfile b/docker/Dockerfile
index e54293e4..c620741f 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,10 +1,9 @@
-FROM r-base:4.2.1
+FROM r-base:4.3.1
 
-LABEL authors="Evan Biederstedt <evan.biederstedt@gmail.com>, Teng Gao <tgao@g.harvard.edu>" \
+LABEL authors="Evan Biederstedt <evan.biederstedt@gmail.com>, Teng Gao <tgaoteng@gmail.com>" \
     version.image="0.1.3" \
     version.pagoda2="0.1.3" \
-    description="r-base image R 4.2.1 to run numbat with R"
-
+    description="r-base image to run numbat with R"
 
 RUN apt-get update --yes && apt-get install --yes build-essential \
   libcurl4-gnutls-dev libxml2-dev libssl-dev libbz2-dev zlib1g-dev \
@@ -14,16 +13,33 @@ RUN apt-get update --yes && apt-get install --yes build-essential \
   libglpk-dev git autoconf gettext libtool automake  \
   samtools sudo
 
-
 RUN cd /usr/bin && \
   wget https://github.com/samtools/htslib/releases/download/1.15.1/htslib-1.15.1.tar.bz2 && \
   tar -vxjf htslib-1.15.1.tar.bz2 && cd htslib-1.15.1 && make && sudo make install
 
+RUN R -e 'chooseCRANmirror(ind=42); install.packages("BiocManager")'
+
+RUN R -e 'chooseCRANmirror(ind=42); install.packages("ragg")'
+
+RUN R -e 'chooseCRANmirror(ind=42); install.packages("pkgdown")'
+
+RUN R -e 'chooseCRANmirror(ind=42); install.packages("devtools")'
+
+RUN R -e 'devtools::install_github("YuLab-SMU/ggtree", dependencies=TRUE)'
+
+RUN R -e 'devtools::install_github("kharchenkolab/numbat", dependencies=TRUE)'
+
+RUN git clone https://github.com/kharchenkolab/numbat.git
+
+RUN mkdir -p /tmp && chmod 777 /tmp
+
+RUN chmod 777 /numbat/inst/bin/pileup_and_phase.R 
+
 
 RUN mkdir data
 
 # hg 38
-RUN cd /data && wget https://sourceforge.net/projects/cellsnp/files/SNPlist/genome1K.phase3.SNP_AF5e2.chr1toX.hg38.vcf.gz && gzip -d genome1K.phase3.SNP_AF5e2.chr1toX.hg38.vcf.gz 
+RUN cd /data && wget -q https://sourceforge.net/projects/cellsnp/files/SNPlist/genome1K.phase3.SNP_AF5e2.chr1toX.hg38.vcf.gz && gzip -d genome1K.phase3.SNP_AF5e2.chr1toX.hg38.vcf.gz 
 
 
 ## Note: not run; the user can follow these commands if they want hg19
@@ -32,7 +48,7 @@ RUN cd /data && wget https://sourceforge.net/projects/cellsnp/files/SNPlist/geno
 
 
 # hg38
-RUN cd /data && wget http://pklab.med.harvard.edu/teng/data/1000G_hg38.zip && unzip 1000G_hg38.zip 
+RUN cd /data && wget -q http://pklab.med.harvard.edu/teng/data/1000G_hg38.zip && unzip 1000G_hg38.zip 
 
 # hg19
 ##RUN cd /data && wget http://pklab.med.harvard.edu/teng/data/1000G_hg19.zip && unzip 1000G_hg19.zip
@@ -42,26 +58,7 @@ RUN cd /data && wget http://pklab.med.harvard.edu/teng/data/1000G_hg38.zip && un
 RUN git clone https://github.com/single-cell-genetics/cellsnp-lite.git && cd cellsnp-lite && \
   autoreconf -iv && ./configure && make && sudo make install    
 
-RUN wget https://storage.googleapis.com/broad-alkesgroup-public/Eagle/downloads/Eagle_v2.4.1.tar.gz && cd .. && tar -xvzf Eagle_v2.4.1.tar.gz && cd /Eagle_v2.4.1 && cp eagle /usr/bin
-
-
-
-RUN R -e 'chooseCRANmirror(ind=42); install.packages("BiocManager")'
-
-RUN R -e 'chooseCRANmirror(ind=42); install.packages("ragg")'
-
-RUN R -e 'chooseCRANmirror(ind=42); install.packages("pkgdown")'
-
-RUN R -e 'chooseCRANmirror(ind=42); install.packages("devtools")'
-
-
-RUN R -e 'devtools::install_github("kharchenkolab/numbat", dependencies=TRUE)'
-
-RUN git clone https://github.com/kharchenkolab/numbat.git
-
-RUN mkdir -p /tmp && chmod 777 /tmp
-
-RUN chmod 777 /numbat/inst/bin/pileup_and_phase.R 
+RUN wget -q https://storage.googleapis.com/broad-alkesgroup-public/Eagle/downloads/Eagle_v2.4.1.tar.gz && cd .. && tar -xvzf Eagle_v2.4.1.tar.gz && cd /Eagle_v2.4.1 && cp eagle /usr/bin
 
 
 WORKDIR /numbat
diff --git a/inst/bin/pileup_and_phase.R b/inst/bin/pileup_and_phase.R
index cbf16d7d..8bcceb0c 100644
--- a/inst/bin/pileup_and_phase.R
+++ b/inst/bin/pileup_and_phase.R
@@ -11,12 +11,12 @@ parser = add_option(parser, '--gmap', default = NULL, type = "character", help =
 parser = add_option(parser, '--eagle', type = "character", default = 'eagle', help = "Path to Eagle2 binary file")
 parser = add_option(parser, '--snpvcf', default = NULL, type = "character", help = "SNP VCF for pileup")
 parser = add_option(parser, '--paneldir', default = NULL, type = "character", help = "Directory to phasing reference panel (BCF files)")
-parser = add_option(parser, '--outdir', default = './pileup_and_phase', type = "character", help = "Output directory")
+parser = add_option(parser, '--outdir', default = NULL, type = "character", help = "Output directory")
 parser = add_option(parser, '--ncores', default = 1, type = "integer", help = "Number of cores")
 parser = add_option(parser, '--UMItag', default = "Auto", type = "character", help = "UMI tag in bam; should be Auto for 10x and XM for Slide-seq")
 parser = add_option(parser, '--cellTAG', default = "CB", type = "character", help = "Cell tag in bam; should be CB for 10x and XC for Slide-seq")
 parser = add_option(parser, '--smartseq', default = FALSE, action = 'store_true', help = "Running with SMART-seq mode; Supply a txt file containing directories of BAM files to --bams and a txt file containing cell names to --barcodes (each entry on its own line for both; ordering must match).")
-parser = add_option(parser, '--bulk', default = FALSE, action = 'store_true', help = "Running with bulk RNA-seq mode")
+parser = add_option(parser, '--bulk', default = FALSE, action = 'store_true', help = "Running with bulk RNA-seq mode; supply --bams and --samples but not --barcodes.")
 args <- parse_args(parser)
 
 suppressPackageStartupMessages({
@@ -29,9 +29,25 @@ suppressPackageStartupMessages({
     library(numbat)
 })
 
-# required args
-if (any(is.null(c(args$bams, args$barcodes, args$gmap, args$snpvcf, args$paneldir)))) {
-    stop('Missing one or more required arguments: bams, barcodes, gmap, snpvcf, paneldir')
+if (any(sapply(list(args$bams, args$snpvcf, args$outdir, args$paneldir, args$gmap), is.null))) {
+    stop('Missing one or more always required arguments: --bams, --snpvcf, --outdir, --paneldir, --gmap')
+}
+
+if (args$smartseq) {
+    if (any(sapply(list(args$barcodes), is.null))) {
+        stop('Missing one or more required arguments for smartseq mode: --barcodes')
+    }
+    mode = 'SMART-Seq'
+} else if (args$bulk) {
+    if (any(sapply(list(args$samples), is.null))) {
+        stop('Missing one or more required arguments for bulk mode: --samples')
+    }
+    mode = 'Bulk'
+} else {
+    if (any(sapply(list(args$samples, args$barcodes), is.null))) {
+        stop('Missing one or more required arguments for 10x mode: --samples, --barcodes')
+    }
+    mode = '10X'
 }
 
 label = args$label
@@ -52,8 +68,39 @@ cellTAG = args$cellTAG
 smartseq = args$smartseq
 bulk = args$bulk
 genome = ifelse(str_detect(args$gmap, 'hg19'), 'hg19', 'hg38')
+
+message(paste0('Running in ', mode, ' mode'))
 message(paste0('Using genome version: ', genome))
 
+## check if files exist
+for (bam in bams) {
+    if (!file.exists(bam)) {
+        stop('BAM file not found')
+    }
+}
+
+if (!file.exists(snpvcf)) {
+    stop('SNP VCF not found')
+}
+
+
+if (!is.null(barcodes)) {
+    for (barcode in barcodes) {
+        if (!file.exists(barcode)) {
+            stop('Barcode file not found')
+        }
+    }
+}
+
+if (!file.exists(gmap)) {
+    stop('Genetic map not found')
+}
+
+if (!file.exists(paneldir)) {
+    stop('Phasing reference panel not found')
+}
+
+
 dir.create(outdir, showWarnings = FALSE, recursive = TRUE)
 dir.create(glue('{outdir}/pileup'), showWarnings = FALSE)
 dir.create(glue('{outdir}/phasing'), showWarnings = FALSE)
diff --git a/man/calc_trans_mat.Rd b/man/calc_trans_mat.Rd
deleted file mode 100644
index 5e322034..00000000
--- a/man/calc_trans_mat.Rd
+++ /dev/null
@@ -1,26 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/hmm.R
-\name{calc_trans_mat}
-\alias{calc_trans_mat}
-\title{Calculate the transition matrix for joint HMM}
-\usage{
-calc_trans_mat(t, p_s, w, states_cn, states_phase)
-}
-\arguments{
-\item{t}{numeric; CNV state transition probability}
-
-\item{p_s}{numeric; phase switch probability}
-
-\item{w}{numeric; relative abundance of states}
-
-\item{states_cn}{character; CNV states}
-
-\item{states_phase}{character; haplotype phase states}
-}
-\value{
-array; transition matrix
-}
-\description{
-Calculate the transition matrix for joint HMM
-}
-\keyword{internal}
diff --git a/man/dbbinom.Rd b/man/dbbinom.Rd
deleted file mode 100644
index f8266053..00000000
--- a/man/dbbinom.Rd
+++ /dev/null
@@ -1,34 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/hmm.R
-\name{dbbinom}
-\alias{dbbinom}
-\title{Beta-binomial distribution density function
-A distribution is beta-binomial if p, the probability of success, 
-in a binomial distribution has a beta distribution with shape 
-parameters alpha > 0 and beta > 0
-For more details, see extraDistr::dbbinom}
-\usage{
-dbbinom(x, size, alpha = 1, beta = 1, log = FALSE)
-}
-\arguments{
-\item{x}{vector of quantiles}
-
-\item{size}{number of trials (zero or more)}
-
-\item{alpha}{numeric (default=1)}
-
-\item{beta}{numeric (default=1)}
-
-\item{log}{boolean (default=FALSE)}
-}
-\value{
-density values returned as numeric vector
-}
-\description{
-Beta-binomial distribution density function
-A distribution is beta-binomial if p, the probability of success, 
-in a binomial distribution has a beta distribution with shape 
-parameters alpha > 0 and beta > 0
-For more details, see extraDistr::dbbinom
-}
-\keyword{internal}
diff --git a/man/dgpois.Rd b/man/dgpois.Rd
deleted file mode 100644
index 5c6a14f5..00000000
--- a/man/dgpois.Rd
+++ /dev/null
@@ -1,12 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils.R
-\name{dgpois}
-\alias{dgpois}
-\title{Density function for a gamma-poisson distribution}
-\usage{
-dgpois(x, shape, rate, scale = 1/rate, log = FALSE)
-}
-\description{
-Density function for a gamma-poisson distribution
-}
-\keyword{internal}
diff --git a/man/dpoilog.Rd b/man/dpoilog.Rd
deleted file mode 100644
index 816b5ec8..00000000
--- a/man/dpoilog.Rd
+++ /dev/null
@@ -1,21 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/hmm.R
-\name{dpoilog}
-\alias{dpoilog}
-\title{Returns the density for the Poisson lognormal distribution with parameters mu and sig}
-\usage{
-dpoilog(x, mu, sig, log = FALSE)
-}
-\arguments{
-\item{x}{vector of integers, the observations}
-
-\item{mu}{mean of lognormal distribution}
-
-\item{sig}{standard deviation of lognormal distribution}
-
-\item{log}{boolean Return the log density if TRUE (default=FALSE)}
-}
-\description{
-Returns the density for the Poisson lognormal distribution with parameters mu and sig
-}
-\keyword{internal}
diff --git a/man/fit_gpois.Rd b/man/fit_gpois.Rd
deleted file mode 100644
index f7688725..00000000
--- a/man/fit_gpois.Rd
+++ /dev/null
@@ -1,22 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils.R
-\name{fit_gpois}
-\alias{fit_gpois}
-\title{fit a Gamma-Poisson model by maximum likelihood}
-\usage{
-fit_gpois(Y_obs, lambda_ref, d)
-}
-\arguments{
-\item{Y_obs}{numeric vector Gene expression counts}
-
-\item{lambda_ref}{numeric vector Reference expression levels}
-
-\item{d}{numeric Total library size}
-}
-\value{
-numeric MLE of alpha and beta
-}
-\description{
-fit a Gamma-Poisson model by maximum likelihood
-}
-\keyword{internal}
diff --git a/man/forward_back_allele.Rd b/man/forward_back_allele.Rd
deleted file mode 100644
index 2f03aa80..00000000
--- a/man/forward_back_allele.Rd
+++ /dev/null
@@ -1,18 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/hmm.R
-\name{forward_back_allele}
-\alias{forward_back_allele}
-\title{Forward-backward algorithm for allele HMM}
-\usage{
-forward_back_allele(hmm)
-}
-\arguments{
-\item{hmm}{HMM object; expect variables x (allele depth), d (total depth),
-logPi (log transition prob matrix), delta (prior for each state), 
-alpha (alpha for each state), beta (beta for each state), 
-states (states), p_s (phase switch probs)}
-}
-\description{
-Forward-backward algorithm for allele HMM
-}
-\keyword{internal}
diff --git a/man/get_trans_probs.Rd b/man/get_trans_probs.Rd
deleted file mode 100644
index 9e71ef52..00000000
--- a/man/get_trans_probs.Rd
+++ /dev/null
@@ -1,32 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/hmm.R
-\name{get_trans_probs}
-\alias{get_trans_probs}
-\title{Helper function to calculate transition porbabilities
-cn/phase are sclars, only p_s is vectorized}
-\usage{
-get_trans_probs(t, p_s, w, cn_from, phase_from, cn_to, phase_to)
-}
-\arguments{
-\item{t}{numeric; CNV state transition probability}
-
-\item{p_s}{numeric; phase switch probability}
-
-\item{w}{numeric; relative abundance of states}
-
-\item{cn_from}{character; originating CNV state}
-
-\item{phase_from}{character; originating haplotype phase state}
-
-\item{cn_to}{character; destination CNV state}
-
-\item{phase_to}{character; destination haplotype phase state}
-}
-\value{
-numeric; transition probability
-}
-\description{
-Helper function to calculate transition porbabilities
-cn/phase are sclars, only p_s is vectorized
-}
-\keyword{internal}
diff --git a/man/l_bbinom.Rd b/man/l_bbinom.Rd
deleted file mode 100644
index 4325edda..00000000
--- a/man/l_bbinom.Rd
+++ /dev/null
@@ -1,24 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils.R
-\name{l_bbinom}
-\alias{l_bbinom}
-\title{calculate joint likelihood of allele data}
-\usage{
-l_bbinom(AD, DP, alpha, beta)
-}
-\arguments{
-\item{AD}{numeric vector Variant allele depth}
-
-\item{DP}{numeric vector Total allele depth}
-
-\item{alpha}{numeric Alpha parameter of Beta-Binomial distribution}
-
-\item{beta}{numeric Beta parameter of Beta-Binomial distribution}
-}
-\value{
-numeric Joint log likelihood
-}
-\description{
-calculate joint likelihood of allele data
-}
-\keyword{internal}
diff --git a/man/l_gpois.Rd b/man/l_gpois.Rd
deleted file mode 100644
index 72ce8e17..00000000
--- a/man/l_gpois.Rd
+++ /dev/null
@@ -1,26 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils.R
-\name{l_gpois}
-\alias{l_gpois}
-\title{calculate joint likelihood of a gamma-poisson model}
-\usage{
-l_gpois(Y_obs, lambda_ref, d, alpha, beta, phi = 1)
-}
-\arguments{
-\item{Y_obs}{numeric vector Gene expression counts}
-
-\item{lambda_ref}{numeric vector Reference expression levels}
-
-\item{d}{numeric Total library size}
-
-\item{alpha}{numeric Alpha parameter of Gamma-Poisson distribution}
-
-\item{beta}{numeric Beta parameter of Gamma-Poisson distribution}
-}
-\value{
-numeric Joint log likelihood
-}
-\description{
-calculate joint likelihood of a gamma-poisson model
-}
-\keyword{internal}
diff --git a/man/l_lnpois.Rd b/man/l_lnpois.Rd
deleted file mode 100644
index c8018053..00000000
--- a/man/l_lnpois.Rd
+++ /dev/null
@@ -1,22 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils.R
-\name{l_lnpois}
-\alias{l_lnpois}
-\title{calculate joint likelihood of a PLN model}
-\usage{
-l_lnpois(Y_obs, lambda_ref, d, mu, sig, phi = 1)
-}
-\arguments{
-\item{Y_obs}{numeric vector Gene expression counts}
-
-\item{lambda_ref}{numeric vector Reference expression levels}
-
-\item{d}{numeric Total library size}
-}
-\value{
-numeric Joint log likelihood
-}
-\description{
-calculate joint likelihood of a PLN model
-}
-\keyword{internal}
diff --git a/man/likelihood_allele.Rd b/man/likelihood_allele.Rd
deleted file mode 100644
index 93ff3e30..00000000
--- a/man/likelihood_allele.Rd
+++ /dev/null
@@ -1,18 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/hmm.R
-\name{likelihood_allele}
-\alias{likelihood_allele}
-\title{Only compute total log likelihood from an allele HMM}
-\usage{
-likelihood_allele(hmm)
-}
-\arguments{
-\item{hmm}{HMM object; expect variables x (allele depth), d (total depth),
-logPi (log transition prob matrix), delta (prior for each state), 
-alpha (alpha for each state), beta (beta for each state), 
-states (states), p_s (phase switch probs)}
-}
-\description{
-Only compute total log likelihood from an allele HMM
-}
-\keyword{internal}
diff --git a/man/run_allele_hmm.Rd b/man/run_allele_hmm.Rd
deleted file mode 100644
index 087dd635..00000000
--- a/man/run_allele_hmm.Rd
+++ /dev/null
@@ -1,36 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/hmm.R
-\name{run_allele_hmm}
-\alias{run_allele_hmm}
-\title{allele-only HMM}
-\usage{
-run_allele_hmm(
-  pAD,
-  DP,
-  p_s,
-  t = 1e-05,
-  theta_min = 0.08,
-  gamma = 20,
-  prior = NULL
-)
-}
-\arguments{
-\item{pAD}{integer vector Paternal allele counts}
-
-\item{DP}{integer vector Total alelle counts}
-
-\item{p_s}{numeric vector Phase switch probabilities}
-
-\item{t}{numeric Transition probability between copy number states}
-
-\item{theta_min}{numeric Minimum haplotype frequency deviation threshold}
-
-\item{gamma}{numeric Overdispersion in the allele-specific expression}
-}
-\value{
-character vector Decoded states
-}
-\description{
-allele-only HMM
-}
-\keyword{internal}
diff --git a/man/run_joint_hmm.Rd b/man/run_joint_hmm.Rd
deleted file mode 100644
index 44d1521d..00000000
--- a/man/run_joint_hmm.Rd
+++ /dev/null
@@ -1,73 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/hmm.R
-\name{run_joint_hmm}
-\alias{run_joint_hmm}
-\title{Run joint HMM on a pseudobulk profile}
-\usage{
-run_joint_hmm(
-  pAD,
-  DP,
-  p_s,
-  Y_obs = 0,
-  lambda_ref = 0,
-  d_total = 0,
-  theta_min = 0.08,
-  theta_neu = 0,
-  bal_cnv = TRUE,
-  phi_del = 2^(-0.25),
-  phi_amp = 2^(0.25),
-  phi_bamp = phi_amp,
-  phi_bdel = phi_del,
-  alpha = 1,
-  beta = 1,
-  mu = 0,
-  sig = 1,
-  t = 1e-05,
-  gamma = 18,
-  prior = NULL,
-  exp_only = FALSE,
-  allele_only = FALSE,
-  classify_allele = FALSE,
-  phasing = TRUE,
-  debug = FALSE
-)
-}
-\arguments{
-\item{pAD}{integer vector Paternal allele counts}
-
-\item{DP}{integer vector Total alelle counts}
-
-\item{p_s}{numeric vector Phase switch probabilities}
-
-\item{Y_obs}{numeric vector Observed gene counts}
-
-\item{lambda_ref}{numeric vector Reference expression rates}
-
-\item{d_total}{integer Total library size for expression counts}
-
-\item{theta_min}{numeric Minimum haplotype imbalance threshold}
-
-\item{phi_del}{numeric Expected fold change for deletion}
-
-\item{phi_amp}{numeric Expected fold change for amplification}
-
-\item{phi_bamp}{numeric Expected fold change for balanced amplification}
-
-\item{phi_bdel}{numeric Expected fold change for balanced deletion}
-
-\item{mu}{numeric Global expression bias}
-
-\item{sig}{numeric Global expression variance}
-
-\item{t}{numeric Transition probability between copy number states}
-
-\item{gamma}{numeric Overdispersion in the allele-specific expression}
-
-\item{exp_only}{logical Whether to only use expression data}
-
-\item{allele_only}{logical Whether to only use allele data}
-}
-\description{
-Run joint HMM on a pseudobulk profile
-}
-\keyword{internal}
diff --git a/man/viterbi_allele.Rd b/man/viterbi_allele.Rd
deleted file mode 100644
index 7ffa6a1d..00000000
--- a/man/viterbi_allele.Rd
+++ /dev/null
@@ -1,18 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/hmm.R
-\name{viterbi_allele}
-\alias{viterbi_allele}
-\title{Viterbi algorithm for allele HMM}
-\usage{
-viterbi_allele(hmm)
-}
-\arguments{
-\item{hmm}{HMM object; expect variables x (allele depth), d (total depth),
-logPi (log transition prob matrix), delta (prior for each state), 
-alpha (alpha for each state), beta (beta for each state), 
-states (states), p_s (phase switch probs)}
-}
-\description{
-Viterbi algorithm for allele HMM
-}
-\keyword{internal}
diff --git a/man/viterbi_joint.Rd b/man/viterbi_joint.Rd
deleted file mode 100644
index 0ce57e53..00000000
--- a/man/viterbi_joint.Rd
+++ /dev/null
@@ -1,18 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/hmm.R
-\name{viterbi_joint}
-\alias{viterbi_joint}
-\title{Generalized viterbi algorithm for joint HMM}
-\usage{
-viterbi_joint(hmm)
-}
-\arguments{
-\item{hmm}{HMM object; expect variables x (allele depth), d (total depth), y (expression count), l (cell total library size),
-lambda (reference expression rate), mu (global expression mean), sig (global expression standard deviation), 
-logPi (log transition prob matrix), phi (expression fold change for each state), delta (prior for each state), 
-alpha (alpha for each state), beta (beta for each state), states (states), p_s (phase switch probs)}
-}
-\description{
-Generalized viterbi algorithm for joint HMM
-}
-\keyword{internal}
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index 2db09143..57445a49 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -11,93 +11,6 @@ Rcpp::Rostream<true>&  Rcpp::Rcout = Rcpp::Rcpp_cout_get();
 Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
 #endif
 
-// cppdbbinom
-NumericVector cppdbbinom(const NumericVector& x, const NumericVector& size, const NumericVector& alpha, const NumericVector& beta, const bool& log_prob);
-RcppExport SEXP _numbat_cppdbbinom(SEXP xSEXP, SEXP sizeSEXP, SEXP alphaSEXP, SEXP betaSEXP, SEXP log_probSEXP) {
-BEGIN_RCPP
-    Rcpp::RObject rcpp_result_gen;
-    Rcpp::RNGScope rcpp_rngScope_gen;
-    Rcpp::traits::input_parameter< const NumericVector& >::type x(xSEXP);
-    Rcpp::traits::input_parameter< const NumericVector& >::type size(sizeSEXP);
-    Rcpp::traits::input_parameter< const NumericVector& >::type alpha(alphaSEXP);
-    Rcpp::traits::input_parameter< const NumericVector& >::type beta(betaSEXP);
-    Rcpp::traits::input_parameter< const bool& >::type log_prob(log_probSEXP);
-    rcpp_result_gen = Rcpp::wrap(cppdbbinom(x, size, alpha, beta, log_prob));
-    return rcpp_result_gen;
-END_RCPP
-}
-// cpp_dgpois
-NumericVector cpp_dgpois(const NumericVector& x, const NumericVector& alpha, const NumericVector& beta, const bool& log_prob);
-RcppExport SEXP _numbat_cpp_dgpois(SEXP xSEXP, SEXP alphaSEXP, SEXP betaSEXP, SEXP log_probSEXP) {
-BEGIN_RCPP
-    Rcpp::RObject rcpp_result_gen;
-    Rcpp::RNGScope rcpp_rngScope_gen;
-    Rcpp::traits::input_parameter< const NumericVector& >::type x(xSEXP);
-    Rcpp::traits::input_parameter< const NumericVector& >::type alpha(alphaSEXP);
-    Rcpp::traits::input_parameter< const NumericVector& >::type beta(betaSEXP);
-    Rcpp::traits::input_parameter< const bool& >::type log_prob(log_probSEXP);
-    rcpp_result_gen = Rcpp::wrap(cpp_dgpois(x, alpha, beta, log_prob));
-    return rcpp_result_gen;
-END_RCPP
-}
-// logSumExp
-double logSumExp(const arma::vec& x);
-RcppExport SEXP _numbat_logSumExp(SEXP xSEXP) {
-BEGIN_RCPP
-    Rcpp::RObject rcpp_result_gen;
-    Rcpp::RNGScope rcpp_rngScope_gen;
-    Rcpp::traits::input_parameter< const arma::vec& >::type x(xSEXP);
-    rcpp_result_gen = Rcpp::wrap(logSumExp(x));
-    return rcpp_result_gen;
-END_RCPP
-}
-// likelihood_compute
-double likelihood_compute(Rcpp::NumericVector logphi, Rcpp::NumericMatrix logprob, arma::cube logPi, int n, int m);
-RcppExport SEXP _numbat_likelihood_compute(SEXP logphiSEXP, SEXP logprobSEXP, SEXP logPiSEXP, SEXP nSEXP, SEXP mSEXP) {
-BEGIN_RCPP
-    Rcpp::RObject rcpp_result_gen;
-    Rcpp::RNGScope rcpp_rngScope_gen;
-    Rcpp::traits::input_parameter< Rcpp::NumericVector >::type logphi(logphiSEXP);
-    Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type logprob(logprobSEXP);
-    Rcpp::traits::input_parameter< arma::cube >::type logPi(logPiSEXP);
-    Rcpp::traits::input_parameter< int >::type n(nSEXP);
-    Rcpp::traits::input_parameter< int >::type m(mSEXP);
-    rcpp_result_gen = Rcpp::wrap(likelihood_compute(logphi, logprob, logPi, n, m));
-    return rcpp_result_gen;
-END_RCPP
-}
-// forward_backward_compute
-Rcpp::NumericMatrix forward_backward_compute(Rcpp::NumericVector logphi, Rcpp::NumericMatrix logprob, arma::cube logPi, int n, int m);
-RcppExport SEXP _numbat_forward_backward_compute(SEXP logphiSEXP, SEXP logprobSEXP, SEXP logPiSEXP, SEXP nSEXP, SEXP mSEXP) {
-BEGIN_RCPP
-    Rcpp::RObject rcpp_result_gen;
-    Rcpp::RNGScope rcpp_rngScope_gen;
-    Rcpp::traits::input_parameter< Rcpp::NumericVector >::type logphi(logphiSEXP);
-    Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type logprob(logprobSEXP);
-    Rcpp::traits::input_parameter< arma::cube >::type logPi(logPiSEXP);
-    Rcpp::traits::input_parameter< int >::type n(nSEXP);
-    Rcpp::traits::input_parameter< int >::type m(mSEXP);
-    rcpp_result_gen = Rcpp::wrap(forward_backward_compute(logphi, logprob, logPi, n, m));
-    return rcpp_result_gen;
-END_RCPP
-}
-// viterbi_compute
-Rcpp::NumericVector viterbi_compute(Rcpp::NumericVector log_delta, Rcpp::NumericMatrix logprob, arma::cube logPi, int n, int m, Rcpp::NumericMatrix nu, Rcpp::NumericVector z);
-RcppExport SEXP _numbat_viterbi_compute(SEXP log_deltaSEXP, SEXP logprobSEXP, SEXP logPiSEXP, SEXP nSEXP, SEXP mSEXP, SEXP nuSEXP, SEXP zSEXP) {
-BEGIN_RCPP
-    Rcpp::RObject rcpp_result_gen;
-    Rcpp::RNGScope rcpp_rngScope_gen;
-    Rcpp::traits::input_parameter< Rcpp::NumericVector >::type log_delta(log_deltaSEXP);
-    Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type logprob(logprobSEXP);
-    Rcpp::traits::input_parameter< arma::cube >::type logPi(logPiSEXP);
-    Rcpp::traits::input_parameter< int >::type n(nSEXP);
-    Rcpp::traits::input_parameter< int >::type m(mSEXP);
-    Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type nu(nuSEXP);
-    Rcpp::traits::input_parameter< Rcpp::NumericVector >::type z(zSEXP);
-    rcpp_result_gen = Rcpp::wrap(viterbi_compute(log_delta, logprob, logPi, n, m, nu, z));
-    return rcpp_result_gen;
-END_RCPP
-}
 // roman2int_internal
 int roman2int_internal(Rcpp::StringVector letters, int nchar);
 RcppExport SEXP _numbat_roman2int_internal(SEXP lettersSEXP, SEXP ncharSEXP) {
@@ -110,60 +23,9 @@ BEGIN_RCPP
     return rcpp_result_gen;
 END_RCPP
 }
-// fit_lnpois_cpp
-arma::rowvec fit_lnpois_cpp(std::vector<int> Y_obs, std::vector<double> lambda_ref, int d);
-RcppExport SEXP _numbat_fit_lnpois_cpp(SEXP Y_obsSEXP, SEXP lambda_refSEXP, SEXP dSEXP) {
-BEGIN_RCPP
-    Rcpp::RObject rcpp_result_gen;
-    Rcpp::RNGScope rcpp_rngScope_gen;
-    Rcpp::traits::input_parameter< std::vector<int> >::type Y_obs(Y_obsSEXP);
-    Rcpp::traits::input_parameter< std::vector<double> >::type lambda_ref(lambda_refSEXP);
-    Rcpp::traits::input_parameter< int >::type d(dSEXP);
-    rcpp_result_gen = Rcpp::wrap(fit_lnpois_cpp(Y_obs, lambda_ref, d));
-    return rcpp_result_gen;
-END_RCPP
-}
-// poilog1
-std::vector<double> poilog1(std::vector<int> x, std::vector<double> my, std::vector<double> sig);
-RcppExport SEXP _numbat_poilog1(SEXP xSEXP, SEXP mySEXP, SEXP sigSEXP) {
-BEGIN_RCPP
-    Rcpp::RObject rcpp_result_gen;
-    Rcpp::RNGScope rcpp_rngScope_gen;
-    Rcpp::traits::input_parameter< std::vector<int> >::type x(xSEXP);
-    Rcpp::traits::input_parameter< std::vector<double> >::type my(mySEXP);
-    Rcpp::traits::input_parameter< std::vector<double> >::type sig(sigSEXP);
-    rcpp_result_gen = Rcpp::wrap(poilog1(x, my, sig));
-    return rcpp_result_gen;
-END_RCPP
-}
-// l_lnpois_cpp
-double l_lnpois_cpp(std::vector<int> Y_obs, std::vector<double> lambda_ref, int d, double mu, double sig, double phi);
-RcppExport SEXP _numbat_l_lnpois_cpp(SEXP Y_obsSEXP, SEXP lambda_refSEXP, SEXP dSEXP, SEXP muSEXP, SEXP sigSEXP, SEXP phiSEXP) {
-BEGIN_RCPP
-    Rcpp::RObject rcpp_result_gen;
-    Rcpp::RNGScope rcpp_rngScope_gen;
-    Rcpp::traits::input_parameter< std::vector<int> >::type Y_obs(Y_obsSEXP);
-    Rcpp::traits::input_parameter< std::vector<double> >::type lambda_ref(lambda_refSEXP);
-    Rcpp::traits::input_parameter< int >::type d(dSEXP);
-    Rcpp::traits::input_parameter< double >::type mu(muSEXP);
-    Rcpp::traits::input_parameter< double >::type sig(sigSEXP);
-    Rcpp::traits::input_parameter< double >::type phi(phiSEXP);
-    rcpp_result_gen = Rcpp::wrap(l_lnpois_cpp(Y_obs, lambda_ref, d, mu, sig, phi));
-    return rcpp_result_gen;
-END_RCPP
-}
 
 static const R_CallMethodDef CallEntries[] = {
-    {"_numbat_cppdbbinom", (DL_FUNC) &_numbat_cppdbbinom, 5},
-    {"_numbat_cpp_dgpois", (DL_FUNC) &_numbat_cpp_dgpois, 4},
-    {"_numbat_logSumExp", (DL_FUNC) &_numbat_logSumExp, 1},
-    {"_numbat_likelihood_compute", (DL_FUNC) &_numbat_likelihood_compute, 5},
-    {"_numbat_forward_backward_compute", (DL_FUNC) &_numbat_forward_backward_compute, 5},
-    {"_numbat_viterbi_compute", (DL_FUNC) &_numbat_viterbi_compute, 7},
     {"_numbat_roman2int_internal", (DL_FUNC) &_numbat_roman2int_internal, 2},
-    {"_numbat_fit_lnpois_cpp", (DL_FUNC) &_numbat_fit_lnpois_cpp, 3},
-    {"_numbat_poilog1", (DL_FUNC) &_numbat_poilog1, 3},
-    {"_numbat_l_lnpois_cpp", (DL_FUNC) &_numbat_l_lnpois_cpp, 6},
     {NULL, NULL, 0}
 };
 
diff --git a/src/bbinom.cpp b/src/bbinom.cpp
deleted file mode 100644
index b4007773..00000000
--- a/src/bbinom.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-
-#include <Rcpp.h>
-
-// https://github.com/twolodzko/extraDistr/blob/master/src/beta-binomial-distribution.cpp
-
-using std::pow;
-using std::sqrt;
-using std::abs;
-using std::exp;
-using std::log;
-using std::floor;
-using std::ceil;
-using Rcpp::NumericVector;
-
-
-// MACROS
-
-#define GETV(x, i)      x[i % x.length()]    // wrapped indexing of vector
-#define GETM(x, i, j)   x(i % x.nrow(), j)   // wrapped indexing of matrix
-#define VALID_PROB(p)   ((p >= 0.0) && (p <= 1.0))
-
-
-bool isInteger(double x, bool warn = true);
-
-
-bool isInteger(double x, bool warn) {
-  if (ISNAN(x))
-    return false;
-  if (((x < 0.0) ? std::ceil(x) : std::floor(x)) != x) {
-    if (warn) {
-      char msg[55];
-      std::snprintf(msg, sizeof(msg), "non-integer: %f", x);
-      Rcpp::warning(msg);
-    }
-    return false;
-  }
-  return true;
-}
-
-
-inline double logpmf_bbinom(double k, double n, double alpha,
-                            double beta, bool& throw_warning) {
-#ifdef IEEE_754
-  if (ISNAN(k) || ISNAN(n) || ISNAN(alpha) || ISNAN(beta))
-    return k+n+alpha+beta;
-#endif
-  if (alpha < 0.0 || beta < 0.0 || n < 0.0 || !isInteger(n, false)) {
-    throw_warning = true;
-    return NAN;
-  }
-  if (!isInteger(k) || k < 0.0 || k > n)
-    return R_NegInf;
-  // R::choose(n, k) * R::beta(k+alpha, n-k+beta) / R::beta(alpha, beta);
-  return R::lchoose(n, k) + R::lbeta(k+alpha, n-k+beta) - R::lbeta(alpha, beta);
-}
-
-
-// https://github.com/twolodzko/extraDistr/blob/master/src/beta-binomial-distribution.cpp
-
-// [[Rcpp::export]]
-NumericVector cppdbbinom(
-    const NumericVector& x,
-    const NumericVector& size,
-    const NumericVector& alpha,
-    const NumericVector& beta,
-    const bool& log_prob = false
-  ) {
-  
-  if (std::min({x.length(), size.length(),
-                alpha.length(), beta.length()}) < 1) {
-    return NumericVector(0);
-  }
-
-  int Nmax = std::max({
-    x.length(),
-    size.length(),
-    alpha.length(),
-    beta.length()
-  });
-  NumericVector p(Nmax);
-  
-  bool throw_warning = false;
-
-  for (int i = 0; i < Nmax; i++){
-    p[i] = logpmf_bbinom(GETV(x, i), GETV(size, i),
-                         GETV(alpha, i), GETV(beta, i),
-                         throw_warning);
-  }
-
-  if (!log_prob){
-    p = Rcpp::exp(p);
-  }
-  
-  if (throw_warning){
-    Rcpp::warning("NaNs produced");
-  }
-
-  return p;
-}
-
-
-// https://github.com/cran/extraDistr/blob/19708536de6549404dec52588a36f4b598d0016a/src/gamma-poisson-distribution.cpp
-
-inline double lfactorial(double x) {
-  return R::lgammafn(x + 1.0);
-}
-
-inline double logpmf_gpois(double x, double alpha, double beta,
-                           bool& throw_warning) {
-#ifdef IEEE_754
-  if (ISNAN(x) || ISNAN(alpha) || ISNAN(beta))
-    return x+alpha+beta;
-#endif
-  if (alpha <= 0.0 || beta <= 0.0) {
-    throw_warning = true;
-    return NAN;
-  }
-  if (!isInteger(x) || x < 0.0 || !R_FINITE(x))
-    return R_NegInf;
-  // p = beta/(1.0+beta);
-  double p = exp( log(beta) - log1p(beta) );
-  return R::lgammafn(alpha+x) - lfactorial(x) - R::lgammafn(alpha) +
-    log(p)*x + log(1.0-p)*alpha;
-}
-
-// [[Rcpp::export]]
-NumericVector cpp_dgpois(
-    const NumericVector& x,
-    const NumericVector& alpha,
-    const NumericVector& beta,
-    const bool& log_prob = false
-  ) {
-
-  if (std::min({x.length(), alpha.length(), beta.length()}) < 1) {
-    return NumericVector(0);
-  }
-
-  int Nmax = std::max({
-    x.length(),
-    alpha.length(),
-    beta.length()
-  });
-  NumericVector p(Nmax);
-
-  bool throw_warning = false;
-
-  for (int i = 0; i < Nmax; i++)
-    p[i] = logpmf_gpois(GETV(x, i), GETV(alpha, i),
-                        GETV(beta, i), throw_warning);
-
-  if (!log_prob)
-    p = Rcpp::exp(p);
-
-  if (throw_warning)
-    Rcpp::warning("NaNs produced");
-
-  return p;
-}
-
-
-
diff --git a/src/hmm.cpp b/src/hmm.cpp
deleted file mode 100644
index 7ed5c2cc..00000000
--- a/src/hmm.cpp
+++ /dev/null
@@ -1,194 +0,0 @@
-
-#include <RcppArmadillo.h>
-
-using namespace Rcpp;
-
-
-#ifdef HAVE_LONG_DOUBLE
-#  define LDOUBLE long double
-#  define EXPL expl
-#else
-#  define LDOUBLE double
-#  define EXPL exp
-#endif
-
-// [[Rcpp::export]]
-double logSumExp(const arma::vec& x) {
-    // https://github.com/helske/seqHMM/blob/master/src/logSumExp.cpp
-    unsigned int maxi = x.index_max();
-    LDOUBLE maxv = x(maxi);
-    if (!(maxv > -arma::datum::inf)) {
-        return -arma::datum::inf;
-    }
-    LDOUBLE cumsum = 0.0;
-    for (unsigned int i = 0; i < x.n_elem; i++) {
-        if ((i != maxi) & (x(i) > -arma::datum::inf)) {
-            cumsum += EXPL(x(i) - maxv);
-        }
-    }
-  
-    return maxv + log1p(cumsum);
-}
-
-
-// expensive for loops in likelihood_allele() and forward_back_allele)()
-
-// [[Rcpp::export]]
-double likelihood_compute(Rcpp::NumericVector logphi, Rcpp::NumericMatrix logprob, arma::cube logPi, int n, int m) {
-
-    double LL = 0.0;
-
-    for (int i = 0; i < n; i++) {
-
-        if (i > 0) {
-            Rcpp::NumericVector logphi_new(m); 
-            for (int j = 0; j < m; j++) {
-                Rcpp::NumericMatrix subset_logPi = wrap(logPi.slice(i));    
-                Rcpp::NumericVector logphi_logPi = logphi + subset_logPi(_, j);
-                logphi_new[j] = logSumExp(logphi_logPi);
-            }
-            logphi = logphi_new;
-        }
-
-        logphi = logphi + logprob(i, _);  // Note: logprob(i, _) is Rcpp::NumericVector
-
-        double logSumPhi = logSumExp(logphi);
-
-        logphi = logphi - logSumPhi;
-
-        LL = LL + logSumPhi;
-    }
-    
-    return LL;
-}
-
-
-// [[Rcpp::export]]
-Rcpp::NumericMatrix forward_backward_compute(Rcpp::NumericVector logphi, Rcpp::NumericMatrix logprob, arma::cube logPi, int n, int m) {
-
-    const int nrow = n;
-    const int ncol = m;
-    Rcpp::NumericMatrix logalpha(nrow, ncol); // logalpha <- matrix(as.double(rep(0, m * n)), nrow = n)
-
-    double lscale = 0.0;
-    double LL = 0.0;
-
-    for (int t = 0; t < n; t++) {
-
-        if (t > 0) {
-            Rcpp::NumericVector logphi_new(m); 
-            for (int j = 0; j < m; j++) {
-                Rcpp::NumericMatrix subset_logPi = wrap(logPi.slice(t));    
-                Rcpp::NumericVector logphi_logPi = logphi + subset_logPi(_, j);
-                logphi_new[j] = logSumExp(logphi_logPi);
-            }
-            logphi = logphi_new;
-        }
-
-        logphi = logphi + logprob(t, _);  // Note: logprob(i, _) is Rcpp::NumericVector
-
-        double logSumPhi = logSumExp(logphi);
-
-        logphi = logphi - logSumPhi;
-
-        lscale = lscale + logSumPhi;
-
-        logalpha(t, _) = logphi + lscale;
-
-    }
-
-    LL = lscale;
-
-    Rcpp::NumericMatrix logbeta(nrow, ncol); // logalpha <- matrix(as.double(rep(0, m * n)), nrow = n)
-    Rcpp::NumericVector logphi_(m);  // logphi <- log(as.double(rep(1/m, m)))
-    for (int i = 0; i < m; i++){
-        double mval =  static_cast<double>(m);
-        logphi_[i] = std::log(1/mval);
-    }
-
-    double lscale_ = std::log(m);
-
-    for (int t = n-2; t>=0; t--) {
-
-        // logphi = sapply(1:m, function(i) matrixStats::logSumExp(logphi + logprob[t+1,] + logPi[[t+1]][i,]))
-
-        Rcpp::NumericVector logphinew(m);
-        for (int j = 0; j < m; j++) {  
-            Rcpp::NumericMatrix subset_logPi = wrap(logPi.slice(t+1));   
-            Rcpp::NumericVector logphi_logprob_logPi = logphi_ + logprob(t+1, _) + subset_logPi(j, _);
-            logphinew[j] = logSumExp(logphi_logprob_logPi);
-        }
-        logphi_ = logphinew;
-
-        logbeta(t, _) = logphi_ + lscale_;
-        
-        double logSumPhi = logSumExp(logphi_);
-
-        logphi_ = logphi_ - logSumPhi;
-
-        lscale_ = lscale_ + logSumPhi;
-        
-    }
-
-    // p_up = exp(logalpha + logbeta - LL)[,1]
-
-    // matrix addition
-    Rcpp::NumericMatrix expoutput(nrow, ncol);
-    for (int i = 0; i < nrow; i++) {
-        for (int j = 0; j < ncol; j++) {
-            double sum = logalpha(i, j) + logbeta(i, j) - LL;
-            expoutput(i, j) = std::exp(sum);  // Rcpp::exp() works with vectors
-        }
-    }
-    
-    return expoutput;
-}
-
-// [[Rcpp::export]]
-Rcpp::NumericVector viterbi_compute(Rcpp::NumericVector log_delta, Rcpp::NumericMatrix logprob, arma::cube logPi, int n, int m, Rcpp::NumericMatrix nu, Rcpp::NumericVector z) {
-
-    const int nrow = n;
-    const int ncol = m;
-
-    nu(0, _) = log_delta + logprob(0, _);  // nu[1, ] <- log(hmm$delta) + logprob[1,]
-
-    Rcpp::NumericMatrix matrixnu(ncol, ncol);
-    for (int i = 1; i < nrow; i++) {
-        Rcpp::NumericVector nu_vec = nu(i - 1, _);
-        for (int j = 0; j < ncol; j++) {
-            matrixnu(j, _) = rep(nu_vec[j], ncol);
-        }  
-        // nu[i, ] = apply(matrixnu + hmm$logPi[,,i], 2, max)
-        // Step 1) 
-        //  Add the two matrices matrixnu + hmm$logPi[,,i]
-        Rcpp::NumericMatrix subset_logPi = wrap(logPi.slice(i));  
-        Rcpp::NumericMatrix sum_matrixnu_logPi(ncol, ncol);
-        for (int ii = 0; ii < ncol; ii++) {
-            for (int jj = 0; jj < ncol; jj++) {
-                sum_matrixnu_logPi(ii, jj) = matrixnu(ii, jj) + subset_logPi(ii, jj);
-            }
-        }
-        // Step 2)
-        // Calculate the 'max' for each column, return a NumericVector with 'max' for each column
-        Rcpp::NumericVector newnu;
-        for (int k = 0; k < ncol; k++) {
-            newnu.push_back(max(sum_matrixnu_logPi(_, k)));
-        }
-        nu(i, _) = newnu; // nu[i, ] = apply(matrixnu + hmm$logPi[,,i], 2, max)
-
-        nu(i, _) = nu(i, _)  + logprob(i, _); // nu[i, ] = nu[i, ] + logprob[i,]
-     
-    }
-
-    z[n - 1] = which_max(nu(n-1, _)) + 1; // which_max() uses 0-indexing, so add 1
-
-    // for (i in seq(N - 1, 1, -1)) z[i] <- which.max(hmm$logPi[,,i+1][, z[i+1]] + nu[i, ])
-    for (int t = n-2; t>=0; t--) {
-        Rcpp::NumericMatrix subset_logPi = wrap(logPi.slice(t+1)); 
-        // note: 'z[t+1] - 1' as we convert R 1-indexing to C++ 0-indexing
-        Rcpp::NumericVector logPi_nu = subset_logPi(_, z[t+1] - 1) + nu(t, _);
-        z[t] = which_max(logPi_nu) + 1; // which_max() uses 0-indexing, so add 1
-    }
-
-    return z;
-}
diff --git a/src/mle.cpp b/src/mle.cpp
deleted file mode 100644
index 9c6d0e47..00000000
--- a/src/mle.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-#include <cmath>  // std::pow
-#include <RcppArmadillo.h>
-// [[Rcpp::depends(RcppArmadillo)]]
-#include <roptim.h>
-// [[Rcpp::depends(roptim)]]
-using namespace roptim;
-
-double l_lnpois_cpp(std::vector<int> Y_obs, std::vector<double> lambda_ref, int d, double mu, double sig, double phi = 1.0);
-
-class fit_lnpois : public Functor {
-
-    public:
-    
-    const std::vector<int> Y_obs;
-    
-    const std::vector<double> lambda_ref;
-
-    const int d;
-
-    // initialize with source and destination
-    fit_lnpois(std::vector<int> Y_obs, const std::vector<double> lambda_ref, const int d): 
-        Y_obs(Y_obs), lambda_ref(lambda_ref), d(d) {}
-
-    double operator()(const arma::vec &x) override {
-        return -l_lnpois_cpp(Y_obs, lambda_ref, d, x[0], x[1]);
-    };
-};
-
-// [[Rcpp::export]]
-arma::rowvec fit_lnpois_cpp(std::vector<int> Y_obs, std::vector<double> lambda_ref, int d) {
-
-  fit_lnpois model(Y_obs, lambda_ref, d);
-
-  Roptim<fit_lnpois> opt("L-BFGS-B");
-  opt.control.trace = 0;
-  opt.set_hessian(false);
-  arma::vec lower = {-arma::datum::inf, 0.01};
-  opt.set_lower(lower);
-
-  arma::vec x = {0, 1};
-  opt.minimize(model, x);
-
-  return opt.par().t();
-}
-
diff --git a/src/poilog.cpp b/src/poilog.cpp
deleted file mode 100644
index 4974f6c6..00000000
--- a/src/poilog.cpp
+++ /dev/null
@@ -1,151 +0,0 @@
-
-#include <Rcpp.h>
-#include <R_ext/Applic.h>
-
-using namespace Rcpp;
-
-// refer to:
-// https://github.com/evanbiederstedt/poilogcpp
-
-// Function declarations
-double poilog(int x, double my, double sig);
-double bipoilog(int x, int y, double my1, double my2, double sig1, double sig2, double ro);
-double maxf(int x, double my, double sig);
-double upper(int x, double m, double my, double sig);
-double lower(int x, double m, double my, double sig);
-double my_f(double z, int x, double my, double sig, double fac);
-void my_f_vec(double *z, int n, void *p);
-std::vector<double> poilog1(std::vector<int> x, std::vector<double> my, std::vector<double> sig);
-
-struct My_fparams { 
-   int x; 
-   double sig; 
-   double my; 
-   double fac;
-};
-
-// [[Rcpp::export]]
-std::vector<double> poilog1(std::vector<int> x, std::vector<double> my, std::vector<double> sig){
-   int nrN = x.size();
-   std::vector<double> vect(nrN);
-   for (int i = 0; i < nrN; i++) {
-      vect[i] = poilog(x[i], my[i], sig[i]);
-   }
-   return vect;
-}
-
-double maxf(int x, double my, double sig){
-   double d,z;
-   z=0;
-   d=100;
-   while (d>0.00001) {
-      if (x-1-exp(z)-1/sig*(z-my)>0) {
-         z=z+d; 
-      } else {
-         z=z-d;
-      }
-      d=d/2;
-   }
-   return(z);
-}
-
-
-double upper(int x, double m, double my, double sig){
-   double d,z,mf;
-   mf = (x-1)*m-exp(m)-0.5/sig*((m-my)*(m-my));
-   z = m+20;
-   d = 10;
-   while (d>0.000001) {
-      if ((x-1)*z-exp(z)-0.5/sig*((z-my)*(z-my))-mf+log(1000000)>0) {
-         z=z+d; 
-      } else {
-         z=z-d;
-      }
-      d=d/2;
-   }
-   return(z);
-}
-
-
-double lower(int x, double m, double my, double sig) {
-   double d,z,mf;
-   mf = (x-1)*m-exp(m)-0.5/sig*((m-my)*(m-my));
-   z = m-20;
-   d = 10;
-   while (d>0.000001) {
-      if ((x-1)*z-exp(z)-0.5/sig*((z-my)*(z-my))-mf+log(1000000)>0) {
-         z=z-d; 
-      } else {
-         z=z+d;
-      }
-      d=d/2;
-   }
-   return(z);
-}
-
-
-double my_f(double z, int x, double my, double sig, double fac) {
-   return exp(z*x-exp(z)-0.5/sig*((z-my)*(z-my))-fac);
-}
-
-void my_f_vec(double *z, int n, void *p){
-   struct My_fparams *params = (struct My_fparams *)p;
-   int x      = (params->x);
-   double sig = (params->sig);
-   double my  = (params->my);
-   double fac = (params->fac);
-   for (int i=0;i<n;i++) {
-      z[i]=my_f(z[i],x,my,sig,fac);
-   }
-   return;
-}
-
-double poilog(int x, double my, double sig) {
-   double a, b, m, fac, val;
-   double result, abserr;
-   int last, neval, ier;
-   int lenw;
-   int limit=100;
-   double reltol=0.00001;
-   double abstol=0.00001;
-   lenw = 4 * limit;
-   int *iwork = (int *)calloc(limit, sizeof(int));
-   double *work = (double *)calloc(lenw, sizeof(double));
-
-   m = maxf(x,my,sig);
-   a = lower(x,m,my,sig);
-   b = upper(x,m,my,sig);
-   fac = lgamma(x+1);
-
-   struct My_fparams p = { x, sig, my, fac };
-
-   Rdqags(my_f_vec, (void *) &p, &a, &b,
-      &abstol,&reltol,&result,&abserr,&neval,&ier,
-      &limit,&lenw, &last, iwork, work);
-
-   if (ier!=0) {
-      Rcpp::stop("error in integration\n");
-   } 
-
-   val = result*(1/sqrt(2*M_PI*sig));
-   free(iwork);
-   free(work);
-   return(val);
-}
-
-// [[Rcpp::export]]
-double l_lnpois_cpp(std::vector<int> Y_obs, std::vector<double> lambda_ref, int d, double mu, double sig, double phi = 1.0) {
-
-   int n = Y_obs.size();
-
-   double l = 0;
-   double p = 0;
-
-   for (int i = 0; i < n; i++) {
-      p = poilog(Y_obs[i], mu + log(phi * d * lambda_ref[i]), std::pow(sig, 2));
-      if (p == 0) {p = 1e-15;}
-      l += log(p);
-   }
-
-   return l;
-}
\ No newline at end of file
diff --git a/tests/testthat/test_functions.R b/tests/testthat/test_functions.R
index a6486371..bc050a8e 100644
--- a/tests/testthat/test_functions.R
+++ b/tests/testthat/test_functions.R
@@ -1,73 +1,6 @@
 
 library(numbat)
 
-test_that("dpoilog works", {
-  expect_equal(numbat:::dpoilog(c(1,11),c(1,1),c(1,1)), c(0.175733342664327, 0.0150105250670325))
-})
-
-# test_that("HMM works", {
-
-#   message('getting pseudobulk ..')
-
-#   bulk = get_bulk(
-#       count_mat = count_mat_ATC2,
-#       df_allele = df_allele_ATC2,
-#       lambdas_ref = ref_hca,
-#       gtf = gtf_hg38
-#   )
-
-#   expect_true(length(bulk) > 0)
-
-#   message('running HMM..')
-
-#   bulk = analyze_bulk(bulk, t = 1e-5)
-
-#   expect_true(length(bulk) > 0)
-  
-# })
-
-test_that("logSumExp() works", {
-
-  b = numbat:::logSumExp(c(1.2, 3.4))
-  expect_equal(b, 3.5050833)  
-  d = numbat:::logSumExp(c(1.2, 3.4, -5.6, -7.8))
-  expect_equal(d, 3.5052067) 
-   
-})
-
-test_that("Check that likelihood_allele() works as expected", {
-
-  LL = numbat:::likelihood_allele(pre_likelihood_hmm)
-  expect_equal(as.integer(LL), -736)
-
-})
-
-test_that("Check that forward_backward() works as expected", {
-
-  p_major = numbat:::forward_back_allele(pre_likelihood_hmm)[,1]
-  expect_equal(is.vector(p_major), TRUE)
-  expect_equal(length(p_major), 1042)
-  expect_equal(round(p_major[1], 3), 0.963)
-  expect_equal(round(p_major[2], 3), 0)
-  expect_equal(round(p_major[3], 3), 0)
-  expect_equal(round(p_major[10], 3), 0.745)
-
-})
-
-test_that("Check that viterbi_allele() works as expected", {
-
-  states = numbat:::viterbi_allele(pre_likelihood_hmm)
-  expect_equal(sum(states == 1), 440)
-  expect_equal(sum(states == 2), 602)
-  expect_equal(states[1], 1)
-  expect_equal(states[2], 2)
-  expect_equal(states[3], 2)
-  expect_equal(states[1024], 1)
-  
-})
-
-
-
 test_that("Check that roman2int works as expected", {
    val.3899 <- 'MMMDCCCXCIX'
    val.3900 <- 'MMMCM'
diff --git a/vignettes/numbat.Rmd b/vignettes/numbat.Rmd
index a768435e..d6eec7a6 100644
--- a/vignettes/numbat.Rmd
+++ b/vignettes/numbat.Rmd
@@ -53,7 +53,7 @@ devtools::install_github("https://github.com/kharchenkolab/numbat")
 ## Docker
 We provide a ready-to-run Docker container that includes the Numbat R package and all of its dependencies. You can launch it as follows:
 ```
-docker run -v /work:/mnt/mydata -it pkharchenkolab/numbat-rbase:main /bin/bash
+docker run -v /work:/mnt/mydata -it pkharchenkolab/numbat-rbase:latest /bin/bash
 ```
 where `/work` is a local data folder you would like to access and write to. Within the container, `cellsnp-lite`, `eagle2` and `samtools` are available in path and the 1000 Genome SNP VCF and phasing panel files (hg38) are stored under `/data`. You can also launch R and run interactive analysis using Numbat (or run an R script).