From dab8cb54b07eb8ccb765fddf87f1a6cad7919021 Mon Sep 17 00:00:00 2001
From: viktor_petukhov <viktor.s.petuhov@ya.ru>
Date: Thu, 17 Mar 2022 12:44:06 -0400
Subject: [PATCH] Cleaned coda trees

---
 R/{coda_tree_new.R => coda_tree.R} |  33 +-----
 R/coda_trees.R                     | 170 -----------------------------
 2 files changed, 2 insertions(+), 201 deletions(-)
 rename R/{coda_tree_new.R => coda_tree.R} (91%)
 delete mode 100644 R/coda_trees.R

diff --git a/R/coda_tree_new.R b/R/coda_tree.R
similarity index 91%
rename from R/coda_tree_new.R
rename to R/coda_tree.R
index 44d81dd..f5140ec 100644
--- a/R/coda_tree_new.R
+++ b/R/coda_tree.R
@@ -1,3 +1,5 @@
+#' @import ape
+NULL
 
 #' Construct the canonical tree
 #'
@@ -36,9 +38,6 @@ constructTree <- function(cnts, groups, partition.thresh = 0){
     # Get cell types from opposite sides of the principal balance
     cells.plus <- cells.tmp[can.loadings > partition.thresh]
     cells.minus <- cells.tmp[can.loadings < partition.thresh]
-    #
-    # print(cells.plus)
-    # print(cells.minus)
 
     sbp.cda[cells.minus, id.bal] <- -1
     sbp.cda[cells.plus, id.bal] <- 1
@@ -208,33 +207,6 @@ getNodeBalances <- function(log.f, sbp.my){
   return(balances)
 }
 
-#' @keywords internal
-hclustSbp <- function(freqs, groups){
-  cell.types <- colnames(freqs)
-
-  mx <- matrix(0, nrow = length(cell.types), ncol = length(cell.types),
-              dimnames = list(cell.types, cell.types))
-  for(i in 1:length(cell.types)){
-    for(j in 1:length(cell.types)){
-      if (j <= i) next
-
-      ratio <- log(apply(freqs[,cell.types[i],drop=F], 1, psych::geometric.mean )) -
-        log(apply(freqs[,cell.types[j],drop=F], 1, psych::geometric.mean ))
-      # print('---')
-      # print(freqs)
-      # print(ratio)
-      # print(groups)
-      mod <- lm(groups ~ ratio)
-      res <- summary(mod)
-      pval <- res$coefficients[2,4]
-      mx[i, j] <- pval
-      mx[j, i] <- pval
-    }
-  }
-
-  hclust(dist(mx))
-}
-
 #' @keywords internal
 bestPartition <- function(freqs.tmp, groups){
 
@@ -363,5 +335,4 @@ sbpInNodes <- function(tree){
   sbp <- t(sbp)
 
   return(sbp)
-
 }
diff --git a/R/coda_trees.R b/R/coda_trees.R
deleted file mode 100644
index 87c8cda..0000000
--- a/R/coda_trees.R
+++ /dev/null
@@ -1,170 +0,0 @@
-#' @import ape
-NULL
-
-#' Get inner nodes on the tree from leaves to the root
-#'
-#' @param t A phylo object
-#' @return Order of inner nodes
-#' @keywords internal
-getNodeOrder <- function(t) {
-  checkTree(t)
-  # ---------
-
-  # Sort inner nodes by id
-  x <- t$edge[order(t$edge[,1]),]
-  nx <- nrow(x)
-  x1 <- x[seq(1, nx, 2),]  # First child
-  x2 <- x[seq(2, nx, 2),]  # Second child
-
-  id.used <- 1:(t$Nnode + 1)  # Used nodes and leaves
-  id.nodes <- c()  # order of nodes co collapse
-  id.parents <- x1[,1]  # names of inner nodes
-  id.children <- cbind(x1[,2], x2[,2])  # children
-
-  while(length(id.nodes) < t$Nnode){
-    # which children have already taken?
-    ids.taken <- (id.children %in% id.used) & (id.children > 0)
-    # which nodes can be taken at the current step?
-    node.condition <- rowSums(ids.taken) == 2
-    # Remember new nodes
-    id.nodes <- c(id.nodes, id.parents[node.condition])
-
-    # Do not use current nodes again
-    id.used <- c(id.used, id.nodes)
-    id.children[node.condition,] <- id.children[node.condition,]  * -1
-  }
-
-  return(id.nodes)
-}
-
-# Get balances for all inner nodes of the tree
-# 
-# t A phylo object
-# log.freq Logarithms of frequnencies
-# Balances, contrast matrix and names on cell types
-# We use ellipsis because a previous version of this function
-#           contains additional and insignificant parameters.
-# getNodeBalances <- function(t, log.freq, ...){
-#    checkDataTree(log.freq, t)
-#   # ---------
-# 
-#   sample.names <- rownames(log.freq)
-#   # Initialisation of balances
-#   n.cells <- ncol(log.freq)
-#   n.samples <- nrow(log.freq)
-#   log.freq.ext <- cbind(log.freq[,t$tip.label], matrix(0, nrow=n.samples, ncol=n.cells-1))  # log-frequencoes extended for inner nodes
-#   n.in.clade <- c(rep(1,n.cells), rep(0,n.cells-1)) # number of cell-types in a clade
-# 
-# 
-#   balances <- matrix(0, nrow=n.samples, ncol=n.cells-1)
-#   rownames(balances) <- sample.names
-#   sbp.my <- rbind(diag(x = 1, n.cells, n.cells), matrix(0, n.cells - 1,n.cells))
-# 
-#   edges <- t$edge
-#   id.edges <- getNodeOrder(t)
-#   # Calculate balances in traversing the tree
-#   for(k in id.edges)
-#   {
-#     clades <- edges[edges[,1]==k,2]
-#     id1 <- clades[1]
-#     id2 <- clades[2]
-# 
-#     if (n.in.clade[id1] + n.in.clade[id2] < 2)
-#       break
-# 
-#     n1 <- n.in.clade[id1]
-#     n2 <- n.in.clade[id2]
-#     balances[,k-n.cells] <- (log.freq.ext[,id1] / n1 - log.freq.ext[,id2] / n2) * sqrt(n1*n2/(n1+n2))
-#     log.freq.ext[,k] <- log.freq.ext[,id1] + log.freq.ext[,id2]
-#     n.in.clade[k] <- n.in.clade[id1] + n.in.clade[id2]
-# 
-#     sbp.my[k,] <- abs(sbp.my[id1,]) - abs(sbp.my[id2,])
-#   }
-# 
-#   for(k in id.edges)
-#   {
-#     n1 <- sum(sbp.my[k,]>0)
-#     n2 <- sum(sbp.my[k,]<0)
-#     sbp.my[k,sbp.my[k,]>0] <- 1/n1
-#     sbp.my[k,sbp.my[k,]<0] <- -1/n2
-#     sbp.my[k,] <- sbp.my[k,] * sqrt(n1*n2/(n1+n2))
-#   }
-# 
-#   res <- list(
-#     bal = balances,
-#     sbp = sbp.my[(n.cells+1):(2*n.cells-1),],
-#     cells = t$tip.label)
-#   return(res)
-# }
-
-
-#' Construct the canonical tree
-#'
-#' @param d.counts Table with cell type counts
-#' @param d.groups Groups variable for samples
-#' @return phylo tree
-#' @keywords internal
-constructCanonicalTree <- function(d.counts, d.groups){
-  checkDataGroups(d.counts, d.groups)
-  # ---------
-
-  n.cells <- ncol(d.counts)
-
-  sbp.cda <- matrix(0, nrow = n.cells, ncol = n.cells-1, dimnames = list(colnames(d.counts), c()))
-
-  unsolved.cells <- list(rownames(sbp.cda))  # List of cell types to separate
-  for(id.bal in 1:(n.cells-1)){
-    # If list to separate contains two cell types: perform random separation
-    if(length(unsolved.cells[[id.bal]]) == 2){
-      sbp.cda[unsolved.cells[[id.bal]][1],id.bal] <- 1
-      sbp.cda[unsolved.cells[[id.bal]][2],id.bal] <- -1
-      next
-    }
-    # ------------------------------------------------
-    #  Data for the current subset of cells
-    d.tmp <- d.counts[, colnames(d.counts) %in% unsolved.cells[[id.bal]]]
-
-    # -------
-    # Difene the most contrast balance
-    # can.loadings <- getCdaLoadings(d.tmp, d.groups)
-    can.loadings <- getLoadings(d.tmp, d.groups)
-
-    cells.tmp <- rownames(can.loadings)
-
-    # ga.res <- gaPartitionAll(d.tmp, d.groups, n.epoch = 300)
-    # can.loadings <- ga.res[1,]
-    # names(can.loadings) <- colnames(d.tmp)
-    # print(can.loadings)
-    # cells.tmp <- names(can.loadings)
-    #
-
-    # cda <- composition.sampling(d.tmp, d.groups, n.iter = 100)
-    # can.loadings <- rowMeans(cda$balances)
-    # cells.tmp <- names(can.loadings)
-    # print(cells.tmp)
-
-    # -------
-    # Get cell types from opposite sides of the principal balance
-    cells.plus <- cells.tmp[can.loadings > 0]
-    cells.minus <- cells.tmp[can.loadings < 0]
-    #
-    # print(cells.plus)
-    # print(cells.minus)
-
-    sbp.cda[cells.minus, id.bal] <- -1
-    sbp.cda[cells.plus, id.bal] <- 1
-
-    if(length(cells.minus) > 1){
-      unsolved.cells[[length(unsolved.cells) + 1]] <- cells.minus
-    }
-
-    if(length(cells.plus) > 1){
-      unsolved.cells[[length(unsolved.cells) + 1]] <- cells.plus
-    }
-
-  }
-
-  # res <- list(sbp = sbp.cda, partiotions = unsolved.cells)
-  tree <- sbp2tree(sbp.cda)
-  return(tree)
-}