From 6c1e48df21c82dbbff6586bde86936c594730a00 Mon Sep 17 00:00:00 2001
From: "Kamil S. Jaron" <kamiljaron@gmail.com>
Date: Mon, 4 Nov 2024 02:47:10 -0800
Subject: [PATCH] Sploidyplot (#169)

* resolved speckliness with a new alt plot, its not yet the default, but it works, can be run with --alt_plot flag

* fixing #153 - suprise suprise, smudgeplot package already has a function for weighted quantiles :-)

* keeping track of all the testing and proof of concept functions, most notably, local agregation for the alt plot

* quantile filtering is now applied after coverage filter

* adding the first draft of peak fitting using centrality

* adding code for popart plot

* added peak agregation algorithm in the main smudgeplot source code

* adding python funciton that generates the smudge container given coverges

* the new version is nearly working the only missing bit is the smudgeplot itself

* the over-very-lnog time playiung around stuff

* histogramless, but working version of smudgeplot ORIEL

* updating readme with the new imlpemenation

* giant cleanup - removing the R package and simplifying the installation

* fixing / simplifying installation instructions

* streamlining interface and allowing for plotting parameters to be passed to all subprocess

* making title work

* fixing interface problem to call hetmer module; adding kmer numbers to the log; adding exploration of 1/2 of the final coverage, but it still does not solve the problem of autotetraploids

* adding parametrised coverage range to explore

* adding error filtering step that stops inference of coverage for datasets with very high proportion of errors (>0.8)

* adding small explorations on alternative centralities

* making fix

* small fixes
---
 .gitignore                                    |    2 +
 DESCRIPTION                                   |   14 -
 Makefile                                      |   17 +-
 NAMESPACE                                     |   32 -
 R/annotate_peaks.R                            |   16 -
 R/annotate_summits.R                          |   19 -
 R/bw.nrdW.R                                   |   18 -
 R/coverage_histogram.R                        |   52 -
 R/estimate_1n_coverage_1d_subsets.R           |   65 --
 R/estimate_1n_coverage_highest_peak.R         |   27 -
 R/filter_peak_sizes.R                         |   10 -
 R/filter_peaks.R                              |   14 -
 R/generate_summary.R                          |   40 -
 R/get_1d_peaks.R                              |   24 -
 R/get_col_ramp.R                              |   19 -
 R/get_peak_sizes.R                            |   14 -
 R/get_peak_summary.R                          |   22 -
 R/get_smudge_container.R                      |   49 -
 R/get_trinoploid_1n_est.R                     |   20 -
 R/guess_genome_structure.R                    |   14 -
 R/peak_agregation.R                           |   48 -
 R/plot_expected_haplotype_structure.R         |   21 -
 R/plot_histograms.R                           |   68 --
 R/plot_legend.R                               |   26 -
 R/plot_seq_error_line.R                       |   13 -
 R/plot_smudgeplot.R                           |   36 -
 R/reduce_structure_representation.R           |   19 -
 R/round_minor_variant_cov.R                   |   13 -
 R/rounding.R                                  |   16 -
 R/smudge_summary.R                            |   11 -
 R/smudge_warn.R                               |   12 -
 R/transform_minor_variant_cov.R               |   13 -
 R/transform_pair_cov.R                        |   11 -
 R/wtd.quantile.R                              |   25 -
 README.md                                     |   39 +-
 exec/centrality_plot.R                        |   11 +
 exec/smudgeplot.py                            |  348 +++++-
 exec/smudgeplot_plot.R                        |  423 +++----
 install.R                                     |    5 -
 man/annotate_peaks.Rd                         |   11 -
 man/annotate_summits.Rd                       |   17 -
 man/bw.nrdW.Rd                                |   12 -
 man/coverage_histogram.Rd                     |   11 -
 man/estimate_1n_coverage_1d_subsets.Rd        |   11 -
 man/estimate_1n_coverage_highest_peak.Rd      |   11 -
 man/filter_peak_sizes.Rd                      |   11 -
 man/filter_peaks.Rd                           |   11 -
 man/generate_summary.Rd                       |   11 -
 man/get_1d_peaks.Rd                           |   17 -
 man/get_col_ramp.Rd                           |   14 -
 man/get_peak_sizes.Rd                         |   11 -
 man/get_peak_summary.Rd                       |   11 -
 man/get_smudge_container.Rd                   |   11 -
 man/get_trinoploid_1n_est.Rd                  |   11 -
 man/guess_genome_structure.Rd                 |   11 -
 man/peak_agregation.Rd                        |   16 -
 man/plot_expected_haplotype_structure.Rd      |   17 -
 man/plot_histograms.Rd                        |   19 -
 man/plot_legend.Rd                            |   11 -
 man/plot_seq_error_line.Rd                    |   11 -
 man/plot_smudgeplot.Rd                        |   24 -
 man/reduce_structure_representation.Rd        |   14 -
 man/round_minor_variant_cov.Rd                |   11 -
 man/rounding.Rd                               |   11 -
 man/smudge_summary.Rd                         |   12 -
 man/smudge_warn.Rd                            |   12 -
 man/transform_minor_variant_cov.Rd            |   12 -
 man/transform_pair_cov.Rd                     |   12 -
 man/wtd.quantile.Rd                           |   14 -
 playground/alternative_fitting/README.md      |  250 +++++
 .../alternative_plot_covA_covB.R              |   49 +
 .../alternative_plotting.R                    |   48 +
 .../alternative_plotting_functions.R          |  119 ++
 .../alternative_plotting_testing.R            |   90 ++
 .../alternative_fitting/pair_clustering.py    |   84 ++
 .../fooling_around_impossible_locations.R     |  113 --
 ...wberry_full_kmer_families_fooling_around.R |  154 +--
 playground/popart.R                           |   56 +
 tests/README.md                               |   55 +-
 tests/testthat.R                              |    4 -
 tests/testthat/fmlo_smaple_cov_file.tsv       | 1000 -----------------
 .../test-estimate_1n_coverage_1d_subsets.R    |   18 -
 tests/testthat/test-fitting.R                 |   46 -
 tests/testthat/test-get_smudge_container.R    |   22 -
 tests/testthat/test-get_summary.R             |   49 -
 85 files changed, 1298 insertions(+), 2893 deletions(-)
 delete mode 100644 DESCRIPTION
 delete mode 100644 NAMESPACE
 delete mode 100644 R/annotate_peaks.R
 delete mode 100644 R/annotate_summits.R
 delete mode 100644 R/bw.nrdW.R
 delete mode 100644 R/coverage_histogram.R
 delete mode 100644 R/estimate_1n_coverage_1d_subsets.R
 delete mode 100644 R/estimate_1n_coverage_highest_peak.R
 delete mode 100644 R/filter_peak_sizes.R
 delete mode 100644 R/filter_peaks.R
 delete mode 100644 R/generate_summary.R
 delete mode 100644 R/get_1d_peaks.R
 delete mode 100644 R/get_col_ramp.R
 delete mode 100644 R/get_peak_sizes.R
 delete mode 100644 R/get_peak_summary.R
 delete mode 100644 R/get_smudge_container.R
 delete mode 100644 R/get_trinoploid_1n_est.R
 delete mode 100644 R/guess_genome_structure.R
 delete mode 100644 R/peak_agregation.R
 delete mode 100644 R/plot_expected_haplotype_structure.R
 delete mode 100644 R/plot_histograms.R
 delete mode 100644 R/plot_legend.R
 delete mode 100644 R/plot_seq_error_line.R
 delete mode 100644 R/plot_smudgeplot.R
 delete mode 100644 R/reduce_structure_representation.R
 delete mode 100644 R/round_minor_variant_cov.R
 delete mode 100644 R/rounding.R
 delete mode 100644 R/smudge_summary.R
 delete mode 100644 R/smudge_warn.R
 delete mode 100644 R/transform_minor_variant_cov.R
 delete mode 100644 R/transform_pair_cov.R
 delete mode 100644 R/wtd.quantile.R
 create mode 100644 exec/centrality_plot.R
 delete mode 100644 install.R
 delete mode 100644 man/annotate_peaks.Rd
 delete mode 100644 man/annotate_summits.Rd
 delete mode 100644 man/bw.nrdW.Rd
 delete mode 100644 man/coverage_histogram.Rd
 delete mode 100644 man/estimate_1n_coverage_1d_subsets.Rd
 delete mode 100644 man/estimate_1n_coverage_highest_peak.Rd
 delete mode 100644 man/filter_peak_sizes.Rd
 delete mode 100644 man/filter_peaks.Rd
 delete mode 100644 man/generate_summary.Rd
 delete mode 100644 man/get_1d_peaks.Rd
 delete mode 100644 man/get_col_ramp.Rd
 delete mode 100644 man/get_peak_sizes.Rd
 delete mode 100644 man/get_peak_summary.Rd
 delete mode 100644 man/get_smudge_container.Rd
 delete mode 100644 man/get_trinoploid_1n_est.Rd
 delete mode 100644 man/guess_genome_structure.Rd
 delete mode 100644 man/peak_agregation.Rd
 delete mode 100644 man/plot_expected_haplotype_structure.Rd
 delete mode 100644 man/plot_histograms.Rd
 delete mode 100644 man/plot_legend.Rd
 delete mode 100644 man/plot_seq_error_line.Rd
 delete mode 100644 man/plot_smudgeplot.Rd
 delete mode 100644 man/reduce_structure_representation.Rd
 delete mode 100644 man/round_minor_variant_cov.Rd
 delete mode 100644 man/rounding.Rd
 delete mode 100644 man/smudge_summary.Rd
 delete mode 100644 man/smudge_warn.Rd
 delete mode 100644 man/transform_minor_variant_cov.Rd
 delete mode 100644 man/transform_pair_cov.Rd
 delete mode 100644 man/wtd.quantile.Rd
 create mode 100644 playground/alternative_fitting/README.md
 create mode 100644 playground/alternative_fitting/alternative_plot_covA_covB.R
 create mode 100644 playground/alternative_fitting/alternative_plotting.R
 create mode 100644 playground/alternative_fitting/alternative_plotting_functions.R
 create mode 100644 playground/alternative_fitting/alternative_plotting_testing.R
 create mode 100644 playground/alternative_fitting/pair_clustering.py
 delete mode 100644 playground/fooling_around_impossible_locations.R
 create mode 100644 playground/popart.R
 delete mode 100644 tests/testthat.R
 delete mode 100644 tests/testthat/fmlo_smaple_cov_file.tsv
 delete mode 100644 tests/testthat/test-estimate_1n_coverage_1d_subsets.R
 delete mode 100644 tests/testthat/test-fitting.R
 delete mode 100644 tests/testthat/test-get_smudge_container.R
 delete mode 100644 tests/testthat/test-get_summary.R

diff --git a/.gitignore b/.gitignore
index 7df2771..5a8d476 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,5 +3,7 @@ figures
 playground
 docs
 exec/PloidyPlot
+exec/hetmers
 *.o
 .DS_Store
+smu2text_smu
diff --git a/DESCRIPTION b/DESCRIPTION
deleted file mode 100644
index 0e64686..0000000
--- a/DESCRIPTION
+++ /dev/null
@@ -1,14 +0,0 @@
-Package: smudgeplot
-Type: Package
-Title:
-Version: 0.1.3
-Date: 2016-05-31
-Author: Kamil S. Jaron <kamiljaron@gmail.com>
-Maintainer: Kamil S. Jaron <kamiljaron@gmail.com>
-Description: The package to plot smudgeplot
-Depends: R (>= 3.3.1), viridis, argparse
-License: GPL-3
-LazyLoad: yes
-RoxygenNote: 7.2.3
-Suggests: testthat
-Encoding: UTF-8
diff --git a/Makefile b/Makefile
index 83170da..ad29043 100644
--- a/Makefile
+++ b/Makefile
@@ -1,30 +1,23 @@
 # PATH for libraries is guessed
-RPACKAGE = smudgeplot
-$(eval RPATH := $(shell Rscript -e "noquote(.libPaths())" | tail -1 | cut -f 2 -d ' '))
 CFLAGS = -O3 -Wall -Wextra -Wno-unused-result -fno-strict-aliasing
 
 ifndef INSTALL_PREFIX
     INSTALL_PREFIX = /usr/local
 endif
 
-R_INSTALLATION = $(RPATH)/$(RPACKAGE)
-HET_KMERS_INST = $(INSTALL_PREFIX)/bin/smudgeplot.py $(INSTALL_PREFIX)/bin/PloidyPlot
-SMUDGEPLOT_INST = $(INSTALL_PREFIX)/bin/smudgeplot_plot.R
+HET_KMERS_INST = $(INSTALL_PREFIX)/bin/smudgeplot.py $(INSTALL_PREFIX)/bin/hetmers
+SMUDGEPLOT_INST = $(INSTALL_PREFIX)/bin/smudgeplot_plot.R $(INSTALL_PREFIX)/bin/centrality_plot.R
 
 .PHONY : install
-install : $(HET_KMERS_INST) $(SMUDGEPLOT_INST) $(CUTOFF_INST) $(R_INSTALLATION) 
-# - removing the R package from the list
+install : $(HET_KMERS_INST) $(SMUDGEPLOT_INST) $(CUTOFF_INST) 
 
 $(INSTALL_PREFIX)/bin/% : exec/%
 	install -C $< $(INSTALL_PREFIX)/bin
 
-$(R_INSTALLATION) : R/*
-	Rscript install.R
-
-exec/PloidyPlot: src_ploidyplot/PloidyPlot.c src_ploidyplot/libfastk.c src_ploidyplot/libfastk.h src_ploidyplot/matrix.c src_ploidyplot/matrix.h
+exec/hetmers: src_ploidyplot/PloidyPlot.c src_ploidyplot/libfastk.c src_ploidyplot/libfastk.h src_ploidyplot/matrix.c src_ploidyplot/matrix.h
 	gcc $(CFLAGS) -o $@ src_ploidyplot/PloidyPlot.c src_ploidyplot/libfastk.c src_ploidyplot/matrix.c -lpthread -lm
 
 
 .PHONY : clean
 clean :
-	rm -f exec/PloidyPlot
+	rm -f exec/hetmers
diff --git a/NAMESPACE b/NAMESPACE
deleted file mode 100644
index e9a49e9..0000000
--- a/NAMESPACE
+++ /dev/null
@@ -1,32 +0,0 @@
-# Generated by roxygen2: do not edit by hand
-
-export(annotate_peaks)
-export(annotate_summits)
-export(bw.nrdW)
-export(coverage_histogram)
-export(estimate_1n_coverage_1d_subsets)
-export(estimate_1n_coverage_highest_peak)
-export(filter_peak_sizes)
-export(filter_peaks)
-export(generate_summary)
-export(get_1d_peaks)
-export(get_col_ramp)
-export(get_peak_sizes)
-export(get_peak_summary)
-export(get_smudge_container)
-export(get_trinoploid_1n_est)
-export(guess_genome_structure)
-export(peak_agregation)
-export(plot_expected_haplotype_structure)
-export(plot_histograms)
-export(plot_legend)
-export(plot_seq_error_line)
-export(plot_smudgeplot)
-export(reduce_structure_representation)
-export(round_minor_variant_cov)
-export(rounding)
-export(smudge_summary)
-export(smudge_warn)
-export(transform_minor_variant_cov)
-export(transform_pair_cov)
-export(wtd.quantile)
diff --git a/R/annotate_peaks.R b/R/annotate_peaks.R
deleted file mode 100644
index f1fdc80..0000000
--- a/R/annotate_peaks.R
+++ /dev/null
@@ -1,16 +0,0 @@
-#' @title annotate_peaks
-#'
-#' @description
-#' \code{annotate_peaks}
-#'
-#' @export
-
-# annotate peaks
-annotate_peaks <- function(.peak_points, .min_kmerpair_cov, .max_kmerpair_cov){
-    for (summit in which(.peak_points$summit == T)){
-        peak <- .peak_points$peak[summit]
-        to_plot <- .peak_points[.peak_points$peak == peak, c('x','y')]
-        to_plot <- to_plot[!is.na(to_plot$x),]
-        points(transform_x(to_plot$y), transform_y(to_plot$x, .min_kmerpair_cov, .max_kmerpair_cov), pch = peak)
-    }
-}
\ No newline at end of file
diff --git a/R/annotate_summits.R b/R/annotate_summits.R
deleted file mode 100644
index 0221c4d..0000000
--- a/R/annotate_summits.R
+++ /dev/null
@@ -1,19 +0,0 @@
-#' @title annotate_summits
-#'
-#' @description
-#' \code{annotate_summits}
-#'
-#' @export
-
-annotate_summits <- function(.peak_points, .peak_sizes, .min_kmerpair_cov, .max_kmerpair_cov, col = 'red'){
-    for (summit in which(.peak_points$summit == T)){
-         peak <- .peak_points$peak[summit]
-         peak_size <- .peak_sizes$rel_size[.peak_sizes$peak == peak]
-         # points(transform_x(.peak_points$y[summit]),
-         #        transform_y(.peak_points$x[summit], .min_kmerpair_cov, .max_kmerpair_cov))
-         text(transform_x(.peak_points$y[summit]),
-              transform_y(.peak_points$x[summit], .min_kmerpair_cov, .max_kmerpair_cov),
-              round(peak_size, 3),
-              offset = 0, cex = 1.2, col = col, xpd=NA)
-    }
-}
\ No newline at end of file
diff --git a/R/bw.nrdW.R b/R/bw.nrdW.R
deleted file mode 100644
index 92266c3..0000000
--- a/R/bw.nrdW.R
+++ /dev/null
@@ -1,18 +0,0 @@
-#' @title bw.nrdW
-#'
-#' @description
-#' \code{bw.nrdW} is a function that mimics bw.nrd0 - Silverman's ‘rule of thumb’ for estimating the kernel width, but using weighted values;
-#' this can be useful if one wants to mimics kerned smoothing to a histogram using counts as weights
-#'
-#' @export
-
-bw.nrdW <- function (x, w)
-{
-    if (length(x) < 2L)
-        stop("need at least 2 data points")
-    Ex <- weighted.mean(x, w)
-    hi <- sqrt(1 / (sum(w) - 1) * sum(w * (x - Ex)^2)) #wieighted SD
-    if (!(lo <- min(hi, wtd.iqr(x, w)/1.34)))
-        (lo <- hi) || (lo <- abs(x[1L])) || (lo <- 1)
-    0.9 * lo * sum(w)^(-0.2)
-}
diff --git a/R/coverage_histogram.R b/R/coverage_histogram.R
deleted file mode 100644
index bb84cf9..0000000
--- a/R/coverage_histogram.R
+++ /dev/null
@@ -1,52 +0,0 @@
-#' @title coverage_histogram
-#'
-#' @description
-#' \code{coverage_histogram} plots a histogram from a table that contains both values (what is normally x) and their respective frequencies (a column called 'freq');
-#'
-#' @export
-
-# coverage_histogram(.cov_tab, bins = .bins, xlim = c(0, 0.5), 
-#                     column = 'minor_variant_rel_cov', horiz = FALSE, .col)
-# bins <- .bins
-# xlim <- c(0, 0.5)
-# column <- 'minor_variant_rel_cov'
-# horiz <- FALSE
-
-coverage_histogram <- function(.cov_tab, bins, xlim, column, horiz, .col){
-
-    # define breaks of the histogram
-    prettified_bins <- length(pretty(c(xlim[1],xlim[2]), n = bins))
-    x_breaks <- seq(xlim[1], xlim[2], length = prettified_bins)
-    # identify buckets of each kmer pair
-    # Note: these are right open intervals so the last ratio bucket contain 0.5
-    kmerpairs2buckets <- list(findInterval(.cov_tab[, column], x_breaks, left.open = TRUE))
-    # and for each bucket aggregate their frequencies
-    agregated_freq <- aggregate(.cov_tab[, 'freq'], by = kmerpairs2buckets, sum)
-
-    # from, to are the borders of the bucket, pos is for plotting, and freq is the agregated value 
-    ratio_hist <- data.frame(from = x_breaks[1:(prettified_bins-1)], to = x_breaks[2:prettified_bins], freq = 0)
-    ratio_hist[, 'pos'] <- (ratio_hist[, 'from'] + ratio_hist[, 'to']) / 2
-    ratio_hist[agregated_freq[, 1], 'freq'] <- agregated_freq[, 2] 
-
-    width = (ratio_hist[1, 'to'] - ratio_hist[1, 'from']) / 2
-
-    # horizontal plot
-    if (horiz){
-        # c(bottom, left, top, right)
-        par(mar=c(3.8,0,0,1))
-        plot(NULL, type="n", xlab="", ylab="", axes=FALSE,
-             ylim=xlim, xlim=c(0, max(ratio_hist[, 'freq'])))
-        for ( i in 1:prettified_bins) {
-            rect(0, ratio_hist[i, 'pos'] - width, ratio_hist[i, 'freq'], ratio_hist[i, 'pos'] + width, col = .col, border = FALSE)
-        }
-    } else {
-        # vertical plot
-        par(mar=c(0,3.8,1,0))
-        plot(NULL, type="n", xlab="", ylab="", axes=FALSE,
-             ylim=c(0, max(ratio_hist[, 'freq'])), xlim=xlim)
-        for ( i in 1:prettified_bins) {
-            rect(ratio_hist[i, 'pos'] - width, 0, ratio_hist[i, 'pos'] + width, ratio_hist[i, 'freq'], col = .col, border = FALSE)
-        }
-    }
-}
-
diff --git a/R/estimate_1n_coverage_1d_subsets.R b/R/estimate_1n_coverage_1d_subsets.R
deleted file mode 100644
index 74a31b3..0000000
--- a/R/estimate_1n_coverage_1d_subsets.R
+++ /dev/null
@@ -1,65 +0,0 @@
-#' @title estimate_1n_coverage_1d_subsets
-#'
-#' @description
-#' \code{estimate_1n_coverage_1d_subsets} is used as the first proxy of 1n coverage. The estimate is subsequentially refined by the brightest smudge.
-#'
-#' @export
-
-estimate_1n_coverage_1d_subsets <- function(.cov_tab){
-    total_kmers <- sum(.cov_tab[, 3])
-    
-    minor_freq_subsets <- list(
-        AB_subset = .cov_tab[, 'minor_variant_rel_cov'] > 0.48,
-        AAB_subset = .cov_tab[, 'minor_variant_rel_cov'] < 0.3433333 & .cov_tab[, 'minor_variant_rel_cov'] > 0.3233333,
-        AAAB_subset = .cov_tab[, 'minor_variant_rel_cov'] < 0.26 & .cov_tab[, 'minor_variant_rel_cov'] > 0.24,
-        AAAAB_subset = .cov_tab[, 'minor_variant_rel_cov'] < 0.21 & .cov_tab[, 'minor_variant_rel_cov'] > 0.19,
-        AAAAAB_subset = .cov_tab[, 'minor_variant_rel_cov'] < 0.1766667 & .cov_tab[, 'minor_variant_rel_cov'] > 0.1566667
-    )
-
-    peak_frame_2 <- get_1d_peaks(.cov_tab[minor_freq_subsets[[1]], ], 3)
-    peak_frame_3 <- get_1d_peaks(.cov_tab[minor_freq_subsets[[2]], ], 2)
-    peak_frame_4 <- get_1d_peaks(.cov_tab[minor_freq_subsets[[3]], ], 1)
-    peak_frame_5 <- get_1d_peaks(.cov_tab[minor_freq_subsets[[4]], ], 1)
-    peak_frame_6 <- get_1d_peaks(.cov_tab[minor_freq_subsets[[5]], ], 1)
-    peak_frame_6_max_height = which.max(peak_frame_6$height)
-
-    skip_peak_4 = FALSE
-    if ( !is.na(peak_frame_2$cov[1]) & !is.na(peak_frame_4$cov[1]) ){
-        if ( round(peak_frame_4$cov[1] / peak_frame_2$cov[1]) >= 2 ) { #This is characteristic of allotetraploids which do not have a high AAAB signal. In this case, peak_frame_4 may pick up on the AAAAAABB signal especially if there is high repetitiveness
-            skip_peak_4 = TRUE #peak_frame_4 will not be included in the weighted mean, since it will artificially inflate the estimate
-        }
-    }
-    skip_peak_5 = FALSE
-    if ( !is.na(peak_frame_5$cov[1]) & !is.na(peak_frame_6$cov[1]) ){
-        if (abs(peak_frame_6$cov[1] - peak_frame_5$cov[1]) <= 5 & peak_frame_6$height[1] > peak_frame_5$height[1]) { #This is characteristic of hexaploids in which peak_frame_5 may pick up on the AAAAAB signal
-          skip_peak_5 = TRUE #peak_frame_5 will not be included in the weighted mean, since it will artificially inflate the estimate
-        }
-    }
-
-    if ( !is.na(peak_frame_2$cov[1]) ){
-        peak_frame_2$cov <- peak_frame_2$cov / (2 * round(peak_frame_2$cov / peak_frame_2$cov[1]))
-    }
-    if ( !is.na(peak_frame_3$cov[1]) ){
-        peak_frame_3$cov <- peak_frame_3$cov / (3 * round(peak_frame_3$cov / peak_frame_3$cov[1]))
-    }
-    if ( !is.na(peak_frame_4$cov[1]) ){
-        peak_frame_4$cov <- peak_frame_4$cov / (4 * round(peak_frame_4$cov / peak_frame_4$cov[1]))
-    }
-    if ( !is.na(peak_frame_5$cov[1]) ){
-        peak_frame_5$cov <- peak_frame_5$cov / (5 * round(peak_frame_5$cov / peak_frame_5$cov[1]))
-    }
-    if ( !is.na(peak_frame_6$cov[1]) ){
-        peak_frame_6$cov <- peak_frame_6$cov / (6 * round(peak_frame_6$cov / peak_frame_6$cov[peak_frame_6_max_height]))
-    }
-
-    if (skip_peak_4) {
-    	peak_frame <- rbind(peak_frame_2, peak_frame_3, peak_frame_5, peak_frame_6)
-    } else if (skip_peak_5) {
-    	peak_frame <- rbind(peak_frame_2, peak_frame_3, peak_frame_4, peak_frame_6)
-    } else {
-    	peak_frame <- rbind(peak_frame_2, peak_frame_3, peak_frame_4, peak_frame_5, peak_frame_6)
-    }
-
-    peak_frame <- peak_frame[is.finite(rowSums(peak_frame)),]
-    weighted.mean(peak_frame$cov, peak_frame$height, na.rm = T)
-}
diff --git a/R/estimate_1n_coverage_highest_peak.R b/R/estimate_1n_coverage_highest_peak.R
deleted file mode 100644
index 2b22772..0000000
--- a/R/estimate_1n_coverage_highest_peak.R
+++ /dev/null
@@ -1,27 +0,0 @@
-#' @title estimate_1n_coverage_highest_peak
-#'
-#' @description
-#' \code{estimate_1n_coverage_highest_peak}
-#'
-#' @export
-
-estimate_1n_coverage_highest_peak <- function(.filt_peak_sizes, .cov_tab, .draft_1n){
-    # 3 indications
-    # AAB is presumabely highest
-    # AAB is has the lowest coverages that correspondents to mutliples of two
-    # the is not other peak around 1/6 and 1/2
-    highest_peak <- which.max(.filt_peak_sizes[, 'rel_size'])
-    
-    if(is.na(.draft_1n)){
-        # assuming that the greatest peak is the diploid one
-        draft_1n <- .filt_peak_sizes[highest_peak, 'pair_cov'] / 2
-    } else {
-        draft_1n <- .draft_1n
-    }
-
-    highest_min_cov <- .filt_peak_sizes[highest_peak, 'minor_variant_cov_rounded']
-    highest_subset <- .cov_tab[, 'minor_variant_rel_cov'] < (highest_min_cov + 0.01) & .cov_tab[, 'minor_variant_rel_cov'] > (highest_min_cov - 0.01)
-    major_peaks <- get_1d_peaks(.cov_tab[highest_subset, ], 1, 1)
-    major_cov <- major_peaks$cov[which.max(major_peaks$height)]
-    major_cov / round(major_cov / draft_1n)
-}
diff --git a/R/filter_peak_sizes.R b/R/filter_peak_sizes.R
deleted file mode 100644
index d224d2f..0000000
--- a/R/filter_peak_sizes.R
+++ /dev/null
@@ -1,10 +0,0 @@
-#' @title filter_peak_sizes
-#'
-#' @description
-#' \code{filter_peak_sizes}
-#'
-#' @export
-
-filter_peak_sizes <- function(.peak_sizes, .threshold = 0.005){
-    .peak_sizes[!.peak_sizes$rel_size < .threshold,]
-}
\ No newline at end of file
diff --git a/R/filter_peaks.R b/R/filter_peaks.R
deleted file mode 100644
index c443c88..0000000
--- a/R/filter_peaks.R
+++ /dev/null
@@ -1,14 +0,0 @@
-#' @title filter_peaks
-#'
-#' @description
-#' \code{filter_peaks}
-#'
-#' @export
-
-filter_peaks <- function(.peak_points, .peak_sizes){
-    to_filter <- !.peak_points$peak %in% .peak_sizes$peak
-
-    .peak_points$peak[to_filter] <- NA
-    .peak_points$summit[to_filter] <- F
-    return(.peak_points)
-}
\ No newline at end of file
diff --git a/R/generate_summary.R b/R/generate_summary.R
deleted file mode 100644
index b0aec12..0000000
--- a/R/generate_summary.R
+++ /dev/null
@@ -1,40 +0,0 @@
-#' @title generate_summary
-#'
-#' @description
-#' \code{generate_summary}
-#'
-#' @export
-
-generate_summary <- function(.args, .smudge_summary){
-    smudge_summary(.args$output, "1n coverage estimates (Coverage of every haplotype; Don't confuse with genome coverage which is (ploidy * 1n coverage).)")
-    smudge_summary(.args$output, "* User defined 1n coverage:", .args$n_cov)
-    smudge_summary(.args$output, "* Subset 1n coverage estimate:", .smudge_summary$n_subset_est)
-    smudge_summary(.args$output, "* Highest peak 1n coverage estimate:", round(.smudge_summary$n_peak_est, 1))
-    smudge_summary(.args$output, "1n coverage used in smudgeplot (one of the three above):", round(.smudge_summary$n, 1))
-
-    smudge_summary(.args$output, "* Proposed ploidy:", .smudge_summary$genome_ploidy)
-
-    for_sure_heterozygous <- sapply(.smudge_summary$peak_sizes$structure, function(x){sum(unlist(strsplit(x, split = '')) == 'B') == 1})
-    # THIS LINE
-    minimal_number_of_heterozygous_loci <- ceiling(sum(.smudge_summary$peak_sizes$abs_size[for_sure_heterozygous]) / .args$kmer_size)
-
-    smudge_summary(.args$output, "* Minimal number of heterozygous loci:", minimal_number_of_heterozygous_loci)
-    smudge_summary(.args$output, "Note: This number is NOT an estimate of the total number of heterozygous loci, it's merely setting the lower boundary if the inference of heterozygosity peaks is correct.")
-
-    smudge_summary(.args$output, "* Proportion of heterozygosity carried by pairs in different genome copies (table)")
-    tab_to_print <- data.frame( genome_copies = 2:max(.smudge_summary$peak_sizes$ploidy),
-                                propotion_of_heterozygosity = round(sapply(2:max(.smudge_summary$peak_sizes$ploidy), function(x){ sum(.smudge_summary$peak_sizes$rel_size[.smudge_summary$peak_sizes$ploidy == x]) }), 2))
-    smudge_summary(.args$output, paste0(capture.output(tab_to_print), collapse = "\n"))
-
-    carried_by_paralogs <- sum(.smudge_summary$peak_sizes$rel_size[.smudge_summary$peak_sizes$ploidy > .smudge_summary$genome_ploidy])
-    smudge_summary(.args$output, "* Proportion of heterozygosity carried by paralogs:", round(carried_by_paralogs, 3))
-
-    smudge_summary(.args$output, "* Summary of all detected peaks (table)")
-    tab_to_print <- .smudge_summary$peak_sizes[,c(11,2,3,8,9)]
-    tab_to_print[,c(2:5)] <- round(tab_to_print[,c(2:5)], 2)
-    colnames(tab_to_print) <- c('peak','kmers [#]', 'kmers [proportion]', 'summit B / (A + B)', 'summit A + B')
-    smudge_summary(.args$output, paste0(capture.output(tab_to_print), collapse = "\n"))
-
-    write.table(tab_to_print, paste0(.args$output, '_summary_table.tsv'),
-                quote = F, sep = '\t', row.names = F)
-}
diff --git a/R/get_1d_peaks.R b/R/get_1d_peaks.R
deleted file mode 100644
index 54f437c..0000000
--- a/R/get_1d_peaks.R
+++ /dev/null
@@ -1,24 +0,0 @@
-#' @title get_1d_peaks
-#'
-#' @description
-#' \code{get_1d_peaks} is a legacy code
-#'
-#' @export
-
-get_1d_peaks <- function(.cov_tab, .num_of_peaks = 3, .adjust = 10, .peak_frame = data.frame(), .depth = 1){
-    if (nrow(.cov_tab) < 2){
-        return(data.frame(cov = NA, height = NA))
-    }
-    if (nrow(.peak_frame) < .num_of_peaks & .depth < 11){
-        kmer_pairs <- sum(.cov_tab[, 'freq'])
-        # used weighted version of silverman's rule of thumb
-        bw <- bw.nrdW(.cov_tab[, 'total_pair_cov'], .cov_tab[, 'freq'])
-        # density wants to have the weights relative, I will divide it and them multiply the fitted heights (alternatively, I could suppres warnings)
-        rel_weights <- .cov_tab[, 'freq'] / kmer_pairs
-        d <- density(.cov_tab[, 'total_pair_cov'], weights = rel_weights, adjust = .adjust, bw = bw) # returns the density data
-        selected_points <- which(diff(sign(diff(d$y))) == -2) + 1
-        .peak_frame <- data.frame(cov = d$x[selected_points], height = d$y[selected_points] * kmer_pairs)
-        .peak_frame <- get_1d_peaks(.cov_tab, .num_of_peaks, .adjust - 1, .peak_frame, .depth + 1)
-    }
-    .peak_frame
-}
\ No newline at end of file
diff --git a/R/get_col_ramp.R b/R/get_col_ramp.R
deleted file mode 100644
index 9159c77..0000000
--- a/R/get_col_ramp.R
+++ /dev/null
@@ -1,19 +0,0 @@
-#' @title get_col_ramp
-#'
-#' @description
-#' \code{get_col_ramp} returns a colour ramp used by smudgeplots; 
-#' args need to contain args$col_ramp and args$invert_cols features
-#' the first needs to be a ramp-generating function (e.g. "viridis", "grey.colors", "magma" or "mako")
-#' args$invert_cols is just TRUE or FALSE (if to revert the colours in the palete)
-#'
-#' @export
-
-get_col_ramp <- function(.args, delay = 0){
-    colour_ramp <- eval(parse(text = paste0(.args$col_ramp,"(", 32 - delay, ")")))
-    if (.args$invert_cols){
-        colour_ramp <- rev(colour_ramp)
-    }
-    colour_ramp <- c(rep(colour_ramp[1], delay), colour_ramp)
-    return(colour_ramp)
-}
-
diff --git a/R/get_peak_sizes.R b/R/get_peak_sizes.R
deleted file mode 100644
index b52eeaa..0000000
--- a/R/get_peak_sizes.R
+++ /dev/null
@@ -1,14 +0,0 @@
-#' @title get_peak_sizes
-#'
-#' @description
-#' \code{get_peak_sizes}
-#'
-#' @export
-
-get_peak_sizes <- function(.peak_points){
-    peaks <- .peak_points[.peak_points$summit == T,'peak']
-    peak_sizes <- sapply(peaks, function(x){sum(.peak_points[.peak_points$peak == x, 'vals'], na.rm = T)})
-    data.frame(peak = peaks,
-                abs_size = peak_sizes,
-                rel_size = peak_sizes / sum(peak_sizes))
-}
\ No newline at end of file
diff --git a/R/get_peak_summary.R b/R/get_peak_summary.R
deleted file mode 100644
index cbfbfeb..0000000
--- a/R/get_peak_summary.R
+++ /dev/null
@@ -1,22 +0,0 @@
-#' @title get_peak_summary
-#'
-#' @description
-#' \code{get_peak_summary}
-#'
-#' @export
-
-get_peak_summary <- function(.peak_points, .smudge_container, .treshold = 0.005){
-    peak_sizes <- get_peak_sizes(.peak_points)
-
-    filt_peak_sizes <- filter_peak_sizes(peak_sizes, .treshold)
-    filt_peak_points <- filter_peaks(.peak_points, filt_peak_sizes)
-
-    filt_peak_sizes <- merge(filt_peak_sizes, filt_peak_points[filt_peak_points$summit == T,])
-    filt_peak_sizes$minor_variant_cov <- transform_minor_variant_cov(filt_peak_sizes$x, .smudge_container)
-    filt_peak_sizes$pair_cov <- transform_pair_cov(filt_peak_sizes$y, .smudge_container)
-
-    # genome_copy_nuber <- round(filt_peak_sizes$pair_cov / (min(filt_peak_sizes$pair_cov) / 2))
-    filt_peak_sizes$minor_variant_cov_rounded <- sapply(filt_peak_sizes$minor_variant_cov, round_minor_variant_cov)
-
-    return(filt_peak_sizes)
-}
\ No newline at end of file
diff --git a/R/get_smudge_container.R b/R/get_smudge_container.R
deleted file mode 100644
index 4323312..0000000
--- a/R/get_smudge_container.R
+++ /dev/null
@@ -1,49 +0,0 @@
-#' @title get_smudge_container
-#'
-#' @description
-#' \code{get_smudge_container} is inspired by https://stackoverflow.com/a/18103689/2962344
-#'
-#' @export
-
-get_smudge_container <- function(.cov_tab, .nbins = 20, 
-                                 .xlim = c(0, 0.5), .ylim = NA){
-
-  total_pair_cov <- .cov_tab[, 1] + .cov_tab[, 2]
-  minor_variant_rel_cov <- .cov_tab[, 1] / total_pair_cov
-  
-  if (any(is.na(.ylim))) {
-    .ylim <- c(0, max(.total_pair_cov))
-  }
-
-  smudge_container <- list()
-  smudge_container$x <- seq(.xlim[1], ((.nbins - 1) / .nbins) * .xlim[2], length = .nbins)
-  smudge_container$y <- c(seq(.ylim[1], ((.nbins - 1) / .nbins) * .ylim[2], length = .nbins), .ylim[2])
-
-  xcoords <- findInterval(minor_variant_rel_cov, smudge_container$x, left.open = TRUE)
-  ycoords <- findInterval(total_pair_cov, smudge_container$y, left.open = TRUE) # all >.ylim[2] will be in nbin+1 th bin
-
-  if ("freq" %in% colnames(.cov_tab)) { # ploidy-plot style "cov1 cov2 density" table
-    group_by <- list(paste(xcoords, ycoords))
-    agregated_counts <- aggregate(.cov_tab[, 3], by = group_by, sum)
-    agregated_coordinates <- matrix(as.numeric(unlist(strsplit(agregated_counts[, 1], " "))), ncol = 2, byrow = TRUE)
-    coordinates_in_range <- !agregated_coordinates[, 2] == .nbins+1 # the rows that belong to nbins+1 bin need to be removed; this is identify which to keep
-    agregated_counts <- agregated_counts[coordinates_in_range, 2] # remove nbin+1 in counts, and get rid keep only the counts (not cooredinates)
-    agregated_coordinates <- agregated_coordinates[coordinates_in_range, ] # and subset the separated coordinates too
-  } else { # original smudgepot style "cov1 cov2" table;
-    agregated_counts <-  as.data.frame(table(xcoords, ycoords), stringsAsFactors = FALSE)
-    agregated_counts[, 1] <- as.numeric(agregated_counts[, 1])
-    agregated_counts[, 2] <- as.numeric(agregated_counts[, 2])
-    agregated_counts <- agregated_counts[!agregated_counts[, 2] == .nbins + 1, ]
-    agregated_coordinates <- cbind(agregated_counts[, 1], agregated_counts[, 2])
-    agregated_counts <- agregated_counts[, 3]
-  }
-  
-  smudge_container$dens <- matrix(0, .nbins, .nbins)
-  smudge_container$dens[agregated_coordinates] <- agregated_counts
-
-  smudge_container$y <- smudge_container$y[1:.nbins]
-  smudge_container$z <- log10(smudge_container$dens)
-  smudge_container$z[is.infinite(smudge_container$z)] <- 0
-
-  return(smudge_container)
-}
\ No newline at end of file
diff --git a/R/get_trinoploid_1n_est.R b/R/get_trinoploid_1n_est.R
deleted file mode 100644
index 6179936..0000000
--- a/R/get_trinoploid_1n_est.R
+++ /dev/null
@@ -1,20 +0,0 @@
-#' @title get_trinoploid_1n_est
-#'
-#' @description
-#' \code{get_trinoploid_1n_est}
-#'
-#' @export
-
-get_trinoploid_1n_est <- function(.filt_peak_sizes){
-    trinploids <- .filt_peak_sizes$minor_variant_cov_rounded == 0.33
-    if(any(trinploids)){
-        triplod_1n_guess <- min(.filt_peak_sizes$pair_cov[trinploids]) / 3
-        genome_counts_by_trip_1n_est <- round(.filt_peak_sizes$pair_cov / triplod_1n_guess)
-        if(sum(genome_counts_by_trip_1n_est == 3) == 1){
-            # min triploid peak look like triploid
-            return(triplod_1n_guess)
-        }
-    }
-    # unable to guess
-    return(NA)
-}
diff --git a/R/guess_genome_structure.R b/R/guess_genome_structure.R
deleted file mode 100644
index 0ec44f6..0000000
--- a/R/guess_genome_structure.R
+++ /dev/null
@@ -1,14 +0,0 @@
-#' @title guess_genome_structure
-#'
-#' @description
-#' \code{guess_genome_structure}
-#'
-#' @export
-
-guess_genome_structure <- function(.filt_peak_sizes_line, .n){
-    both_count_in_genome <- round(as.numeric(.filt_peak_sizes_line['pair_cov']) / .n)
-    minor_count_in_genome <- round(as.numeric(.filt_peak_sizes_line['minor_variant_cov']) * both_count_in_genome)
-    paste0(
-        c(rep('A', both_count_in_genome - minor_count_in_genome),
-          rep('B', minor_count_in_genome)), collapse = '')
-}
\ No newline at end of file
diff --git a/R/peak_agregation.R b/R/peak_agregation.R
deleted file mode 100644
index dafa49f..0000000
--- a/R/peak_agregation.R
+++ /dev/null
@@ -1,48 +0,0 @@
-#' @title peak_agregation
-#'
-#' @description
-#' \code{peak_agregation} is the function that agregates tiles from the smudge_container
-#' into smudges - individual distributions
-#'
-#' @param .smudge_container - a smudge_container object; a list with `x`,`y` axies of the 2d histogram and `dens` matrix of 2h histogram values.
-#'        Usually output of function \code{get_smudge_container}
-#'
-#' @export
-
-peak_agregation <- function(.smudge_container, .L = NA){
-    if ( is.na(.L) ){ .L <- min(.smudge_container$y) / 2 } # guess L if not specified
-    .L <- max(c(.L, 10))
-    nbins <- length(.smudge_container$x)
-    peak_points <- data.frame(vals = as.vector(.smudge_container$dens),
-                              x = rep(1:nbins, nbins),
-                              y = rep(1:nbins, each = nbins))
-    peak_points$peak <- NA
-    peak_points$summit <- NA
-    peak_points <- peak_points[order(peak_points$vals, decreasing = T),]
-    #TODO no go zone x,y <- .smudge_container
-    nogo_x <- findInterval(.L / .smudge_container$y, .smudge_container$x, left.open = T)
-    # mark the first
-    peak_points$peak[1] <- 1
-    peak_points$summit[1] <- T
-
-    for (point in 2:nrow(peak_points)) {
-        parsed_points <- peak_points[1:point-1,]
-        x_n <- abs(parsed_points[,'x'] - peak_points[point,'x']) <= 1
-        y_n <- abs(parsed_points[,'y'] - peak_points[point,'y']) <= 1
-        if (any(x_n & y_n)) {
-            tiles_around <- parsed_points[x_n & y_n,]
-            peak_points$peak[point] <- tiles_around$peak[which.max(tiles_around$vals)]
-            peak_points$summit[point] <- F
-        } else {
-            # filt_peak_sizes$y -> pair_coverage
-            # parse only point above eror line
-            if ( peak_points[point,'x'] > nogo_x[peak_points[point,'y']] ) {
-                peak_points$peak[point] <- max(parsed_points$peak, na.rm = T) + 1
-                peak_points$summit[point] <- T
-            } else {
-                peak_points$summit[point] <- F
-            }
-        }
-    }
-    return(peak_points)
-}
diff --git a/R/plot_expected_haplotype_structure.R b/R/plot_expected_haplotype_structure.R
deleted file mode 100644
index c2b7649..0000000
--- a/R/plot_expected_haplotype_structure.R
+++ /dev/null
@@ -1,21 +0,0 @@
-#' @title plot_expected_haplotype_structure
-#'
-#' @description
-#' \code{plot_expected_haplotype_structure} adds genome scture labels to a smudgeplot
-#'
-#' @export
-
-plot_expected_haplotype_structure <- function(.n, .peak_sizes,
-                                              .adjust = F, .cex = 1.3, xmax = 0.49){
-    borercases <- .peak_sizes$corrected_minor_variant_cov == 0.5
-    structures <- reduce_structure_representation(.peak_sizes)
-
-    for(i in 1:nrow(.peak_sizes)){
-        # xmax is in the middle of the last square in the 2d histogram,
-        # which is too far from the edge, so I average it with 0.49
-        # witch will pull the label bit to the edge
-        text( ifelse( borercases[i] & .adjust, (xmax + 0.49) / 2, .peak_sizes$corrected_minor_variant_cov[i]),
-             .peak_sizes$ploidy[i] * .n, structures[i],
-             offset = 0, cex = .cex, xpd = T, pos = ifelse( borercases[i] & .adjust, 2, 1))
-    }
-}
\ No newline at end of file
diff --git a/R/plot_histograms.R b/R/plot_histograms.R
deleted file mode 100644
index 5fc86c5..0000000
--- a/R/plot_histograms.R
+++ /dev/null
@@ -1,68 +0,0 @@
-#' @title plot_histograms
-#'
-#' @description
-#' \code{plot_histograms} makes 2 plots - histograms of the both margins of smudge plot
-#'
-#' @export
-
-# .cov_tab <- cov_tab
-# .smudge_summary <- smudge_summary
-# .fig_title <- NA
-# .ylim <- ylim
-# .bins <- 150
-
-plot_histograms <- function(.cov_tab, .smudge_summary,
-                            .fig_title = NA, .cex = 1.4, .col = NA,
-                            .ylim = NA, .bins = 100){
-
-    if( is.na(.col) ){
-        .col <- rgb(0.8352, 0.2431, 0.3098)
-    }
-
-    # removing pairs with excess coverage
-    .cov_tab <- .cov_tab[.cov_tab[, 'total_pair_cov'] < .ylim[2], ]
-   
-    # minor_variant_rel_cov HISTOGRAM - top
-    coverage_histogram(.cov_tab, bins = .bins, xlim = c(0, 0.5), 
-                       column = 'minor_variant_rel_cov', horiz = FALSE, .col)
-
-    if (!(is.na(.fig_title))) {
-        mtext(bquote(italic(.(.fig_title))), side=3, adj=0, line=-3, cex = .cex + 0.2)
-    }
-
-    if (length(.smudge_summary$genome_ploidy) > 0){
-        if ( .smudge_summary$genome_ploidy < 9){
-            ploidytext <- switch(.smudge_summary$genome_ploidy - 1,
-                                p2 = 'diploid',
-                                p3 = 'triploid',
-                                p4 = 'tetraploid',
-                                p5 = 'pentaploid',
-                                p6 = 'hexaploid',
-                                p7 = 'heptaploid',
-                                p8 = 'octoploid')
-        } else {
-            ploidytext <- paste0(.smudge_summary$genome_ploidy, '-ploid')
-        }
-
-        if(!(is.na(.smudge_summary$genome_ploidy))){
-            mtext(paste('proposed', ploidytext), side=3, adj=0.05, line=-5, cex = .cex - 0.2)
-        }
-    } 
-
-
-    # total pair coverage HISTOGRAM - right
-    coverage_histogram(.cov_tab, bins = .bins, xlim = .ylim, # this is because horizonal is TRUE 
-                       column = 'total_pair_cov', horiz = TRUE, .col)
-
-    if (.smudge_summary$n > 0){ #0 means it was not infered
-        legend('bottomright', bty = 'n', paste('1n = ', round(.smudge_summary$n)), cex = .cex - 0.1)
-
-        .peak_sizes <- data.frame(peak = reduce_structure_representation(.smudge_summary$peak_sizes),
-                                size = .smudge_summary$peak_sizes[,3])
-
-        if(! any(is.na(.peak_sizes))){
-            legend('topleft', bty = 'n', .peak_sizes[,1], cex = .cex - 0.2)
-            legend('topright', bty = 'n', legend = round(.peak_sizes[,2], 2), cex = .cex - 0.2)
-        }
-    }                       
-}
diff --git a/R/plot_legend.R b/R/plot_legend.R
deleted file mode 100644
index dacd44c..0000000
--- a/R/plot_legend.R
+++ /dev/null
@@ -1,26 +0,0 @@
-#' @title plot_legend
-#'
-#' @description
-#' \code{plot_legend} generate topright corener in the smudgeplot
-#'
-#' @export
-
-plot_legend <- function(.k, .colour_ramp, .log_scale = T){
-    par(mar=c(0,0,2,1))
-    plot.new()
-    print_title <- ifelse(.log_scale, 'log kmers pairs', 'kmers pairs')
-    title(print_title)
-    for(i in 1:32){
-        rect(0,(i - 0.01) / 33, 0.5, (i + 0.99) / 33, col = .colour_ramp[i])
-    }
-    kmer_max <- max(smudge_container$dens)
-    if( .log_scale == T ){
-        for(i in 0:6){
-            text(0.75, i / 6, rounding(10^(log10(kmer_max) * i / 6)), offset = 0)
-        }
-    } else {
-        for(i in 0:6){
-            text(0.75, i / 6, rounding(kmer_max * i / 6), offset = 0)
-        }
-    }
-}
\ No newline at end of file
diff --git a/R/plot_seq_error_line.R b/R/plot_seq_error_line.R
deleted file mode 100644
index 915c202..0000000
--- a/R/plot_seq_error_line.R
+++ /dev/null
@@ -1,13 +0,0 @@
-#' @title plot_seq_error_line
-#'
-#' @description
-#' \code{plot_seq_error_line} add error line to plot it's L / cov
-#'
-#' @export
-
-plot_seq_error_line <- function(.cov_tab, .L = NA, .col = 'black'){
-    if (is.na(.L)){ .L <- min(.cov_tab[, 'covB']) }
-    max_cov_pair <- max(.cov_tab[, 'total_pair_cov'])
-    cov_range <- seq(2 * .L, max_cov_pair, length = 500)
-    lines((.L - 1) / cov_range, cov_range, lwd = 2.5, lty = 2, col = .col)
-}
\ No newline at end of file
diff --git a/R/plot_smudgeplot.R b/R/plot_smudgeplot.R
deleted file mode 100644
index 173c127..0000000
--- a/R/plot_smudgeplot.R
+++ /dev/null
@@ -1,36 +0,0 @@
-#' @title plot_smudgeplot
-#'
-#' @description
-#' \code{plot_smudgeplot} is the core smudgeplot function to plot the core of the smudgeplot
-#' the cental 2D histogram/ landscape plot / heatmap / howeveryouwantyoucallthis plot
-#'
-#' @param .k - the matrix of densities or list of x,y axies and z, the matrix. Usually output of function
-#'
-#' @param .n - the 1n coverage, the coverage of the haploid genome (can be estimated by TODO function)
-#'
-#' @param .colour_ramp - the colour pallete for the heat colours
-#'
-#' @param .cex - the size of the axis labels [default 1.4]
-#'
-#' @author Kamil Jaron \email{kamiljaron at gmail.com}
-#'
-#' @export
-
-plot_smudgeplot <- function(.k, .n, .colour_ramp, .cex = 1.4){
-    # margins 'c(bottom, left, top, right)'
-    par(mar=c(4.8,4.8,1,1))
-    # 2D HISTOGRAM
-    image(.k, col = .colour_ramp,
-        xlab = 'Normalized minor kmer coverage: B / (A + B)',
-        ylab = 'Total coverage of the kmer pair: A + B', cex.lab = 1.4,
-        axes=F
-    )
-
-    if (.n == 0){
-        axis(2)
-    } else {
-        axis(2, at = 2:8 * .n, labels = paste(2:8, 'n'))
-    }
-    axis(1, at = c(1/5, 1/4, 1/3, max(.k$x)),
-            labels = c('1/5', '1/4', '1/3', '1/2'))
-}
\ No newline at end of file
diff --git a/R/reduce_structure_representation.R b/R/reduce_structure_representation.R
deleted file mode 100644
index 2e5ac0f..0000000
--- a/R/reduce_structure_representation.R
+++ /dev/null
@@ -1,19 +0,0 @@
-#' @title reduce_structure_representation
-#'
-#' @description
-#' \code{reduce_structure_representation} is the function that collapses labels loger than 4 characters into 4 character representations
-#'
-#' @param .peak_sizes - a table with peak sizes, the truly important column in there is just the "structure" one
-#'
-#' @export
-
-reduce_structure_representation <- function(.peak_sizes){
-    structures_to_adjust <- sapply(.peak_sizes$structure, nchar) > 4
-    if (any(structures_to_adjust)) {
-        decomposed_struct <- strsplit(.peak_sizes$structure[structures_to_adjust], '')
-        As <- sapply(decomposed_struct, function(x){ sum(x == 'A') } )
-        Bs <- sapply(decomposed_struct, length) - As
-        .peak_sizes$structure[structures_to_adjust] <- paste0(As, 'A', Bs, 'B')
-    }
-    .peak_sizes$structure
-}
diff --git a/R/round_minor_variant_cov.R b/R/round_minor_variant_cov.R
deleted file mode 100644
index 2fdf8fc..0000000
--- a/R/round_minor_variant_cov.R
+++ /dev/null
@@ -1,13 +0,0 @@
-#' @title round_minor_variant_cov
-#'
-#' @description
-#' \code{round_minor_variant_cov}
-#'
-#' @export
-
-round_minor_variant_cov <- function(.min_cov){
-    # 0.5   0.4   0.33    0.25   0.2
-    # 1/2   2/5   1/3     1/4    1/5
-    expected_freq <- c(0.5, 0.4, 0.33, 0.25, 0.2, 0.166, 0.1428, 0.125)
-    expected_freq[which.min(abs(expected_freq - .min_cov))]
-}
\ No newline at end of file
diff --git a/R/rounding.R b/R/rounding.R
deleted file mode 100644
index 2c9dfa7..0000000
--- a/R/rounding.R
+++ /dev/null
@@ -1,16 +0,0 @@
-#' @title rounding
-#'
-#' @description
-#' \code{rounding} rounds to thousands or hundreds if the number is smaller than 1000
-#'
-#' @export
-
-rounding <- function(number){
-    if(number > 1000){
-        round(number / 1000) * 1000
-    } else if (number > 100){
-        round(number / 100) * 100
-    } else {
-        round(number / 10) * 10
-    }
-}
\ No newline at end of file
diff --git a/R/smudge_summary.R b/R/smudge_summary.R
deleted file mode 100644
index 17de5b1..0000000
--- a/R/smudge_summary.R
+++ /dev/null
@@ -1,11 +0,0 @@
-#' @title smudge_summary
-#'
-#' @description
-#' \code{smudge_summary} a function that is used to report all the summary output messages
-#' like this I will be able to easily redirect the waring message to file as well
-#'
-#' @export
-
-smudge_summary <- function(.filename, ...){
-    write(paste(..., sep = '\t'), paste0(.filename, "_verbose_summary.txt"), append = T)
-}
diff --git a/R/smudge_warn.R b/R/smudge_warn.R
deleted file mode 100644
index 04512b3..0000000
--- a/R/smudge_warn.R
+++ /dev/null
@@ -1,12 +0,0 @@
-#' @title smudge_warn
-#'
-#' @description
-#' \code{smudge_warn} a function that is used to report all the wanrning messages
-#' like this I will be able to easily redirect the waring message to file as well
-#'
-#' @export
-
-smudge_warn <- function(.filename, ...){
-    write(paste(..., sep = '\t'), stderr())
-    write(paste(..., sep = '\t'), paste0(.filename, "_warnings.txt"), append = T)
-}
\ No newline at end of file
diff --git a/R/transform_minor_variant_cov.R b/R/transform_minor_variant_cov.R
deleted file mode 100644
index 02c34f7..0000000
--- a/R/transform_minor_variant_cov.R
+++ /dev/null
@@ -1,13 +0,0 @@
-#' @title transform_minor_variant_cov
-#'
-#' @description
-#' \code{transform_minor_variant_cov}
-#' to transform coordinates of smudgeplot to minor_variant_rel_cov
-#'
-#' @export
-
-transform_minor_variant_cov <- function(.x, .smudge_container){
-    # .x corresponds to <.x, .x+1> interval
-    # I add 1/2 of the size of the interval to return mean interval values
-    .smudge_container$x[.x] + (diff(.smudge_container$x)[1] / 2)
-}
\ No newline at end of file
diff --git a/R/transform_pair_cov.R b/R/transform_pair_cov.R
deleted file mode 100644
index 27c53e9..0000000
--- a/R/transform_pair_cov.R
+++ /dev/null
@@ -1,11 +0,0 @@
-#' @title transform_pair_cov
-#'
-#' @description
-#' \code{transform_pair_cov}
-#' to transform coordinates of smudgeplot to total_pair_cov
-#'
-#' @export
-
-transform_pair_cov <- function(.y, .smudge_container){
-    .smudge_container$y[.y] + (diff(.smudge_container$y)[1] / 2)
-}
\ No newline at end of file
diff --git a/R/wtd.quantile.R b/R/wtd.quantile.R
deleted file mode 100644
index c7a73e9..0000000
--- a/R/wtd.quantile.R
+++ /dev/null
@@ -1,25 +0,0 @@
-#' @title wtd.quantile & wtd.iqr
-#'
-#' @description
-#' \code{wtd.quantile}
-#' calculates a quantile using "wighted data", specifically designed for "histogram-like" data
-#' \code{wtd.iqr}
-#' 0.75 - 0.25 weighted quantile
-#'
-#' @export
-
-wtd.quantile <- function(x, q=0.25, weight=NULL) {
-  o <- order(x)
-  n <- sum(weight)
-  order <- 1 + (n - 1) * q
-  low  <- pmax(floor(order), 1)
-  high <- pmin(ceiling(order), n)
-  low_contribution <- high - order
-  allq <- approx(x=cumsum(weight[o])/sum(weight), y=x[o], xout = c(low, high)/n, method = "constant",
-      f = 1, rule = 2)$y
-  low_contribution * allq[1] + (1 - low_contribution) * allq[2]
-}
-
-wtd.iqr <- function(x, w=NULL) {
-  wtd.quantile(x, q=0.75, weight=w) - wtd.quantile(x, q=0.25, weight=w)
-}
diff --git a/README.md b/README.md
index 98705cd..e0d75c9 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,14 @@ The big changes are
  + the backend by Gene is paralelized and massively faster
  + the intermediate file will be a flat file with the 2d histogram with cov1, cov2, freq columns (as opposed to list of coverages of pairs cov1 cov2);
  + at least for now WE LOSE the ability to extract sequences of the kmers in the pair; this functionality will hopefully restore at some point together with functionality to assess the quality of assembly.
- + the smudge detection algorithm is under revision and a **new version will be released on 18th of October 2024**
+ + we added "run all" functionality for people that want "FastK database -> plot" type of solution.
+ + completelly revamped plot showing how all individual kmer pairs insead of agregating them into squares
+ + new smudge detection algorithm based on grid projection on the smudge plane (working, but under revisions at the moment)
+ + R package smudgeplot was retired and is no longer used
+
+We keep the same pythonic interface, the interface of older smudgeplot and this version are very similar and largely compatible.
+
+Current state: RUNNING; beta-testing;
 
 ### Install the whole thing
  
@@ -39,21 +46,21 @@ will install smudgeplot to `~/bin/`.
 
 #### Manual installation
 
-Installing the `R` package:
-
-```bash
-# cd smudgeplot
-Rscript -e 'install.packages(".", repos = NULL, type="source")' # this will install smudgeplot R package;
-```
-
 Compiling the `C` executable
 
 ```
-make exec/PloidyPlot # this will compile PloidyPlot backend
+make exec/hetmers # this will compile hetmers (kmer pair searching engine of PloidyPlot) backend
 ```
 
 Now you can move all three files from the `exec` directory somewhere your system will see it (or alternativelly, you can add that directory to `$PATH` variable).
 
+```
+install -C exec/smudgeplot.py /usr/local/bin
+install -C exec/hetmers /usr/local/bin
+install -C exec/smudgeplot_plot.R /usr/local/bin
+install -C exec/centrality_plot.R /usr/local/bin
+```
+
 ### Runing this version on Sacharomyces data
 Requires ~2.1GB of space and `FastK` and `smudgeplot` installed.
 
@@ -69,18 +76,24 @@ mv *fastq.gz data/Scer/
 # run FastK to create a k-mer database
 FastK -v -t4 -k31 -M16 -T4 data/Scer/SRR3265401_[12].fastq.gz -Ndata/Scer/FastK_Table
 
-# Run PloidyPlot to find all k-mer pairs in the dataset
-PloidyPlot -e12 -k -v -T4 -odata/Scer/kmerpairs data/Scer/FastK_Table
+# Find all k-mer pairs in the dataset using hetmer module
+smudgeplot.py hetmers -L 12 -t 4 -o data/Scer/kmerpairs --verbose data/Scer/FastK_Table
 # this now generated `data/Scer/kmerpairs_text.smu` file;
 # it's a flat file with three columns; covB, covA and freq (the number of k-mer pairs with these respective coverages)
 
 # use the .smu file to infer ploidy and create smudgeplot
-smudgeplot.py plot -n 15 -t Sacharomyces -o data/Scer/trial_run data/Scer/kmerpairs_text.smu
+smudgeplot.py all -o data/Scer/trial_run data/Scer/kmerpairs_text.smu
 
-# check that 5 files are generated (2 pdfs; a summary tsv table, and two txt logs)
+# check that bunch files are generated (3 pdfs; some summary tables and logs)
 ls data/Scer/trial_run_*
 ```
 
+The y-axis scaling is by default 100, one can spcify argument `ylim` to scale it differently
+
+```bash
+smudgeplot.py all -o data/Scer/trial_run_ylim70 data/Scer/kmerpairs_text.smu -ylim 70
+```
+
 And that's it for now! I will be streamlining this over the next few days so hopefully it will all work with a single command;
 
 ### How smudgeplot works
diff --git a/exec/centrality_plot.R b/exec/centrality_plot.R
new file mode 100644
index 0000000..b73a85e
--- /dev/null
+++ b/exec/centrality_plot.R
@@ -0,0 +1,11 @@
+#!/usr/bin/env Rscript
+args = commandArgs(trailingOnly=TRUE)
+input_name <- args[1]
+# input_name = '~/test/daAchMill1_centralities.txt'
+
+output_name <- gsub('.txt', '.pdf', input_name)
+tested_covs <- read.table(input_name, col.names = c('cov', 'centrality'))
+
+pdf(output_name)
+    plot(tested_covs[, 'cov'], tested_covs[, 'centrality'], xlab = 'Coverage', ylab = 'Centrality [(theoretical_center - actual_center) / coverage ]', pch = 20)
+dev.off()
\ No newline at end of file
diff --git a/exec/smudgeplot.py b/exec/smudgeplot.py
index 89a2ea6..4a3d6e4 100755
--- a/exec/smudgeplot.py
+++ b/exec/smudgeplot.py
@@ -2,14 +2,22 @@
 
 import argparse
 import sys
+import numpy as np
+from pandas import read_csv # type: ignore
+from pandas import DataFrame # type: ignore
+from numpy import arange
+from numpy import argmin
+from numpy import concatenate
 from os import system
 from math import log
 from math import ceil
-# hetkmers dependencies
+from statistics import fmean
 from collections import defaultdict
-from itertools import combinations
+# import matplotlib as mpl
+# import matplotlib.pyplot as plt
+# from matplotlib.pyplot import plot
 
-version = '0.3.0 oriel'
+version = '0.4.0dev'
 
 ############################
 # processing of user input #
@@ -20,9 +28,11 @@ def __init__(self):
         argparser = argparse.ArgumentParser(
             # description='Inference of ploidy and heterozygosity structure using whole genome sequencing data',
             usage='''smudgeplot <task> [options] \n
-tasks: cutoff    Calculate meaningful values for lower kmer histogram cutoff.
-       hetmers   Calculate unique kmer pairs from a FastK k-mer database.
-       plot      Generate 2d histogram; infere ploidy and plot a smudgeplot.\n\n''')
+tasks: cutoff           Calculate meaningful values for lower kmer histogram cutoff.
+       hetmers          Calculate unique kmer pairs from a FastK k-mer database.
+       peak_agregation  Agregates smudges using local agregation algorithm.
+       plot             Generate 2d histogram; infere ploidy and plot a smudgeplot.
+       all              Runs all the steps (with default options)\n\n''')
         # removing this for now;
         #        extract   Extract kmer pairs within specified coverage sum and minor covrage ratio ranges
         argparser.add_argument('task', help='Task to execute; for task specific options execute smudgeplot <task> -h')
@@ -64,26 +74,13 @@ def plot(self):
         Generate 2d histogram; infer ploidy and plot a smudgeplot.
         '''
         argparser = argparse.ArgumentParser(prog = 'smudgeplot plot', description='Generate 2d histogram for smudgeplot')
-        argparser.add_argument('infile', nargs='?', help='name of the input tsv file with covarages and frequencies (default \"coverages_2.smu\")."')
+        argparser.add_argument('infile', help='name of the input tsv file with covarages and frequencies')
+        argparser.add_argument('smudgefile', help='name of the input tsv file with sizes of individual smudges')
+        argparser.add_argument('n', help='The expected haploid coverage.', type=float)
         argparser.add_argument('-o', help='The pattern used to name the output (smudgeplot).', default='smudgeplot')
-        argparser.add_argument('-q', help='Remove kmer pairs with coverage over the specified quantile; (default none).', type=float, default=1)
-        argparser.add_argument('-L', help='The lower boundary used when dumping kmers (default min(total_pair_cov) / 2).', type=int, default=0)
-        argparser.add_argument('-c', '-cov_filter', help='Filter pairs with one of them having coverage bellow specified threshold (default 0; disables parameter L)', type=int, default=0)
-        argparser.add_argument('-n', help='The expected haploid coverage (default estimated from data).', type=float, default=0)
-        argparser.add_argument('-t', '--title', help='name printed at the top of the smudgeplot (default none).', default='')
-        argparser.add_argument('-ylim', help='The upper limit for the coverage sum (the y axis)', type = int, default=0)
-        # argparser.add_argument('-m', '-method', help='The algorithm for annotation of smudges (default \'local_aggregation\')', default='local_aggregation')
-        argparser.add_argument('-nbins', help='The number of nbins used for smudgeplot matrix (nbins x nbins) (default autodetection).', type=int, default=0)
-        # argparser.add_argument('-kmer_file', help='Name of the input files containing kmer seuqences (assuming the same order as in the coverage file)', default = "")
-        argparser.add_argument('--homozygous', action="store_true", default = False, help="Assume no heterozygosity in the genome - plotting a paralog structure; (default False).")
-        argparser.add_argument('--just_plot', action="store_true", default = False, help="Turns off the inference of coverage and annotation of smudges; simply generates smudgeplot. (default False)")
-
-
-        # plotting arugments
-        argparser.add_argument('-col_ramp', help='An R palette used for the plot (default "viridis", other sensible options are "magma", "mako" or "grey.colors" - recommended in combination with --invert_cols).', default='viridis')
-        argparser.add_argument('--invert_cols', action="store_true", default = False, help="Revert the colour palette (default False).")
-        argparser.add_argument('--plot_err_line', action="store_true", default = False, help="Add a line to the plot denoting where the error k-mers and genomic k-mers will likely pair up the most (default False).")
         
+        argparser = self.add_plotting_arguments(argparser)
+
         self.arguments = argparser.parse_args(sys.argv[2:])
 
     def cutoff(self):
@@ -95,19 +92,51 @@ def cutoff(self):
         argparser.add_argument('boundary', help='Which bounary to compute L (lower) or U (upper)')
         self.arguments = argparser.parse_args(sys.argv[2:])
 
-    def extract(self):
+    def peak_agregation(self):
         '''
         Extract kmer pairs within specified coverage sum and minor covrage ratio ranges.
         '''
-        argparser = argparse.ArgumentParser(prog = 'smudgeplot extract', description='Extract kmer pairs within specified coverage sum and minor covrage ratio ranges.')
-        argparser.add_argument("-cov", "--coverageFile",required=True, help="coverage file for the kmer pairs")
-        argparser.add_argument("-seq", "--seqFile",required=True, help="sequences of the kmer pairs")
-        argparser.add_argument("-minc", "--countMin",required=True, help="lower bound of the summed coverage", type=int)
-        argparser.add_argument("-maxc", "--countMax",required=True, help="upper bound of the summed coverage", type=int)
-        argparser.add_argument("-minr", "--ratioMin",required=True, help="lower bound of minor allele ratio", type=float)
-        argparser.add_argument("-maxr", "--ratioMax",required=True, help="upper bound of minor allele ratio", type=float)
+        argparser = argparse.ArgumentParser()
+        argparser.add_argument('infile', nargs='?', help='name of the input tsv file with covarages and frequencies.')
+        argparser.add_argument('-nf', '-noise_filter', help='Do not agregate into smudge k-mer pairs with frequency lower than this parameter', type=int, default=50)
+        argparser.add_argument('-d', '-distance', help='Manthattan distance of k-mer pairs that are considered neioboring for the local agregation purposes.', type=int, default=5)
+        argparser.add_argument('--mask_errors', help='instead of reporting assignments to individual smudges, just remove all monotonically decreasing points from the error line', action="store_true", default = False)
         self.arguments = argparser.parse_args(sys.argv[2:])
 
+    def all(self):
+        argparser = argparse.ArgumentParser()
+        argparser.add_argument('infile', nargs='?', help='name of the input tsv file with covarages and frequencies.')
+        argparser.add_argument('-o', help='The pattern used to name the output (smudgeplot).', default='smudgeplot')
+        argparser.add_argument('-cov_min', help='Minimal coverage to explore (default 6)', default=6, type = int)
+        argparser.add_argument('-cov_max', help='Maximal coverage to explore (default 50)', default=60, type = int)
+        argparser.add_argument('-cov', help='Define coverage instead of infering it. Disables cov_min and cov_max.', default=0, type=int)
+
+        argparser = self.add_plotting_arguments(argparser)
+
+        self.arguments = argparser.parse_args(sys.argv[2:])
+    
+    def add_plotting_arguments(self, argparser):
+        argparser.add_argument('-c', '-cov_filter', help='Filter pairs with one of them having coverage bellow specified threshold (default 0; disables parameter L)', type=int, default=0)
+        argparser.add_argument('-t', '--title', help='name printed at the top of the smudgeplot (default none).', default='')
+        argparser.add_argument('-ylim', help='The upper limit for the coverage sum (the y axis)', type = int, default=0)
+        argparser.add_argument('-col_ramp', help='An R palette used for the plot (default "viridis", other sensible options are "magma", "mako" or "grey.colors" - recommended in combination with --invert_cols).', default='viridis')
+        argparser.add_argument('--invert_cols', action="store_true", default = False, help="Revert the colour palette (default False).")
+        return(argparser)
+    
+    def format_aguments_for_R_plotting(self):
+        plot_args = ""
+        if self.arguments.c != 0:
+            plot_args += " -c " + str(self.arguments.c)
+        if self.arguments.title:
+            plot_args += " -t \"" + self.arguments.title + "\""
+        if self.arguments.ylim != 0:
+            plot_args += " -ylim " + str(self.arguments.ylim)
+        if self.arguments.col_ramp:
+            plot_args += " -col_ramp \"" + self.arguments.col_ramp + "\""
+        if self.arguments.invert_cols:
+            plot_args += " --invert_cols"
+        return(plot_args)
+
 ###############
 # task cutoff #
 ###############
@@ -144,6 +173,160 @@ def cutoff(args):
         sys.stdout.write(str(U))
     sys.stdout.flush()
 
+########################
+# task peak_agregation #
+########################
+
+def load_hetmers(smufile):
+    cov_tab = read_csv(smufile, names = ['covB', 'covA', 'freq'], sep='\t')
+    cov_tab = cov_tab.sort_values('freq', ascending = False)
+    return(cov_tab)
+
+def local_agregation(cov_tab, distance, noise_filter, mask_errors):
+    # generate a dictionary that gives us for each combination of coverages a frequency
+    cov2freq = defaultdict(int)
+    cov2peak = defaultdict(int)
+
+    L = min(cov_tab['covB']) # important only when --mask_errors is on
+
+    ### clustering
+    next_peak = 1
+    for idx, covB, covA, freq in cov_tab.itertuples():
+        cov2freq[(covA, covB)] = freq # a make a frequency dictionary on the fly, because I don't need any value that was not processed yet
+        if freq < noise_filter:
+            break
+        highest_neigbour_coords = (0, 0)
+        highest_neigbour_freq = 0
+        # for each kmer pair I will retrieve all neibours (Manhattan distance)
+        for xA in range(covA - distance,covA + distance + 1):
+            # for explored A coverage in neiborhood, we explore all possible B coordinates
+            distanceB = distance - abs(covA - xA)
+            for xB in range(covB - distanceB,covB + distanceB + 1):
+                xB, xA = sorted([xA, xB]) # this is to make sure xB is smaller than xA
+                # iterating only though those that were assigned already
+                # and recroding only the one with highest frequency
+                if cov2peak[(xA, xB)] and cov2freq[(xA, xB)] > highest_neigbour_freq:
+                    highest_neigbour_coords = (xA, xB)
+                    highest_neigbour_freq = cov2freq[(xA, xB)]
+        if highest_neigbour_freq > 0:
+            cov2peak[(covA, covB)] = cov2peak[(highest_neigbour_coords)]
+        else:
+            # print("new peak:", (covA, covB))
+            if mask_errors:
+                if covB < L + distance:
+                    cov2peak[(covA, covB)] = 1 # error line
+                else:
+                    cov2peak[(covA, covB)] = 0 # central smudges
+            else:
+                cov2peak[(covA, covB)] = next_peak # if I want to keep info about all locally agregated smudges
+                next_peak += 1
+    return(cov2peak)
+
+def peak_agregation(args):
+    ### load data
+    cov_tab = load_hetmers(args.infile)
+
+    cov2peak = local_agregation(cov_tab, args.d, args.nf, mask_errors = False)
+
+    cov_tab = cov_tab.sort_values(['covA', 'covB'], ascending = True)
+    for idx, covB, covA, freq in cov_tab.itertuples():
+        peak = cov2peak[(covA, covB)]
+        sys.stdout.write(f"{covB}\t{covA}\t{freq}\t{peak}\n")
+    sys.stdout.flush()
+
+def get_smudge_container(cov_tab, cov, smudge_filter):
+    smudge_container = dict()
+    genomic_cov_tab = cov_tab[cov_tab['peak'] == 0] # this removed all the marked errors
+    total_kmer_pairs = sum(genomic_cov_tab['freq'])
+
+    for Bs in range(1,9):
+        min_cov = 0 if Bs == 1 else cov * (Bs - 0.5)
+        max_cov = cov * (Bs + 0.5)
+        cov_tab_isoB = genomic_cov_tab.loc[(genomic_cov_tab["covB"] > min_cov) & (genomic_cov_tab["covB"] < max_cov)] #  
+
+        for As in range(Bs,(17 - Bs)):
+            min_cov = 0 if As == 1 else cov * (As - 0.5)
+            max_cov = cov * (As + 0.5)
+            cov_tab_iso_smudge = cov_tab_isoB.loc[(cov_tab_isoB["covA"] > min_cov) & (cov_tab_isoB["covA"] < max_cov)]
+            if sum(cov_tab_iso_smudge['freq']) / total_kmer_pairs > smudge_filter:
+                # sys.stderr.write(f"{As}A{Bs}B: {sum(cov_tab_iso_smudge['freq']) / total_kmer_pairs}\n")
+                smudge_container["A" * As + "B" * Bs] = cov_tab_iso_smudge
+    return(smudge_container)
+
+def get_centrality(smudge_container, cov):
+    centralities = list()
+    freqs = list()
+    for smudge in smudge_container.keys():
+        As = smudge.count('A')
+        Bs = smudge.count('B')
+        smudge_tab = smudge_container[smudge]
+        kmer_in_the_smudge = sum(smudge_tab['freq'])
+        freqs.append(kmer_in_the_smudge)
+        # center as a a mean
+        # center_A = sum((smudge_tab['freq'] * smudge_tab['covA'])) / kmer_in_the_smudge
+        # center_B = sum((smudge_tab['freq'] * smudge_tab['covB'])) / kmer_in_the_smudge
+        # center as a mode 
+        center = smudge_tab.loc[smudge_tab['freq'].idxmax()]
+        center_A = center['covA']
+        center_B = center['covB']
+        ## emprical to edge
+        # distA = min([abs(smudge_tab['covA'].max() - center['covA']), abs(center['covA'] - smudge_tab['covA'].min())])
+        # distB = min([abs(smudge_tab['covB'].max() - center['covB']), abs(center['covB'] - smudge_tab['covB'].min())])
+        ## theoretical to edge
+        # distA = min(abs(center['covA'] - (cov * (As - 0.5))), abs((cov * (As + 0.5)) - center['covA']))
+        # distB = min(abs(center['covB'] - (cov * (Bs - 0.5))), abs((cov * (Bs + 0.5)) - center['covB']))
+        ## theoretical relative distance to the center
+        distA = abs((center_A - (cov * As)) / cov)
+        distB = abs((center_B - (cov * Bs)) / cov)
+
+        # sys.stderr.write(f"Processing: {As}A{Bs}B; with center: {distA}, {distB}\n")
+        centrality = distA + distB
+        centralities.append(centrality)
+
+    if len(centralities) == 0:
+        return(1)
+    return(fmean(centralities, weights=freqs))
+
+def test_coverage_range(cov_tab, min_c, max_c, smudge_size_cutoff = 0.02):
+    # covs_to_test = range(min_c, max_c)
+    covs_to_test = arange(min_c + 0.05, max_c + 0.05, 2)
+    cov_centralities = list()
+    for cov in covs_to_test:
+        smudge_container = get_smudge_container(cov_tab, cov, smudge_size_cutoff)
+        cov_centralities.append(get_centrality(smudge_container, cov))
+
+    best_coverage = covs_to_test[argmin(cov_centralities)]
+
+    tenths_to_test = arange(best_coverage - 1.9, best_coverage + 1.9, 0.2)
+    tenths_centralities = list()
+    for cov in tenths_to_test:
+        smudge_container = get_smudge_container(cov_tab, cov, smudge_size_cutoff)
+        tenths_centralities.append(get_centrality(smudge_container, cov))
+
+    best_tenth = tenths_to_test[argmin(tenths_centralities)]
+    sys.stderr.write(f"Best coverage to precsion of one tenth: {round(best_tenth, 2)}\n")  
+
+    hundredths_to_test = list(arange(best_tenth - 0.19, best_tenth + 0.19, 0.01))
+    hundredths_centralities = list()
+    for cov in hundredths_to_test:
+        smudge_container = get_smudge_container(cov_tab, cov, smudge_size_cutoff)
+        hundredths_centralities.append(get_centrality(smudge_container, cov))
+
+    final_cov = hundredths_to_test[argmin(hundredths_centralities)]
+    just_to_be_sure_cov = final_cov/2
+
+    hundredths_to_test.append(just_to_be_sure_cov)
+    smudge_container = get_smudge_container(cov_tab, just_to_be_sure_cov, smudge_size_cutoff)
+    hundredths_centralities.append(get_centrality(smudge_container, just_to_be_sure_cov))
+
+    final_cov = hundredths_to_test[argmin(hundredths_centralities)]
+    sys.stderr.write(f"Best coverage to precision of one hundreth: {round(final_cov, 3)}\n")  
+
+    all_coverages = concatenate((covs_to_test, tenths_to_test, hundredths_to_test))
+    all_centralities = concatenate((cov_centralities, tenths_centralities, hundredths_centralities))
+
+    return(DataFrame({'coverage': all_coverages, 'centrality': all_centralities}))
+
 #####################
 # the script itself #
 #####################
@@ -172,44 +355,87 @@ def main():
             plot_args += " -P" + args.tmp
         plot_args += " " + args.infile
 
-        sys.stderr.write("Calling: PloidyPlot " + plot_args + "\n")
-        system("PloidyPlot " + plot_args)
+        sys.stderr.write("Calling: hetmers (PloidyPlot kmer pair search) " + plot_args + "\n")
+        system("hetmers " + plot_args)
 
     if _parser.task == "plot":
         # the plotting script is expected ot be installed in the system as well as the R library supporting it
         args = _parser.arguments
-        plot_args = "-i \"" + args.infile + "\" -o \"" + args.o + "\""
-        if args.q != 1:
-            plot_args += " -q " + str(args.q)
-        if args.L != 0:
-            plot_args += " -L " + str(args.L)
-        if args.c != 0:
-            plot_args += " -c " + str(args.c)
-        if args.n != 0:
-            plot_args += " -n " + str(args.n)
-        if args.title:
-            plot_args += " -t \"" + args.title + "\""
-        if args.ylim != 0:
-            plot_args += " -ylim " + str(args.ylim)
-        if args.col_ramp:
-            plot_args += " -col_ramp \"" + args.col_ramp + "\""
-        if args.nbins != 0:
-            plot_args += " -nbins " + str(args.nbins)
-        if args.homozygous:
-            plot_args += " --homozygous"
-        if args.invert_cols:
-            plot_args += " --invert_cols"
-        if args.plot_err_line:
-            plot_args += " --plot_err_line"
-        if args.just_plot:
-            plot_args += " --just_plot"
+        
+        plot_args = f'-i "{args.infile}" -s "{args.smudgefile}" -n {args.n} -o "{args.o}" ' + _parser.format_aguments_for_R_plotting()
 
         sys.stderr.write("Calling: smudgeplot_plot.R " + plot_args + "\n")
         system("smudgeplot_plot.R " + plot_args)
 
-    if _parser.task == "extract":
-        extract_kmer_pairs(_parser.arguments)
+    if _parser.task == "peak_agregation":
+        peak_agregation(_parser.arguments)
+
+    if _parser.task == "all":
+        args = _parser.arguments
+
+        sys.stderr.write("\nLoading data\n")
+        cov_tab = load_hetmers(args.infile)
+        # cov_tab = load_hetmers("data/dicots/smu_files/daAchMill1.k31_ploidy.smu.txt")
+
+        sys.stderr.write("\nMasking errors using local agregation algorithm\n")
+        cov2peak = local_agregation(cov_tab, distance = 1, noise_filter = 1000, mask_errors = True)
+        cov_tab['peak'] = [cov2peak[(covA, covB)] for idx, covB, covA, freq in cov_tab.itertuples()]
 
+        cov_tab = cov_tab.sort_values(['covA', 'covB'], ascending = True)
+        total_kmers = sum(cov_tab['freq'])
+        genomic_kmers = sum(cov_tab[cov_tab['peak'] == 0]['freq'])
+        total_error_kmers = sum(cov_tab[cov_tab['peak'] == 1]['freq'])
+        error_fraction = total_error_kmers / total_kmers
+        sys.stderr.write(f"Total kmers: {total_kmers}\n\tGenomic kmers: {genomic_kmers}\n\tSequencing errors: {total_error_kmers}\n\tFraction or errors: {round(total_error_kmers/total_kmers, 3)}")
+
+        with open(args.o + "_masked_errors_smu.txt", 'w') as error_annotated_smu:
+            error_annotated_smu.write("covB\tcovA\tfreq\tis_error\n")
+            for idx, covB, covA, freq, is_error in cov_tab.itertuples():
+                error_annotated_smu.write(f"{covB}\t{covA}\t{freq}\t{is_error}\n") # might not be needed
+
+        sys.stderr.write("\nInfering 1n coverage using grid algorihm\n")
+
+        smudge_size_cutoff = 0.001 # this is % of all k-mer pairs smudge needs to have to be considered a valid smudge
+
+        if args.cov == 0: # not specified user coverage
+            centralities = test_coverage_range(cov_tab, args.cov_min, args.cov_max, smudge_size_cutoff)
+            np.savetxt(args.o + "_centralities.txt", np.around(centralities, decimals=6), fmt="%.4f", delimiter = '\t')
+            # plot(centralities['coverage'], centralities['coverage'])
+
+            if error_fraction < 0.75:
+                cov = centralities['coverage'][argmin(centralities['centrality'])]
+            else:
+                sys.stderr.write(f"Too many errors observed: {error_fraction}, not trusting coverage inference\n")
+                cov = 0
+
+            sys.stderr.write(f"\nInferred coverage: {cov}\n")
+        else:
+            cov = args.cov
+
+        final_smudges = get_smudge_container(cov_tab, cov, 0.03)
+        # sys.stderr.write(str(final_smudges) + '\n')
+
+        annotated_smudges = list(final_smudges.keys())
+        smudge_sizes = [round(sum(final_smudges[smudge]['freq']) / genomic_kmers, 4) for smudge in annotated_smudges]
+        
+        sys.stderr.write(f'Detected smudges / sizes ({args.o} + "_smudge_sizes.txt):"\n')
+        sys.stderr.write('\t' + str(annotated_smudges) + '\n')
+        sys.stderr.write('\t' + str(smudge_sizes) + '\n')
+
+        # saving smudge sizes
+        smudge_table = DataFrame({'smudge': annotated_smudges, 'size': smudge_sizes})
+        np.savetxt(args.o + "_smudge_sizes.txt", smudge_table, fmt='%s', delimiter = '\t')
+
+        sys.stderr.write("\nPlotting\n")
+
+        system("centrality_plot.R " + args.o + "_centralities.txt")
+        # Rscript playground/alternative_fitting/alternative_plotting_testing.R -i data/dicots/peak_agregation/$ToLID.cov_tab_peaks -o data/dicots/peak_agregation/$ToLID
+        args = _parser.arguments
+        
+        plot_args = f'-i "{args.o}_masked_errors_smu.txt" -s "{args.o}_smudge_sizes.txt" -n {round(cov, 3)} -o "{args.o}" ' + _parser.format_aguments_for_R_plotting()
+
+        sys.stderr.write("Calling: smudgeplot_plot.R " + plot_args + "\n")
+        system("smudgeplot_plot.R " + plot_args) 
 
     sys.stderr.write("\nDone!\n")
     exit(0)
diff --git a/exec/smudgeplot_plot.R b/exec/smudgeplot_plot.R
index 0b9d8f1..0cc1069 100755
--- a/exec/smudgeplot_plot.R
+++ b/exec/smudgeplot_plot.R
@@ -2,7 +2,157 @@
 
 suppressPackageStartupMessages(library("methods"))
 suppressPackageStartupMessages(library("argparse"))
-suppressPackageStartupMessages(library("smudgeplot"))
+suppressPackageStartupMessages(library("viridis"))
+
+# suppressPackageStartupMessages(library("smudgeplot"))
+
+#################
+### funcitons ###
+#################
+# retirying the smudgeplot R package
+get_col_ramp <- function(.args, delay = 0){
+    colour_ramp <- eval(parse(text = paste0(.args$col_ramp,"(", 32 - delay, ")")))
+    if (.args$invert_cols){
+        colour_ramp <- rev(colour_ramp)
+    }
+    colour_ramp <- c(rep(colour_ramp[1], delay), colour_ramp)
+    return(colour_ramp)
+}
+
+wtd.quantile <- function(x, q=0.25, weight=NULL) {
+  o <- order(x)
+  n <- sum(weight)
+  order <- 1 + (n - 1) * q
+  low  <- pmax(floor(order), 1)
+  high <- pmin(ceiling(order), n)
+  low_contribution <- high - order
+  allq <- approx(x=cumsum(weight[o])/sum(weight), y=x[o], xout = c(low, high)/n, method = "constant",
+      f = 1, rule = 2)$y
+  low_contribution * allq[1] + (1 - low_contribution) * allq[2]
+}
+
+wtd.iqr <- function(x, w=NULL) {
+  wtd.quantile(x, q=0.75, weight=w) - wtd.quantile(x, q=0.25, weight=w)
+}
+
+plot_alt <- function(cov_tab, ylim, colour_ramp, log = F){
+    A_equals_B <- cov_tab[, 'covA'] == cov_tab[, 'covB']
+    cov_tab[A_equals_B, 'freq'] <- cov_tab[A_equals_B, 'freq'] * 2
+    if (log){
+        cov_tab[, 'freq'] <- log10(cov_tab[, 'freq'])
+    }
+    cov_tab$col <- colour_ramp[1 + round(31 * cov_tab[, 'freq'] / max(cov_tab[, 'freq']))]
+
+    # c(bottom, left, top, right)
+    par(mar=c(4.8,4.8,1,1))
+    plot(NULL, xlim = c(0, 0.5), ylim = ylim,
+         xlab = 'Normalized minor kmer coverage: B / (A + B)',
+         ylab = 'Total coverage of the kmer pair: A + B', cex.lab = 1.4, bty = 'n')
+    min_cov_to_plot <- max(ylim[1],min(cov_tab[, 'total_pair_cov']))
+    nothing <- sapply(min_cov_to_plot:ylim[2], plot_one_coverage, cov_tab)
+    return(0)
+}
+
+plot_one_coverage <- function(cov, cov_tab){
+    cov_row_to_plot <- cov_tab[cov_tab[, 'total_pair_cov'] == cov, ]
+    width <- 1 / (2 * cov)
+    cov_row_to_plot$left <- cov_row_to_plot[, 'minor_variant_rel_cov'] - width
+    cov_row_to_plot$right <- sapply(cov_row_to_plot[, 'minor_variant_rel_cov'], function(x){ min(0.5, x + width)})
+    apply(cov_row_to_plot, 1, plot_one_box, cov)
+}
+
+plot_one_box <- function(one_box_row, cov){
+    left <- as.numeric(one_box_row['left'])
+    right <- as.numeric(one_box_row['right'])
+    rect(left, cov - 0.5, right, cov + 0.5, col = one_box_row['col'], border = NA)
+}
+
+plot_isoA_line <- function (.covA, .L, .col = "black", .ymax = 250, .lwd, .lty) {
+    min_covB <- .L # min(.cov_tab[, 'covB']) # should be L really
+    max_covB <- .covA
+    B_covs <- seq(min_covB, max_covB, length = 500)
+    isoline_x <- B_covs/ (B_covs + .covA)
+    isoline_y <- B_covs + .covA
+    lines(isoline_x[isoline_y < .ymax], isoline_y[isoline_y < .ymax], lwd = .lwd, lty = .lty, col = .col)
+}
+
+plot_isoB_line <- function (.covB, .ymax, .col = "black", .lwd, .lty) {
+    cov_range <- seq((2 * .covB) - 2, .ymax, length = 500)
+    lines((.covB)/cov_range, cov_range, lwd = .lwd, lty = .lty, col = .col)
+}
+
+plot_iso_grid <- function(.cov, .L, .ymax, .col = 'black', .lwd = 2, .lty = 2){
+    for (i in 0:15){
+        cov <- (i + 0.5) * .cov
+        plot_isoA_line(cov, .L = .L, .ymax = .ymax, .col, .lwd = .lwd, .lty = .lty)
+        if (i < 8){
+            plot_isoB_line(cov, .ymax, .col, .lwd = .lwd, .lty = .lty)
+        }
+    }
+}
+
+plot_expected_haplotype_structure <- function(.n, .peak_sizes,
+                                              .adjust = F, .cex = 1.3, xmax = 0.49){
+    .peak_sizes <- .peak_sizes[.peak_sizes[, 'size'] > 0.05, ]
+    .peak_sizes[, 'ploidy'] <- nchar(.peak_sizes[, 'structure'])
+
+    decomposed_struct <- strsplit(.peak_sizes[, 'structure'], '')
+    .peak_sizes[, 'corrected_minor_variant_cov'] <- sapply(decomposed_struct, function(x){ sum(x == 'B') } ) / .peak_sizes[, 'ploidy']
+    .peak_sizes[, 'label'] <- reduce_structure_representation(.peak_sizes[, 'structure'])
+
+    borercases <- .peak_sizes$corrected_minor_variant_cov == 0.5
+
+    for(i in 1:nrow(.peak_sizes)){
+        # xmax is in the middle of the last square in the 2d histogram,
+        # which is too far from the edge, so I average it with 0.49
+        # witch will pull the label bit to the edge
+        text( ifelse( borercases[i] & .adjust, (xmax + 0.49) / 2, .peak_sizes$corrected_minor_variant_cov[i]),
+             .peak_sizes$ploidy[i] * .n, .peak_sizes[i, 'label'],
+             offset = 0, cex = .cex, xpd = T, pos = ifelse( borercases[i] & .adjust, 2, 1))
+    }
+}
+
+reduce_structure_representation <- function(smudge_labels){
+    structures_to_adjust <- (sapply(smudge_labels, nchar) > 4)
+
+    if ( any(structures_to_adjust) ) {
+        decomposed_struct <- strsplit(smudge_labels[structures_to_adjust], '')
+        As <- sapply(decomposed_struct, function(x){ sum(x == 'A') } )
+        Bs <- sapply(decomposed_struct, length) - As
+        smudge_labels[structures_to_adjust] <- paste0(As, 'A', Bs, 'B')
+    }
+    return(smudge_labels)
+}
+
+plot_legend <- function(kmer_max, .colour_ramp, .log_scale = T){
+    par(mar=c(0,0,2,1))
+    plot.new()
+    print_title <- ifelse(.log_scale, 'log kmers pairs', 'kmers pairs')
+    title(print_title)
+    for(i in 1:32){
+        rect(0,(i - 0.01) / 33, 0.5, (i + 0.99) / 33, col = .colour_ramp[i])
+    }
+    # kmer_max <- max(smudge_container$dens)
+    if( .log_scale == T ){
+        for(i in 0:6){
+            text(0.75, i / 6, rounding(10^(log10(kmer_max) * i / 6)), offset = 0)
+        }
+    } else {
+        for(i in 0:6){
+            text(0.75, i / 6, rounding(kmer_max * i / 6), offset = 0)
+        }
+    }
+}
+
+rounding <- function(number){
+    if(number > 1000){
+        round(number / 1000) * 1000
+    } else if (number > 100){
+        round(number / 100) * 100
+    } else {
+        round(number / 10) * 10
+    }
+}
 
 #############
 ## SETTING ##
@@ -12,7 +162,9 @@ parser <- ArgumentParser()
 parser$add_argument("--homozygous", action="store_true", default = F,
                     help="Assume no heterozygosity in the genome - plotting a paralog structure; [default FALSE]")
 parser$add_argument("-i", "--input", default = "*_smu.txt",
-                    help="name of the input tsv file with covarages [default \"coverages_2.tsv\"]")
+                    help="name of the input tsv file with covarages [default \"*_smu.txt\"]")
+parser$add_argument("-s", "--smudges", default = "not_specified",
+                    help="name of the input tsv file with annotated smudges and their respective sizes")
 parser$add_argument("-o", "--output", default = "smudgeplot",
                     help="name pattern used for the output files (OUTPUT_smudgeplot.png, OUTPUT_summary.txt, OUTPUT_warrnings.txt) [default \"smudgeplot\"]")
 parser$add_argument("-t", "--title",
@@ -21,248 +173,139 @@ parser$add_argument("-q", "--quantile_filt", type = "double",
                     help="Remove kmer pairs with coverage over the specified quantile; [default none]")
 parser$add_argument("-n", "--n_cov", type = "double",
                     help="the haploid coverage of the sequencing data [default inference from data]")
-parser$add_argument("-L", "--low_cutoff", type = "integer",
-                    help="the lower boundary used when dumping kmers [default min(total_pair_cov) / 2]")
 parser$add_argument("-c", "-cov_filter", type = "integer",
-                    help="Filter pairs with one of them having coverage bellow specified threshold [default 0; disables parameter L]")
+                    help="Filter pairs with one of them having coverage bellow specified threshold [default 0]")
 parser$add_argument("-ylim", type = "integer", 
                     help="The upper limit for the coverage sum (the y axis)")
-parser$add_argument("-nbins", type = "integer",
-                    help="the number of nbins used for smudgeplot matrix (nbins x nbins) [default autodetection]")
 parser$add_argument("-col_ramp", default = "viridis",
                     help="A colour ramp available in your R session [viridis]")
 parser$add_argument("--invert_cols", action="store_true", default = F,
                     help="Set this flag to invert colorus of Smudgeplot (dark for high, light for low densities)")
-parser$add_argument("--plot_err_line", action="store_true", default = F,
-                    help="Set this flag to add a line of theh higher expected occurance of errors paired with genomic k-mers")
-parser$add_argument("--just_plot", action="store_true", default = F,
-                    help="Turns off the inference of coverage and annotation of smudges; simply generates smudgeplot. (default False)")
-
-
+ 
 args <- parser$parse_args()
 
 colour_ramp_log <- get_col_ramp(args, 16) # create palette for the log plots
 colour_ramp <- get_col_ramp(args) # create palette for the linear plots
 
-iterative_nbins <- F
-if( is.null(args$nbins) ){
-    args$nbins <- 40
-    iterative_nbins <- T
-}
-
-# for easier manipulation I store estimated things in a list
-smudge_summary <- list()
-
-smudge_warn(args$output, "\n######################")
-smudge_warn(args$output, "## INPUT PROCESSING ##")
-smudge_warn(args$output, "######################")
-
 if ( !file.exists(args$input) ) {
     stop("The input file not found. Please use --help to get help", call.=FALSE)
 }
 
-cov_tab <- read.table(args$input, col.names = c('covB', 'covA', 'freq'))
+cov_tab <- read.table(args$input, header = T) # col.names = c('covB', 'covA', 'freq', 'is_error'), 
+smudge_tab <- read.table(args$smudges, col.names = c('structure', 'size'))
 
 # total covarate of the kmer pair
 cov_tab[, 'total_pair_cov'] <- cov_tab[, 'covA'] + cov_tab[, 'covB']
 # calcualte relative coverage of the minor allele
 cov_tab[, 'minor_variant_rel_cov'] <- cov_tab[, 'covB'] / cov_tab[, 'total_pair_cov']
 
-if ( !is.null(args$q) ){
-    # quantile filtering (remove top q%, it's not really informative)
-    threshold <- quantile(cov_tab[, 'total_pair_cov'], args$q)
-    high_cov_filt <- cov_tab[, 'total_pair_cov'] < threshold
-    smudge_warn(args$output, "Removing", sum(cov_tab[!high_cov_filt, 'freq']), 
-                "kmer pairs with coverage higher than",
-                threshold, paste0("(", args$q, " quantile)"))
-    cov_tab <- cov_tab[high_cov_filt, ]
-}
+##### coverage filtering
 
 if ( !is.null(args$c) ){
     threshold <- args$c
     low_cov_filt <- cov_tab[, 'covA'] < threshold | cov_tab[, 'covB'] < threshold 
-    smudge_warn(args$output, "Removing", sum(cov_tab[low_cov_filt, 'freq']), 
-                "kmer pairs for which one of the pair had coverage below",
-                threshold, paste0("(Specified by argument -c ", args$c, ")"))
+    # smudge_warn(args$output, "Removing", sum(cov_tab[low_cov_filt, 'freq']), 
+    #             "kmer pairs for which one of the pair had coverage below",
+    #             threshold, paste0("(Specified by argument -c ", args$c, ")"))
     cov_tab <- cov_tab[!low_cov_filt, ]
-    smudge_warn(args$output, "Processing", sum(cov_tab[, 'freq']), "kmer pairs")
-    L <- min(cov_tab[, 'covB'])
-} else {
-    L <- ifelse(length(args$L) == 0, min(cov_tab[, 'covB']), args$L)
+    # smudge_warn(args$output, "Processing", sum(cov_tab[, 'freq']), "kmer pairs")    
 }
-smudge_summary$n_subset_est <- round(estimate_1n_coverage_1d_subsets(cov_tab), 1)
 
-if (!args$just_plot){
-    draft_n <- ifelse(length(args$n_cov) == 0, smudge_summary$n_subset_est, args$n_cov)
-    ylim <- c(min(cov_tab[, 'total_pair_cov']) - 1, # or 0?
-          min(10*draft_n, max(cov_tab[, 'total_pair_cov'])))
+##### quantile filtering
+if ( !is.null(args$q) ){
+    # quantile filtering (remove top q%, it's not really informative)    
+    threshold <- wtd.quantile(cov_tab[, 'total_pair_cov'], args$q, cov_tab[, 'freq'])
+    high_cov_filt <- cov_tab[, 'total_pair_cov'] < threshold
+    # smudge_warn(args$output, "Removing", sum(cov_tab[!high_cov_filt, 'freq']), 
+    #             "kmer pairs with coverage higher than",
+    #             threshold, paste0("(", args$q, " quantile)"))
+    cov_tab <- cov_tab[high_cov_filt, ]
+}
+
+cov <- args$n_cov
+if (cov == wtd.quantile(cov_tab[, 'total_pair_cov'], 0.95, cov_tab[, 'freq'])){
+    ylim <- c(min(cov_tab[, 'total_pair_cov']), max(cov_tab[, 'total_pair_cov']))
 } else {
-    ylim <- c(0,max(cov_tab[, 'total_pair_cov'])) # if there is no inference, the default coverage is max cov
+    ylim <- c(min(cov_tab[, 'total_pair_cov']) - 1, # or 0?
+          min(max(100, 10*cov), max(cov_tab[, 'total_pair_cov'])))
 }
 
 xlim <- c(0, 0.5)
+error_fraction <- sum(cov_tab[, 'is_error'] * cov_tab[, 'freq']) / sum(cov_tab[, 'freq']) * 100
+error_string <- paste("err =", round(error_fraction, 1), "%")
+cov_string <- paste0("1n = ", cov)
 
-if (!is.null(args$ylim)){ # if ylim is specified, set the bounday by the argument instead
+if (!is.null(args$ylim)){ # if ylim is specified, set the boundary by the argument instead
     ylim[2] <- args$ylim
 }
 
+fig_title <- ifelse(length(args$title) == 0, NA, args$title[1])
+# histogram_bins = max(30, args$nbins)
 
-smudge_warn(args$output, "\n#############")
-smudge_warn(args$output, "## SUMMARY ##")
-smudge_warn(args$output, "#############")
-
-dulpicit_structures <- T
-repeat {
-    smudge_container <- get_smudge_container(cov_tab, .nbins = args$nbins, .xlim = xlim, .ylim = ylim)
-
-    peak_points <- peak_agregation(smudge_container)
-    if (args$just_plot){
-        break
-    }
-    peak_sizes <- get_peak_summary(peak_points, smudge_container, 0.02)
-
-    the_smallest_n <- min(get_trinoploid_1n_est(peak_sizes), draft_n)
-    smudge_summary$n_peak_est <- estimate_1n_coverage_highest_peak(peak_sizes, cov_tab, the_smallest_n)
-    if (length(args$n_cov) == 0) {
-        if( (abs(log2(smudge_summary$n_subset_est / smudge_summary$n_peak_est)) > .95 | smudge_summary$n_peak_est < smudge_summary$n_subset_est) & !args$homozygous){
-            smudge_summary$n <- smudge_summary$n_subset_est
-        } else {
-            smudge_summary$n <- smudge_summary$n_peak_est
-        }
-    } else {
-        smudge_summary$n <- args$n_cov
-    }
-
-    # if the organism is completely homozygous, all the detected kmer pairs are corresponding to paralogs
-    # therefore ther inference will confuse AB peak to AABB peak etc.
-    # that is recolvable just telling to guess half of the coverage instead
-    if( args$homozygous ){
-        smudge_summary$n <- smudge_summary$n / 2
-        smudge_summary$n_subset_est <- smudge_summary$n_subset_est / 2
-    }
-
-    peak_sizes$structure <- apply(peak_sizes, 1,
-                                  function(x){ guess_genome_structure(x, smudge_summary$n)})
+##########
+# LINEAR #
+##########
+pdf(paste0(args$output,'_smudgeplot.pdf'))
 
-    dulpicit_structures <- any(table(peak_sizes$structure) > 1)
-    # if there are more smudges on the same location & if user have not specified nbins
-    if(dulpicit_structures & iterative_nbins){
-        if(args$nbins > 20){
-            args$nbins <- args$nbins - 5
-        } else {
-            args$nbins <- args$nbins - 2
-        }
-        smudge_warn(args$output, "detecting two smudges at the same positions, not enough data for this number of bins lowering number of bins to ", args$nbins)
-    } else {
-        break
-    }
+# layout(matrix(c(2,4,1,3), 2, 2, byrow=T), c(3,1), c(1,3))
+layout(matrix(c(4,2,1,3), 2, 2, byrow=T), c(3,1), c(1,3))
+# 1 smudge plot
+plot_alt(cov_tab, ylim, colour_ramp_log)
+if (cov > 0){
+    plot_expected_haplotype_structure(cov, smudge_tab, T, xmax = 0.49)
 }
 
-if (!args$just_plot){
-    # checks / warnings for potential inference problems
-    if( abs(log2(smudge_summary$n_subset_est / smudge_summary$n_peak_est)) > 1 & !args$homozygous){
-        smudge_warn(args$output, "!! Warning, the two types of estimates of 1n coverage differ a lot (",
-                    smudge_summary$n_subset_est, "and", smudge_summary$n_peak_est, ")")
-        smudge_warn(args$output, "i.e. at least of one of the smudgeplot methods to estimate the haploid coverage got it wrong")
-        smudge_warn(args$output, "Using subset estimate instead of highest peak estimate (less precise but also less often completely wrong)")
-        smudge_warn(args$output, "Does the smudgeplot look sane? Is at least one of the 1n estimates close a GenomeScope estimate?")
-        smudge_warn(args$output, "You can help us imrove this software by sharing this strange smudgeplot on https://github.com/KamilSJaron/smudgeplot/issues.")
-    }
-    if( L > (smudge_summary$n / 2) & !args$homozygous ){
-        smudge_warn(args$output, "!! Warning, your coverage filter on the lower end (L = ", L,
-                    ") is higher than half of the 1n coverage estimate ( 1n / 2 = ", round(smudge_summary$n / 2, 2))
-        smudge_warn(args$output, "If the real 1n coverage is half of your estimate you would not picked it up due to the filtering.")
-        smudge_warn(args$output, "If you have sufficient coverage, consider reruning the analysis with lower L (something like (1n / 2) - 5)")
-        smudge_warn(args$output, "One good way for verificaiton would be to compare it to GenomeScope estimate of haploid coverage")
-    }
-
-    peak_sizes$corrected_minor_variant_cov <- sapply(peak_sizes$structure, function(x){round(mean(unlist(strsplit(x, split = '')) == 'B'), 2)})
-    peak_sizes$ploidy <- sapply(peak_sizes$structure, nchar)
-
-    to_filter <- peak_sizes$ploidy <= 1
-    if( any(to_filter) ){
-        smudge_warn(args$output, paste(sum(to_filter), "peaks of kmer pairs detected with coverage < (1n_coverage * 2) =", round(smudge_summary$n * 2, 1)))
-        tab_to_print <- peak_sizes[to_filter,c(2,3,8,9)]
-        tab_to_print <- round(tab_to_print, 2)
-        colnames(tab_to_print) <- c('kmers_in_peak[#]', 'kmers_in_peak[proportion]', 'summit B / (A + B)', 'summit A + B')
-        smudge_warn(args$output, paste0(capture.output(tab_to_print), collapse = "\n"))
-        peak_sizes <- peak_sizes[!to_filter,]
-        smudge_warn(args$output, "This might be due to kmers with sequencing errors present in the kmer dump.")
-        smudge_warn(args$output, "Increasing L might help remove erroneous kmers.")
-    }
-
-    peak_sizes$rel_size <- peak_sizes$rel_size / sum(peak_sizes$rel_size)
-    peak_sizes <- peak_sizes[order(peak_sizes$rel_size, decreasing = T),]
-    smudge_summary$peak_sizes <- peak_sizes
-
-    # genome ploidy is the ploidy with highest number of corresponding kmer pairs regardless of topology
-    considered_ploidies <- unique(peak_sizes$ploidy)
-    ploidy_with_most_smudges <- which.max(sapply(considered_ploidies, function(x){ sum(peak_sizes[peak_sizes$ploidy == x,'rel_size']) }) )
-    smudge_summary$genome_ploidy <- considered_ploidies[ploidy_with_most_smudges]
-    # smudge_summary$genome_ploidy <- peak_sizes$ploidy[which.max(peak_sizes$rel_size)]
-
-    # this will be probably diploid,
-    # but theoretically one can imagine a species that si completely homozygous tetraploid
-    if( args$homozygous ){
-        smudge_summary$genome_ploidy <- smudge_summary$genome_ploidy / 2
-        if(!smudge_summary$genome_ploidy %in% c(2,4,6,8)){
-            smudge_warn(args$output, "Guessing really strange ploidy", smudge_summary$genome_ploidy, "perhaps there is not enough coverage for a good inference.")
-            smudge_warn(args$output, "You can trust the plot, but guessed ploidy or peak detection might be completely off.")
-        }
-    }
 
-    generate_summary(args, smudge_summary)
+# 4 legend
+plot_legend(max(cov_tab[, 'freq']), colour_ramp, F)
+
+### add annotation
+# print smudge sizes
+plot.new()
+if (cov > 0){
+    legend('topleft', bty = 'n', reduce_structure_representation(smudge_tab[,'structure']), cex = 1.1)
+    legend('top', bty = 'n', legend = round(smudge_tab[,2], 2), cex = 1.1)
+    legend('bottomleft', bty = 'n', legend = c(cov_string, error_string), cex = 1.1)
 } else {
-    smudge_summary$n <- 0 # this is interpreted as "no infered"
+    legend('bottomleft', bty = 'n', legend = error_string, cex = 1.1)
 }
 
-smudge_warn(args$output, "\n##########")
-smudge_warn(args$output, "## PLOT ##")
-smudge_warn(args$output, "##########")
-
-fig_title <- ifelse(length(args$title) == 0, NA, args$title[1])
+plot.new()
+mtext(bquote(italic(.(fig_title))), side=3, adj=0.1, line=-3, cex = 1.6)
 
-pdf(paste0(args$output,'_smudgeplot_log10.pdf'))
-
-layout(matrix(c(2,4,1,3), 2, 2, byrow=T), c(3,1), c(1,3))
-# 1 smudge plot
-plot_smudgeplot(smudge_container, smudge_summary$n, colour_ramp_log)
-if (!args$just_plot){
-plot_expected_haplotype_structure(smudge_summary$n, peak_sizes, T, xmax = max(smudge_container$x))
-}
-if (args$plot_err_line){
-    plot_seq_error_line(cov_tab)
-}
-
-histogram_bins = max(30, args$nbins)
-# 2,3 hist
-plot_histograms(cov_tab, smudge_summary, fig_title, .ylim = ylim, .bins = histogram_bins) # I am testing here setting the number of bars to the same number as the number of squares
-# 4 legend
-plot_legend(smudge_container, colour_ramp_log)
 
 dev.off()
 
-# replace the log transformed values by non-transformed
-smudge_container$z <- smudge_container$dens
+############
+# log plot #
+############
 
-pdf(paste0(args$output,'_smudgeplot.pdf'))
+pdf(paste0(args$output,'_smudgeplot_log10.pdf'))
 
-layout(matrix(c(2,4,1,3), 2, 2, byrow=T), c(3,1), c(1,3))
+layout(matrix(c(4,2,1,3), 2, 2, byrow=T), c(3,1), c(1,3))
+# cov_tab[, 'freq'] <- log10(cov_tab[, 'freq'])
 # 1 smudge plot
-plot_smudgeplot(smudge_container, smudge_summary$n, colour_ramp)
-if (!args$just_plot){
-    plot_expected_haplotype_structure(smudge_summary$n, peak_sizes, T, xmax = max(smudge_container$x))
+plot_alt(cov_tab, ylim, colour_ramp_log, log = T)
+
+if (cov > 0){
+    plot_expected_haplotype_structure(cov, smudge_tab, T, xmax = 0.49)
 }
-# plot error line L - 1 / cov ~ cov
-if (args$plot_err_line){
-    plot_seq_error_line(cov_tab)
+
+# 4 legend
+plot_legend(max(cov_tab[, 'freq']), colour_ramp_log, T)
+
+# print smudge sizes
+plot.new()
+if (cov > 0){
+    legend('topleft', bty = 'n', reduce_structure_representation(smudge_tab[,'structure']), cex = 1.1)
+    legend('top', bty = 'n', legend = round(smudge_tab[,2], 2), cex = 1.1)
+    legend('bottomleft', bty = 'n', legend = c(cov_string, error_string), cex = 1.1)
+} else {
+    legend('bottomleft', bty = 'n', legend = error_string, cex = 1.1)
 }
 
-# 2,3 hist
-plot_histograms(cov_tab, smudge_summary, fig_title,
-                .ylim = ylim, .bins = histogram_bins)
 
-# 4 legend
-plot_legend(smudge_container, colour_ramp, F)
+plot.new()
+mtext(bquote(italic(.(fig_title))), side=3, adj=0.1, line=-3, cex = 1.6)
 
-dev.off()
+dev.off()
\ No newline at end of file
diff --git a/install.R b/install.R
deleted file mode 100644
index f3647de..0000000
--- a/install.R
+++ /dev/null
@@ -1,5 +0,0 @@
-require(devtools)
-
-document()
-# test() #- the tests are broken for the moment
-install.packages(".", repos = NULL, type="source")
diff --git a/man/annotate_peaks.Rd b/man/annotate_peaks.Rd
deleted file mode 100644
index 2cc3014..0000000
--- a/man/annotate_peaks.Rd
+++ /dev/null
@@ -1,11 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/annotate_peaks.R
-\name{annotate_peaks}
-\alias{annotate_peaks}
-\title{annotate_peaks}
-\usage{
-annotate_peaks(.peak_points, .min_kmerpair_cov, .max_kmerpair_cov)
-}
-\description{
-\code{annotate_peaks}
-}
diff --git a/man/annotate_summits.Rd b/man/annotate_summits.Rd
deleted file mode 100644
index 6e7167a..0000000
--- a/man/annotate_summits.Rd
+++ /dev/null
@@ -1,17 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/annotate_summits.R
-\name{annotate_summits}
-\alias{annotate_summits}
-\title{annotate_summits}
-\usage{
-annotate_summits(
-  .peak_points,
-  .peak_sizes,
-  .min_kmerpair_cov,
-  .max_kmerpair_cov,
-  col = "red"
-)
-}
-\description{
-\code{annotate_summits}
-}
diff --git a/man/bw.nrdW.Rd b/man/bw.nrdW.Rd
deleted file mode 100644
index 2244d7a..0000000
--- a/man/bw.nrdW.Rd
+++ /dev/null
@@ -1,12 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/bw.nrdW.R
-\name{bw.nrdW}
-\alias{bw.nrdW}
-\title{bw.nrdW}
-\usage{
-bw.nrdW(x, w)
-}
-\description{
-\code{bw.nrdW} is a function that mimics bw.nrd0 - Silverman's ‘rule of thumb’ for estimating the kernel width, but using weighted values;
-this can be useful if one wants to mimics kerned smoothing to a histogram using counts as weights
-}
diff --git a/man/coverage_histogram.Rd b/man/coverage_histogram.Rd
deleted file mode 100644
index f6b2eda..0000000
--- a/man/coverage_histogram.Rd
+++ /dev/null
@@ -1,11 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/coverage_histogram.R
-\name{coverage_histogram}
-\alias{coverage_histogram}
-\title{coverage_histogram}
-\usage{
-coverage_histogram(.cov_tab, bins, xlim, column, horiz, .col)
-}
-\description{
-\code{coverage_histogram} plots a histogram from a table that contains both values (what is normally x) and their respective frequencies (a column called 'freq');
-}
diff --git a/man/estimate_1n_coverage_1d_subsets.Rd b/man/estimate_1n_coverage_1d_subsets.Rd
deleted file mode 100644
index c975e91..0000000
--- a/man/estimate_1n_coverage_1d_subsets.Rd
+++ /dev/null
@@ -1,11 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/estimate_1n_coverage_1d_subsets.R
-\name{estimate_1n_coverage_1d_subsets}
-\alias{estimate_1n_coverage_1d_subsets}
-\title{estimate_1n_coverage_1d_subsets}
-\usage{
-estimate_1n_coverage_1d_subsets(.cov_tab)
-}
-\description{
-\code{estimate_1n_coverage_1d_subsets} is used as the first proxy of 1n coverage. The estimate is subsequentially refined by the brightest smudge.
-}
diff --git a/man/estimate_1n_coverage_highest_peak.Rd b/man/estimate_1n_coverage_highest_peak.Rd
deleted file mode 100644
index 2ec0176..0000000
--- a/man/estimate_1n_coverage_highest_peak.Rd
+++ /dev/null
@@ -1,11 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/estimate_1n_coverage_highest_peak.R
-\name{estimate_1n_coverage_highest_peak}
-\alias{estimate_1n_coverage_highest_peak}
-\title{estimate_1n_coverage_highest_peak}
-\usage{
-estimate_1n_coverage_highest_peak(.filt_peak_sizes, .cov_tab, .draft_1n)
-}
-\description{
-\code{estimate_1n_coverage_highest_peak}
-}
diff --git a/man/filter_peak_sizes.Rd b/man/filter_peak_sizes.Rd
deleted file mode 100644
index 2eb3dc0..0000000
--- a/man/filter_peak_sizes.Rd
+++ /dev/null
@@ -1,11 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/filter_peak_sizes.R
-\name{filter_peak_sizes}
-\alias{filter_peak_sizes}
-\title{filter_peak_sizes}
-\usage{
-filter_peak_sizes(.peak_sizes, .threshold = 0.005)
-}
-\description{
-\code{filter_peak_sizes}
-}
diff --git a/man/filter_peaks.Rd b/man/filter_peaks.Rd
deleted file mode 100644
index 31f1333..0000000
--- a/man/filter_peaks.Rd
+++ /dev/null
@@ -1,11 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/filter_peaks.R
-\name{filter_peaks}
-\alias{filter_peaks}
-\title{filter_peaks}
-\usage{
-filter_peaks(.peak_points, .peak_sizes)
-}
-\description{
-\code{filter_peaks}
-}
diff --git a/man/generate_summary.Rd b/man/generate_summary.Rd
deleted file mode 100644
index 50000a0..0000000
--- a/man/generate_summary.Rd
+++ /dev/null
@@ -1,11 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/generate_summary.R
-\name{generate_summary}
-\alias{generate_summary}
-\title{generate_summary}
-\usage{
-generate_summary(.args, .smudge_summary)
-}
-\description{
-\code{generate_summary}
-}
diff --git a/man/get_1d_peaks.Rd b/man/get_1d_peaks.Rd
deleted file mode 100644
index 4627f34..0000000
--- a/man/get_1d_peaks.Rd
+++ /dev/null
@@ -1,17 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/get_1d_peaks.R
-\name{get_1d_peaks}
-\alias{get_1d_peaks}
-\title{get_1d_peaks}
-\usage{
-get_1d_peaks(
-  .cov_tab,
-  .num_of_peaks = 3,
-  .adjust = 10,
-  .peak_frame = data.frame(),
-  .depth = 1
-)
-}
-\description{
-\code{get_1d_peaks} is a legacy code
-}
diff --git a/man/get_col_ramp.Rd b/man/get_col_ramp.Rd
deleted file mode 100644
index b30d8e9..0000000
--- a/man/get_col_ramp.Rd
+++ /dev/null
@@ -1,14 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/get_col_ramp.R
-\name{get_col_ramp}
-\alias{get_col_ramp}
-\title{get_col_ramp}
-\usage{
-get_col_ramp(.args, delay = 0)
-}
-\description{
-\code{get_col_ramp} returns a colour ramp used by smudgeplots; 
-args need to contain args$col_ramp and args$invert_cols features
-the first needs to be a ramp-generating function (e.g. "viridis", "grey.colors", "magma" or "mako")
-args$invert_cols is just TRUE or FALSE (if to revert the colours in the palete)
-}
diff --git a/man/get_peak_sizes.Rd b/man/get_peak_sizes.Rd
deleted file mode 100644
index b1a1e4b..0000000
--- a/man/get_peak_sizes.Rd
+++ /dev/null
@@ -1,11 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/get_peak_sizes.R
-\name{get_peak_sizes}
-\alias{get_peak_sizes}
-\title{get_peak_sizes}
-\usage{
-get_peak_sizes(.peak_points)
-}
-\description{
-\code{get_peak_sizes}
-}
diff --git a/man/get_peak_summary.Rd b/man/get_peak_summary.Rd
deleted file mode 100644
index f84d502..0000000
--- a/man/get_peak_summary.Rd
+++ /dev/null
@@ -1,11 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/get_peak_summary.R
-\name{get_peak_summary}
-\alias{get_peak_summary}
-\title{get_peak_summary}
-\usage{
-get_peak_summary(.peak_points, .smudge_container, .treshold = 0.005)
-}
-\description{
-\code{get_peak_summary}
-}
diff --git a/man/get_smudge_container.Rd b/man/get_smudge_container.Rd
deleted file mode 100644
index c72ca48..0000000
--- a/man/get_smudge_container.Rd
+++ /dev/null
@@ -1,11 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/get_smudge_container.R
-\name{get_smudge_container}
-\alias{get_smudge_container}
-\title{get_smudge_container}
-\usage{
-get_smudge_container(.cov_tab, .nbins = 20, .xlim = c(0, 0.5), .ylim = NA)
-}
-\description{
-\code{get_smudge_container} is inspired by https://stackoverflow.com/a/18103689/2962344
-}
diff --git a/man/get_trinoploid_1n_est.Rd b/man/get_trinoploid_1n_est.Rd
deleted file mode 100644
index 1e7b16c..0000000
--- a/man/get_trinoploid_1n_est.Rd
+++ /dev/null
@@ -1,11 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/get_trinoploid_1n_est.R
-\name{get_trinoploid_1n_est}
-\alias{get_trinoploid_1n_est}
-\title{get_trinoploid_1n_est}
-\usage{
-get_trinoploid_1n_est(.filt_peak_sizes)
-}
-\description{
-\code{get_trinoploid_1n_est}
-}
diff --git a/man/guess_genome_structure.Rd b/man/guess_genome_structure.Rd
deleted file mode 100644
index 1142907..0000000
--- a/man/guess_genome_structure.Rd
+++ /dev/null
@@ -1,11 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/guess_genome_structure.R
-\name{guess_genome_structure}
-\alias{guess_genome_structure}
-\title{guess_genome_structure}
-\usage{
-guess_genome_structure(.filt_peak_sizes_line, .n)
-}
-\description{
-\code{guess_genome_structure}
-}
diff --git a/man/peak_agregation.Rd b/man/peak_agregation.Rd
deleted file mode 100644
index a0a55d1..0000000
--- a/man/peak_agregation.Rd
+++ /dev/null
@@ -1,16 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/peak_agregation.R
-\name{peak_agregation}
-\alias{peak_agregation}
-\title{peak_agregation}
-\usage{
-peak_agregation(.smudge_container, .L = NA)
-}
-\arguments{
-\item{.smudge_container}{- a smudge_container object; a list with `x`,`y` axies of the 2d histogram and `dens` matrix of 2h histogram values.
-Usually output of function \code{get_smudge_container}}
-}
-\description{
-\code{peak_agregation} is the function that agregates tiles from the smudge_container
-into smudges - individual distributions
-}
diff --git a/man/plot_expected_haplotype_structure.Rd b/man/plot_expected_haplotype_structure.Rd
deleted file mode 100644
index 56d0ccf..0000000
--- a/man/plot_expected_haplotype_structure.Rd
+++ /dev/null
@@ -1,17 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/plot_expected_haplotype_structure.R
-\name{plot_expected_haplotype_structure}
-\alias{plot_expected_haplotype_structure}
-\title{plot_expected_haplotype_structure}
-\usage{
-plot_expected_haplotype_structure(
-  .n,
-  .peak_sizes,
-  .adjust = F,
-  .cex = 1.3,
-  xmax = 0.49
-)
-}
-\description{
-\code{plot_expected_haplotype_structure} adds genome scture labels to a smudgeplot
-}
diff --git a/man/plot_histograms.Rd b/man/plot_histograms.Rd
deleted file mode 100644
index 03833d1..0000000
--- a/man/plot_histograms.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/plot_histograms.R
-\name{plot_histograms}
-\alias{plot_histograms}
-\title{plot_histograms}
-\usage{
-plot_histograms(
-  .cov_tab,
-  .smudge_summary,
-  .fig_title = NA,
-  .cex = 1.4,
-  .col = NA,
-  .ylim = NA,
-  .bins = 100
-)
-}
-\description{
-\code{plot_histograms} makes 2 plots - histograms of the both margins of smudge plot
-}
diff --git a/man/plot_legend.Rd b/man/plot_legend.Rd
deleted file mode 100644
index e80d46a..0000000
--- a/man/plot_legend.Rd
+++ /dev/null
@@ -1,11 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/plot_legend.R
-\name{plot_legend}
-\alias{plot_legend}
-\title{plot_legend}
-\usage{
-plot_legend(.k, .colour_ramp, .log_scale = T)
-}
-\description{
-\code{plot_legend} generate topright corener in the smudgeplot
-}
diff --git a/man/plot_seq_error_line.Rd b/man/plot_seq_error_line.Rd
deleted file mode 100644
index c89b94a..0000000
--- a/man/plot_seq_error_line.Rd
+++ /dev/null
@@ -1,11 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/plot_seq_error_line.R
-\name{plot_seq_error_line}
-\alias{plot_seq_error_line}
-\title{plot_seq_error_line}
-\usage{
-plot_seq_error_line(.cov_tab, .L = NA, .col = "black")
-}
-\description{
-\code{plot_seq_error_line} add error line to plot it's L / cov
-}
diff --git a/man/plot_smudgeplot.Rd b/man/plot_smudgeplot.Rd
deleted file mode 100644
index 1787439..0000000
--- a/man/plot_smudgeplot.Rd
+++ /dev/null
@@ -1,24 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/plot_smudgeplot.R
-\name{plot_smudgeplot}
-\alias{plot_smudgeplot}
-\title{plot_smudgeplot}
-\usage{
-plot_smudgeplot(.k, .n, .colour_ramp, .cex = 1.4)
-}
-\arguments{
-\item{.k}{- the matrix of densities or list of x,y axies and z, the matrix. Usually output of function}
-
-\item{.n}{- the 1n coverage, the coverage of the haploid genome (can be estimated by TODO function)}
-
-\item{.colour_ramp}{- the colour pallete for the heat colours}
-
-\item{.cex}{- the size of the axis labels [default 1.4]}
-}
-\description{
-\code{plot_smudgeplot} is the core smudgeplot function to plot the core of the smudgeplot
-the cental 2D histogram/ landscape plot / heatmap / howeveryouwantyoucallthis plot
-}
-\author{
-Kamil Jaron \email{kamiljaron at gmail.com}
-}
diff --git a/man/reduce_structure_representation.Rd b/man/reduce_structure_representation.Rd
deleted file mode 100644
index a04e55e..0000000
--- a/man/reduce_structure_representation.Rd
+++ /dev/null
@@ -1,14 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/reduce_structure_representation.R
-\name{reduce_structure_representation}
-\alias{reduce_structure_representation}
-\title{reduce_structure_representation}
-\usage{
-reduce_structure_representation(.peak_sizes)
-}
-\arguments{
-\item{.peak_sizes}{- a table with peak sizes, the truly important column in there is just the "structure" one}
-}
-\description{
-\code{reduce_structure_representation} is the function that collapses labels loger than 4 characters into 4 character representations
-}
diff --git a/man/round_minor_variant_cov.Rd b/man/round_minor_variant_cov.Rd
deleted file mode 100644
index ce32d1e..0000000
--- a/man/round_minor_variant_cov.Rd
+++ /dev/null
@@ -1,11 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/round_minor_variant_cov.R
-\name{round_minor_variant_cov}
-\alias{round_minor_variant_cov}
-\title{round_minor_variant_cov}
-\usage{
-round_minor_variant_cov(.min_cov)
-}
-\description{
-\code{round_minor_variant_cov}
-}
diff --git a/man/rounding.Rd b/man/rounding.Rd
deleted file mode 100644
index 28b63f7..0000000
--- a/man/rounding.Rd
+++ /dev/null
@@ -1,11 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/rounding.R
-\name{rounding}
-\alias{rounding}
-\title{rounding}
-\usage{
-rounding(number)
-}
-\description{
-\code{rounding} rounds to thousands or hundreds if the number is smaller than 1000
-}
diff --git a/man/smudge_summary.Rd b/man/smudge_summary.Rd
deleted file mode 100644
index e9babc8..0000000
--- a/man/smudge_summary.Rd
+++ /dev/null
@@ -1,12 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/smudge_summary.R
-\name{smudge_summary}
-\alias{smudge_summary}
-\title{smudge_summary}
-\usage{
-smudge_summary(.filename, ...)
-}
-\description{
-\code{smudge_summary} a function that is used to report all the summary output messages
-like this I will be able to easily redirect the waring message to file as well
-}
diff --git a/man/smudge_warn.Rd b/man/smudge_warn.Rd
deleted file mode 100644
index a5907f0..0000000
--- a/man/smudge_warn.Rd
+++ /dev/null
@@ -1,12 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/smudge_warn.R
-\name{smudge_warn}
-\alias{smudge_warn}
-\title{smudge_warn}
-\usage{
-smudge_warn(.filename, ...)
-}
-\description{
-\code{smudge_warn} a function that is used to report all the wanrning messages
-like this I will be able to easily redirect the waring message to file as well
-}
diff --git a/man/transform_minor_variant_cov.Rd b/man/transform_minor_variant_cov.Rd
deleted file mode 100644
index af2760a..0000000
--- a/man/transform_minor_variant_cov.Rd
+++ /dev/null
@@ -1,12 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/transform_minor_variant_cov.R
-\name{transform_minor_variant_cov}
-\alias{transform_minor_variant_cov}
-\title{transform_minor_variant_cov}
-\usage{
-transform_minor_variant_cov(.x, .smudge_container)
-}
-\description{
-\code{transform_minor_variant_cov}
-to transform coordinates of smudgeplot to minor_variant_rel_cov
-}
diff --git a/man/transform_pair_cov.Rd b/man/transform_pair_cov.Rd
deleted file mode 100644
index a9e5b7e..0000000
--- a/man/transform_pair_cov.Rd
+++ /dev/null
@@ -1,12 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/transform_pair_cov.R
-\name{transform_pair_cov}
-\alias{transform_pair_cov}
-\title{transform_pair_cov}
-\usage{
-transform_pair_cov(.y, .smudge_container)
-}
-\description{
-\code{transform_pair_cov}
-to transform coordinates of smudgeplot to total_pair_cov
-}
diff --git a/man/wtd.quantile.Rd b/man/wtd.quantile.Rd
deleted file mode 100644
index c43a0fb..0000000
--- a/man/wtd.quantile.Rd
+++ /dev/null
@@ -1,14 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/wtd.quantile.R
-\name{wtd.quantile}
-\alias{wtd.quantile}
-\title{wtd.quantile & wtd.iqr}
-\usage{
-wtd.quantile(x, q = 0.25, weight = NULL)
-}
-\description{
-\code{wtd.quantile}
-calculates a quantile using "wighted data", specifically designed for "histogram-like" data
-\code{wtd.iqr}
-0.75 - 0.25 weighted quantile
-}
diff --git a/playground/alternative_fitting/README.md b/playground/alternative_fitting/README.md
new file mode 100644
index 0000000..75a9f0a
--- /dev/null
+++ b/playground/alternative_fitting/README.md
@@ -0,0 +1,250 @@
+# Sploidyplot
+
+The goal is to have a smudge inference based on an explicit model. I hoped for a model that would make a lot of sense - based on negative binomials.
+
+Gene - made his own EM algorithm, I think. I could not decipher it
+Richard and Tianyi - made me an EM algorithm that work on simply coverage A and B; also using normal distributions
+
+
+## alternative plotting
+
+A minimalist attempt
+
+```R
+minidata <- daArtCamp1[daArtCamp1[, 'total_pair_cov'] < 20, ]
+coverages_to_plot <- unique(minidata[, 'total_pair_cov'])
+number_of_coverages_to_plot <- length(coverages_to_plot)
+mini_ylim <- c(5, 20)
+L <- 4
+cols <- c(rgb(1,0,0, 0.5), rgb(1,1,0, 0.5), rgb(0,1,1, 0.5), rgb(1,0,1, 0.5), rgb(0,1,0, 0.5), rgb(0,0,1, 0.5))
+# plot(1:6, pch = 20, cex = 5, col = cols)
+
+plot_dot_smudgeplot(minidata, rep('black', 32), xlim, mini_ylim, cex = 3)
+points((L - 1) / coverages_to_plot, coverages_to_plot, cex = 3, pch = 20, col = 'blue')
+
+for( cov in 8:19){
+    rect(0, cov - 0.5, 0.5, cov + 0.5, col = NA, border = 'black')
+    width <- 1 / (2 * cov)
+    min_ratio <- L / cov
+    rect(min_ratio - width, cov - 0.5, min(0.5, min_ratio + width), cov + 0.5, col = sample(cols, 1))    
+}
+
+text(rep(0.05, number_of_coverages_to_plot), 8:19, 8:19)
+```
+
+This is more serious attempt that does not really work.
+
+Alternative local aggregation
+
+```bash
+for ToLID in daAchMill1 daAchPtar1 daAdoMosc1 daAjuCham1 daAjuRept1 daArcMinu1 daArtCamp1 daArtMari1 daArtVulg1 daAtrBela1; do
+    python3 playground/alternative_fitting/pair_clustering.py data/dicots/smu_files/$ToLID.k31_ploidy.smu.txt --mask_errors > data/dicots/peak_agregation/$ToLID.cov_tab_peaks
+    Rscript playground/alternative_fitting/alternative_plotting_testing.R -i data/dicots/peak_agregation/$ToLID.cov_tab_peaks -o data/dicots/peak_agregation/$ToLID
+done
+```
+
+This worked well. The agregation produced beautiful blocks, and vastly of the same shape as noticed by Richard. He suggested we should fix their shape and fit only a single parameter - coverage.
+
+```R
+smudge_tab <- read.table('data/dicots/peak_agregation/daArtMari1.cov_tab_peaks', col.names = c('covB', 'covA', 'freq', 'smudge'))
+
+all_smudges <- unique(smudge_tab[, 'smudge'])
+all_smudge_sizes <- sapply(all_smudges, function(x){ sum(smudge_tab[smudge_tab[, 'smudge'] == x, 'freq']) })
+
+# plot(sort(all_smudge_sizes, decreasing = T) / sum(all_smudge_sizes), ylim = c(0, 1))
+# sort(all_smudge_sizes, decreasing = T) / sum(all_smudge_sizes) > 0.02
+# 2% of data soiunds reasonable
+
+smudges <- all_smudges[all_smudge_sizes / sum(all_smudge_sizes) > 0.02 & all_smudges != 0]
+smudge_sizes <- all_smudge_sizes[all_smudge_sizes / sum(all_smudge_sizes) > 0.02 & all_smudges != 0]
+
+smudge_tab[, 'total_pair_cov'] <- smudge_tab[, 1] + smudge_tab[, 2]
+smudge_tab[, 'minor_variant_rel_cov'] <- smudge_tab[, 1] / smudge_tab[, 'total_pair_cov']
+
+
+smudge_tab_reduced <- smudge_tab[smudge_tab[, 'smudge'] %in% smudges, ]
+source('playground/alternative_fitting/alternative_plotting_functions.R')
+
+per_smudge_cov_list <- lapply(smudges, function(x){ smudge_tab_reduced[smudge_tab_reduced$smudge == x, ] })
+names(per_smudge_cov_list) <- smudges
+
+cov_sum_summary <- sapply(per_smudge_cov_list, function(x){ summary(x[, 'total_pair_cov']) } )
+rel_cov_summary <- sapply(per_smudge_cov_list, function(x){ summary(x[, 'minor_variant_rel_cov']) } )
+
+colnames(cov_sum_summary) <- colnames(rel_cov_summary) <- smudges
+
+data.frame(smudges = smudges, total_pair_cov = round(cov_sum_summary[4, ], 1), minor_variant_rel_cov = round(rel_cov_summary[4, ], 3))
+
+head(per_smudge_cov_list[['2']])
+
+one_smudge <- per_smudge_cov_list[['2']]
+one_smudge[one_smudge[, 'minor_variant_rel_cov'] == 0.5, ]
+
+table(one_smudge[, 'covB'])
+(one_smudge[, 'minor_variant_rel_cov'])
+
+
+
+    # cov_range <- seq((2 * .L) - 2, max_cov_pair, length = 500)
+    # lines((.L - 1)/cov_range, cov_range, lwd = 2.5, lty = 2, 
+
+plot_peakmap(smudge_tab_reduced, xlim = c(0, 0.5), ylim = c(0, 300))
+
+plot_seq_error_line(smudge_tab, 4)
+plot_seq_error_line(smudge_tab, 13)
+plot_seq_error_line(smudge_tab, 48)
+plot_seq_error_line(smudge_tab, 80)
+
+one_smudge <- per_smudge_cov_list[['4']]
+min(one_smudge[ ,'total_pair_cov'])
+
+one_smudge[one_smudge[ ,'total_pair_cov'] == 61, ]
+one_smudge <- one_smudge[order(one_smudge[, 'minor_variant_rel_cov']), ]
+
+right_part_of_the_smudge <- one_smudge[one_smudge[ ,'minor_variant_rel_cov'] > 0.2131147, ]
+
+all_minor_var_rel_covs <- sort(unique(round(right_part_of_the_smudge[, 'minor_variant_rel_cov'], 2)))
+corresponding_min_cov_sums <- sapply(all_minor_var_rel_covs, function(x){ min(right_part_of_the_smudge[round(right_part_of_the_smudge[, 'minor_variant_rel_cov'], 2) == x, 'total_pair_cov']) } )
+
+lines(all_minor_var_rel_covs, corresponding_min_cov_sums, lwd = 3, lty = 3, col = 'red')
+
+subtract_line <- function(rel_cov, cov_tab){
+    approx_rel_cov = round(rel_cov, 2)
+    band_covs = round(cov_tab[, 'minor_variant_rel_cov'], 2) == approx_rel_cov
+    cov_tab[band_covs, ][which.min(cov_tab[band_covs, 'total_pair_cov']), ]
+}
+
+edge_points <- t(sapply(all_minor_var_rel_covs, subtract_line, right_part_of_the_smudge))
+total_pair_cov <- sapply(1:29, function(x){edge_points[[x,5]]})
+minor_variant_rel_cov <- sapply(1:29, function(x){edge_points[[x,6]]})
+lm(total_pair_cov ~ minor_variant_rel_cov + I(minor_variant_rel_cov^2))
+
+plot_isoA_line <- function (.covA, .cov_tab, .col = "black") {
+    min_covB <- min(.cov_tab[, 'covB']) # should be L really
+    max_covB <- .covA
+    B_covs <- seq(min_covB, max_covB, length = 500)
+    lines(B_covs/ (B_covs + .covA), B_covs + .covA, lwd = 2.5, lty = 2, 
+        col = .col)
+}
+
+plot_isoA_line(48, smudge_tab, 'blue')
+plot_isoA_line(79, smudge_tab, 'blue')
+plot_isoA_line(110, smudge_tab, 'blue')
+plot_isoA_line(141, smudge_tab, 'blue')
+plot_isoA_line(172, smudge_tab, 'blue')
+
+```
+
+HA, looks great! Let's plot it on the background...
+
+```R
+library(smudgeplot)
+source('playground/alternative_fitting/alternative_plotting_functions.R')
+colour_ramp <- viridis(32)
+
+
+smudge_tab <- read.table('data/dicots/peak_agregation/daAchMill1.cov_tab_errors', col.names = c('covB', 'covA', 'freq', 'is_error'))
+# smudge_tab <- read.table('data/dicots/peak_agregation/daArtMari1.cov_tab_peaks', col.names = c('covB', 'covA', 'freq', 'smudge'))
+smudge_tab[, 'total_pair_cov'] <- smudge_tab[, 1] + smudge_tab[, 2]
+smudge_tab[, 'minor_variant_rel_cov'] <- smudge_tab[, 1] / smudge_tab[, 'total_pair_cov']
+cov = 31.1 # this is from GenomeScope this time
+
+plot_alt(smudge_tab[smudge_tab[, 'is_error'] != 0, ], c(0, 100), colour_ramp, T) 
+plot_alt(smudge_tab[smudge_tab[, 'is_error'] != 1, ], c(0, 100), colour_ramp, T)
+plot_alt(smudge_tab, c(0, 100), colour_ramp, T)
+plot_iso_grid(31.1, 4, 100)
+plot_smudge_labels(18.1, 100)
+# .peak_points, .peak_sizes, .min_kmerpair_cov, .max_kmerpair_cov, col = "red"
+dev.off()
+
+plot_iso_grid()
+
+plot_smudge_labels(cov, 240)
+text(0.49, cov / 2, "2err", offset = 0, cex = 1.3, xpd = T, pos = 2)
+```
+
+Say we will test ploidy up to 16 (capturing up to octoploid paralogs). That makes
+
+```R
+smudge_tab_with_err <- read.table('data/dicots/peak_agregation/daAchMill1.cov_tab_errors', col.names = c('covB', 'covA', 'freq', 'is_error'))
+
+smudge_filtering_threshold <- 0.01 # at least 1% of genomic kmers
+colour_ramp <- viridis(32)
+
+# # error band, done on non filtered data
+# smudge_tab[, 'edgepoint'] <- F
+# smudge_tab[smudge_tab[, 'covB'] < L + 3, 'edgepoint'] <- T
+# plot_alt(smudge_tab[smudge_tab[, 'edgepoint'], ], c(0, 500), colour_ramp, T)
+
+cov <- 19.55 # this will be unknown
+L <- min(smudge_tab_with_err[, 'covB'])
+smudge_tab <- smudge_tab_with_err[smudge_tab_with_err[, 'is_error'] == 0, ]
+genomic_kmer_pairs <- sum(smudge_tab[ ,'freq'])
+
+plot_alt(smudge_tab, c(0, 300), colour_ramp, T)
+
+smudge_tab[, 'total_pair_cov'] <- smudge_tab[, 1] + smudge_tab[, 2]
+smudge_tab[, 'minor_variant_rel_cov'] <- smudge_tab[, 1] / smudge_tab[, 'total_pair_cov']
+
+plot_alt(smudge_tab, c(0, 300), colour_ramp) 
+plot_all_smudge_labels(cov, 300)
+dev.off()
+
+#### isolating all smudges given cov
+
+# total_genomic_kmer_pairs <- sum(smudge_tab[, 'freq'])
+
+# plot_alt(smudge_container[[1]], c(0, 300), colour_ramp, T) 
+# looks good!
+
+# two functions need to be sources from the smudgeplot package here
+covs_to_test <- seq(10.05, 60.05, by = 0.1)
+centrality_grid <- sapply(covs_to_test, run_replicate, smudge_tab, smudge_filtering_threshold)
+covs_to_test[which.max(centrality_grid)]
+
+sapply(c(21.71, 21.72, 21.73), run_replicate, smudge_tab, smudge_filtering_threshold)
+# 21.72 is our winner!
+
+tested_covs <- test_grid_of_coverages(smudge_tab, smudge_filtering_threshold, min_to_explore, max_to_explore)
+plot(tested_covs[, 'cov'], tested_covs[, 'centrality'])
+
+```
+
+Fixing the main package
+
+```bash
+for smu_file in  data/dicots/smu_files/*.k31_ploidy.smu.txt; do 
+    ToLID=$(basename $smu_file .k31_ploidy.smu.txt); 
+    smudgeplot.py all $smu_file -o data/dicots/grid_fits/$ToLID
+done
+
+
+```
+
+
+## Homopolymer compressed testing
+
+Datasets with lots of errors. Sacharomyces will do.
+
+```
+FastK -v -c -t4 -k31 -M16 -T4 data/Scer/SRR3265401_[12].fastq.gz -Ndata/Scer/FastK_Table_hc
+hetmers -e4 -k -v -T4 -odata/Scer/kmerpairs_hc data/Scer/FastK_Table_hc
+
+smudgeplot.py hetmers -L 4 -t 4 -o data/Scer/kmerpairs_default_e --verbose data/Scer/FastK_Table
+
+smudgeplot.py all -o data/Scer/homopolymer_e4_wo data/Scer/kmerpairs_default_e_text.smu
+
+smudgeplot.py all -o data/Scer/homopolymer_e4_with data/Scer/kmerpairs_hc_text.smu
+```
+
+## Other
+
+
+### .smu to smu.txt
+
+For the legacy `.smu` files, we have a convertor for to flat files.
+
+```bash
+gcc src_ploidyplot/smu2text_smu.c -o exec/smu2text_smu
+exec/smu2text_smu data/ddSalArbu1/ddSalArbu1.k31_ploidy.smu | less
+```
\ No newline at end of file
diff --git a/playground/alternative_fitting/alternative_plot_covA_covB.R b/playground/alternative_fitting/alternative_plot_covA_covB.R
new file mode 100644
index 0000000..be7bd1c
--- /dev/null
+++ b/playground/alternative_fitting/alternative_plot_covA_covB.R
@@ -0,0 +1,49 @@
+library(ggplot2)
+
+plot_unsquared_smudgeplot <- function(cov_tab, colour_ramp, xlim, ylim){
+    # this is the adjustment for plotting
+    # cov_tab[cov_tab$covA == cov_tab$covB, 'freq'] <- cov_tab[cov_tab$covA == cov_tab$covB, 'freq'] * 2
+    cov_tab$col = colour_ramp[1 + round(31 * cov_tab$freq / max(cov_tab$freq))]
+
+    plot(NULL, xlim = xlim, ylim = ylim,
+         xlab = 'covA',
+         ylab = 'covB', cex.lab = 1.4)
+
+    ## This might bite me in the a.., instead of taking L as an argument, I guess it from the data
+    # L = floor(min(cov_tab_daAchMill1[, 'total_pair_cov']) / 2)
+    ggplot(cov_tab, aes(x=covA, y=covB, weight = weight)) +
+    geom_bin2d() +
+    theme_bw()
+
+}
+
+real_clean <- read.table('data/Fiin/kmerpairs_k51_text.smu', col.names = c('covA', 'covB', 'freq'))
+real_clean$weight <- real_clean$freq / sum(real_clean$freq)
+
+# plot(real_clean[, 'covA'], real_clean[, 'covB'])
+
+xlim <- range(real_clean[, 'covA'])
+ylim <- range(real_clean[, 'covB'])
+
+library(smudgeplot)
+args <- list()
+args$col_ramp <- 'viridis'
+args$invert_cols <- F
+colour_ramp <- get_col_ramp(args)
+real_clean$col <- colour_ramp[1 + round(31 * real_clean$freq / max(real_clean$freq))]
+
+plot(NULL, xlim = xlim, ylim = ylim,
+     xlab = 'covA',
+     ylab = 'covB', cex.lab = 1.4)
+
+ggplot(real_clean, aes(x=covA, y=covB, weight = weight)) +
+  geom_bin2d() +
+  theme_bw()
+
+head(real_clean)
+
+
+# plotSquare <- function(row){
+#     rect(as.numeric(row['covA']) - 0.5, as.numeric(row['covB']) - 0.5, as.numeric(row['covA']) + 0.5, as.numeric(row['covB']) + 0.5, col = row['col'], border = NA)
+# }
+# apply(real_clean, 1, plotSquare)
diff --git a/playground/alternative_fitting/alternative_plotting.R b/playground/alternative_fitting/alternative_plotting.R
new file mode 100644
index 0000000..512b1a6
--- /dev/null
+++ b/playground/alternative_fitting/alternative_plotting.R
@@ -0,0 +1,48 @@
+# HM Revenue and custom Tax Return form;
+# Needs to be 2023 Jan to 2024 Jan
+
+
+
+library(smudgeplot)
+source('playground/alternative_plotting_functions.R')
+
+cov_tab_daAchMill1 <- read.table('data/dicots/smu_files/daAchMill1.k31_ploidy.smu.txt', col.names = c('covB', 'covA', 'freq'))
+ylim = c(0, 250)
+
+xlim = c(0, 0.5)
+
+
+
+cov_tab_daAchMill1[, 'total_pair_cov'] <- cov_tab_daAchMill1[, 1] + cov_tab_daAchMill1[, 2]
+cov_tab_daAchMill1[, 'minor_variant_rel_cov'] <- cov_tab_daAchMill1[, 1] / cov_tab_daAchMill1[, 'total_pair_cov']
+
+args <- list()
+args$col_ramp <- 'viridis'
+args$invert_cols <- F
+colour_ramp <- get_col_ramp(args)
+cols = colour_ramp[1 + round(31 * cov_tab_daAchMill1$freq / max(cov_tab_daAchMill1$freq))]
+
+# solving this "density" problem; cov1 cov1; have twice lower probability than cov1 cov2; we need to multiply these points, but it needs to be somehow corrected for the fit / summaries
+
+plot_dot_smudgeplot(cov_tab_daAchMill1, colour_ramp, xlim, ylim)
+
+plot_unsquared_smudgeplot(cov_tab_daAchMill1, colour_ramp, xlim, ylim)
+
+# plots the line where there will be nothing
+plot_seq_error_line(cov_tab_daAchMill1, .col = 'red')
+
+head(cov_tab_daAchMill1[order(cov_tab_daAchMill1[,'total_pair_cov']), ], 12)
+colour_ramp
+3 / 8:13
+
+####
+
+cov_tab_Fiin_ideal <- read.table('data/Fiin/idealised/kmerpairs_idealised_with_transformations.tsv', header = T)
+head(cov_tab_Fiin_ideal)
+
+xlim = c(0, 0.5)
+ylim = c(0, max(cov_tab_Fiin_ideal[, 'total_pair_cov']))
+
+plot_dot_smudgeplot(cov_tab_Fiin_ideal, colour_ramp, xlim, ylim)
+
+plot_unsquared_smudgeplot(cov_tab_Fiin_ideal, colour_ramp, xlim, ylim)
diff --git a/playground/alternative_fitting/alternative_plotting_functions.R b/playground/alternative_fitting/alternative_plotting_functions.R
new file mode 100644
index 0000000..ccd0572
--- /dev/null
+++ b/playground/alternative_fitting/alternative_plotting_functions.R
@@ -0,0 +1,119 @@
+plot_alt <- function(cov_tab, ylim, colour_ramp, logscale = F){
+    A_equals_B <- cov_tab[, 'covA'] == cov_tab[, 'covB']
+    cov_tab[A_equals_B, 'freq'] <- cov_tab[A_equals_B, 'freq'] * 2
+    if (logscale){
+        cov_tab[, 'freq'] <- log10(cov_tab[, 'freq'])
+    }
+    cov_tab$col <- colour_ramp[1 + round(31 * cov_tab[, 'freq'] / max(cov_tab[, 'freq']))]
+
+    plot(NULL, xlim = c(0, 0.5), ylim = ylim,
+         xlab = 'Normalized minor kmer coverage: B / (A + B)',
+         ylab = 'Total coverage of the kmer pair: A + B', cex.lab = 1.4)
+    min_cov_to_plot <- max(ylim[1],min(cov_tab[, 'total_pair_cov']))
+    nothing <- sapply(min_cov_to_plot:ylim[2], plot_one_coverage, cov_tab)
+    return(0)
+}
+
+plot_one_coverage <- function(cov, cov_tab){
+    cov_row_to_plot <- cov_tab[cov_tab[, 'total_pair_cov'] == cov, ]
+    width <- 1 / (2 * cov)
+    cov_row_to_plot$left <- cov_row_to_plot[, 'minor_variant_rel_cov'] - width
+    cov_row_to_plot$right <- sapply(cov_row_to_plot[, 'minor_variant_rel_cov'], function(x){ min(0.5, x + width)})
+    apply(cov_row_to_plot, 1, plot_one_box, cov)
+}
+
+plot_one_box <- function(one_box_row, cov){
+    left <- as.numeric(one_box_row['left'])
+    right <- as.numeric(one_box_row['right'])
+    rect(left, cov - 0.5, right, cov + 0.5, col = one_box_row['col'], border = NA)
+}
+
+plot_dot_smudgeplot <- function(cov_tab, colour_ramp, xlim, ylim, background_col = 'grey', cex = 0.4){
+    # this is the adjustment for plotting
+    cov_tab[cov_tab$covA == cov_tab$covB, 'freq'] <- cov_tab[cov_tab$covA == cov_tab$covB, 'freq'] * 2
+    cov_tab$col = colour_ramp[1 + round(31 * cov_tab$freq / max(cov_tab$freq))]
+
+    plot(NULL, xlim = xlim, ylim = ylim, xlab = 'Normalized minor kmer coverage: B / (A + B)',
+         ylab = 'Total coverage of the kmer pair: A + B')
+    rect(xlim[1], ylim[1], xlim[2], ylim[2], col = background_col, border = NA)
+    points(cov_tab[, 'minor_variant_rel_cov'], cov_tab[, 'total_pair_cov'], col = cov_tab$col, pch = 20, cex = cex)
+}
+
+plot_peakmap <- function(cov_tab, xlim, ylim, background_col = 'grey', cex = 2){
+    # this is the adjustment for plotting
+    plot(NULL, xlim = xlim, ylim = ylim, xlab = 'Normalized minor kmer coverage: B / (A + B)',
+         ylab = 'Total coverage of the kmer pair: A + B')
+    points(cov_tab[, 'minor_variant_rel_cov'], cov_tab[, 'total_pair_cov'], col = cov_tab$smudge, pch = 20, cex = cex)
+    legend('bottomleft', col = 1:8, pch = 20, title = 'smudge', legend = 1:8)
+}
+
+plot_seq_error_line <- function (.cov_tab, .L = NA, .col = "black") {
+    if (is.na(.L)) {
+        .L <- min(.cov_tab[, "covB"])
+    }
+    max_cov_pair <- max(.cov_tab[, "total_pair_cov"])
+    cov_range <- seq((2 * .L) - 2, max_cov_pair, length = 500)
+    lines((.L - 1)/cov_range, cov_range, lwd = 2.5, lty = 2, 
+        col = .col)
+}
+
+plot_isoA_line <- function (.covA, .L, .col = "black", .ymax = 250, .lwd, .lty) {
+    min_covB <- .L # min(.cov_tab[, 'covB']) # should be L really
+    max_covB <- .covA
+    B_covs <- seq(min_covB, max_covB, length = 500)
+    isoline_x <- B_covs/ (B_covs + .covA)
+    isoline_y <- B_covs + .covA
+    lines(isoline_x[isoline_y < .ymax], isoline_y[isoline_y < .ymax], lwd = .lwd, lty = .lty, col = .col)
+}
+
+plot_isoB_line <- function (.covB, .ymax, .col = "black", .lwd, .lty) {
+    cov_range <- seq((2 * .covB) - 2, .ymax, length = 500)
+    lines((.covB)/cov_range, cov_range, lwd = .lwd, lty = .lty, col = .col)
+}
+
+plot_iso_grid <- function(.cov, .L, .ymax, .col = 'black', .lwd = 2, .lty = 2){
+    for (i in 0:15){
+        cov <- (i + 0.5) * .cov
+        plot_isoA_line(cov, .L = .L, .ymax = .ymax, .col, .lwd = .lwd, .lty = .lty)
+        if (i < 8){
+            plot_isoB_line(cov, .ymax, .col, .lwd = .lwd, .lty = .lty)
+        }
+    }
+}
+
+plot_smudge_labels <- function(cov_est, ymax, xmax = 0.49, .cex = 1.3, .L = 4){
+    for (As in 1:(floor(ymax / cov_est) - 1)){
+        label <- paste0(As, "Aerr")
+        text(.L / (As * cov_est), (As * cov_est) + .L, label, 
+                 offset = 0, cex = .cex, xpd = T, pos = ifelse(As == 1, 3, 4))
+    }
+    for (ploidy in 2:floor(ymax / cov_est)){
+        for (Bs in 1:floor(ploidy / 2)){
+            As = ploidy - Bs
+            label <- paste0(As, "A", Bs, "B")
+            text(ifelse(As == Bs, (xmax + 0.49)/2, Bs / ploidy), ploidy * cov_est, label, 
+                 offset = 0, cex = .cex, xpd = T, 
+                 pos = ifelse(As == Bs, 2, 1))
+        }
+    }
+}
+
+create_smudge_container <- function(cov, cov_tab, smudge_filtering_threshold){
+    smudge_container <- list()
+    total_genomic_kmers <- sum(cov_tab[ , 'freq'])
+    
+    for (Bs in 1:8){
+        cov_tab_isoB <- cov_tab[cov_tab[ , 'covB'] > cov * ifelse(Bs == 1, 0, Bs - 0.5) & cov_tab[ , 'covB'] < cov * (Bs + 0.5), ]
+        # cov_tab_isoB[, 'Bs'] <- Bs
+        cov_tab_isoB[, 'As'] <- round(cov_tab_isoB[, 'covA'] / cov) #these are be individual smudges cutouts given coverages
+        cov_tab_isoB[cov_tab_isoB[, 'As'] == 0, 'As'] = 1
+        for( As in Bs:(16 - Bs)){
+            cov_tab_one_smudge <- cov_tab_isoB[cov_tab_isoB[, 'As'] == As, ]
+            if (sum(cov_tab_one_smudge[, 'freq']) / total_genomic_kmers > smudge_filtering_threshold){
+                label <- paste0(As, "A", Bs, "B") 
+                smudge_container[[label]] <- cov_tab_one_smudge[,-which(names(cov_tab_one_smudge) %in% c('is_error', 'As'))]
+            }
+        }
+    }
+    return(smudge_container)
+}
\ No newline at end of file
diff --git a/playground/alternative_fitting/alternative_plotting_testing.R b/playground/alternative_fitting/alternative_plotting_testing.R
new file mode 100644
index 0000000..598cfae
--- /dev/null
+++ b/playground/alternative_fitting/alternative_plotting_testing.R
@@ -0,0 +1,90 @@
+library(smudgeplot)
+library(argparse)
+source('playground/alternative_fitting/alternative_plotting_functions.R')
+
+parser <- ArgumentParser()
+parser$add_argument("-i", "-infile", help="Input file")
+parser$add_argument("-o", "-outfile", help="Output file")
+args <- parser$parse_args()
+
+args$col_ramp <- 'viridis'
+args$invert_cols <- F
+
+# cov_tab_daAchMill1 <- read.table('data/dicots/smu_files/daAchMill1.k31_ploidy.smu.txt', col.names = c('covB', 'covA', 'freq'))
+# cov_tab <- read.table(args$file, col.names = c('covB', 'covA', 'freq'))
+# args <- list()
+# args$i <- 'data/ddSalArbu1/ddSalArbu1.k31_ploidy_converted.smu_with_peaks.txt'
+# args$o <- 'data/ddSalArbu1/smudge_with_peaks'
+
+cov_tab <- read.table(args$i, col.names = c('covB', 'covA', 'freq','peak'))
+
+xlim = c(0, 0.5)
+ylim = c(0, 300)
+
+
+cov_tab[, 'total_pair_cov'] <- cov_tab[, 1] + cov_tab[, 2]
+cov_tab[, 'minor_variant_rel_cov'] <- cov_tab[, 1] / cov_tab[, 'total_pair_cov']
+
+colour_ramp <- viridis(32)
+colour_ramp_log <- get_col_ramp(args, 16)
+# cols = colour_ramp[1 + round(31 * cov_tab$freq / max(cov_tab$freq))]
+
+# solving this "density" problem; cov1 cov1; have twice lower probability than cov1 cov2; we need to multiply these points, but it needs to be somehow corrected for the fit / summaries
+
+plot_dot_smudgeplot(cov_tab, colour_ramp, xlim, ylim)
+
+pdf(paste0(args$o, '_background.pdf'))
+    plot_alt(cov_tab, ylim, colour_ramp, F)
+dev.off()
+
+pdf(paste0(args$o, '_log_background.pdf'))
+    plot_alt(cov_tab, ylim, colour_ramp, T)
+dev.off()
+
+pdf(paste0(args$o, '_peaks.pdf'))
+    plot_peakmap(cov_tab, xlim = xlim, ylim = ylim)
+dev.off()
+
+# plots the line where there will be nothing
+# plot_seq_error_line(cov_tab, .col = 'red')
+
+# head(cov_tab[order(cov_tab[,'total_pair_cov']), ], 12)
+# colour_ramp
+# 3 / 8:13
+
+####
+
+# cov_tab_Fiin_ideal <- read.table('data/Fiin/idealised/kmerpairs_idealised_with_transformations.tsv', header = T)
+# head(cov_tab_Fiin_ideal)
+
+# xlim = c(0, 0.5)
+# ylim = c(0, max(cov_tab_Fiin_ideal[, 'total_pair_cov']))
+
+# pdf('data/Fiin/idealised/straw_plot_test3.pdf')
+#     plot_dot_smudgeplot(cov_tab_Fiin_ideal, colour_ramp, xlim, ylim)
+# dev.off()
+
+# pdf('data/Fiin/idealised/straw_plot_test2.pdf')
+#     plot_unsquared_smudgeplot(cov_tab_Fiin_ideal, colour_ramp, xlim, ylim)
+# dev.off()
+
+# # testing the packaged version
+
+# library(smudgeplot)
+# args <- list()
+# args$col_ramp <- 'viridis'
+# args$invert_cols <- F
+# colour_ramp <- get_col_ramp(args)
+
+# cov_tab_Fiin_ideal <- read.table('data/Fiin/idealised/kmerpairs_idealised_with_transformations.tsv', header = T)
+
+# pdf('data/Fiin/idealised/straw_plot_test.pdf')
+#     plot_alt(cov_tab_Fiin_ideal, c(50, 700), colour_ramp)
+# dev.off()
+
+# source('playground/alternative_fitting/alternative_plotting_functions.R')
+
+# pdf('data/Fiin/idealised/straw_plot_test2.pdf')
+#     plot_unsquared_smudgeplot(cov_tab_Fiin_ideal, colour_ramp, c(0, 0.5), c(50, 700))
+# dev.off()
+
diff --git a/playground/alternative_fitting/pair_clustering.py b/playground/alternative_fitting/pair_clustering.py
new file mode 100644
index 0000000..1cd3eb3
--- /dev/null
+++ b/playground/alternative_fitting/pair_clustering.py
@@ -0,0 +1,84 @@
+
+# cov2freq = defualtdict(covA, covB) -> freq
+# cov2peak = dict(covA, covB) -> peak
+# dict(peak) -> summit (if relevant)
+# import numpy as np
+
+import argparse
+from pandas import read_csv # type: ignore
+from collections import defaultdict
+from statistics import mean
+# import matplotlib.pyplot as plt
+
+####
+
+parser = argparse.ArgumentParser()
+parser.add_argument('infile', nargs='?', help='name of the input tsv file with covarages and frequencies.')
+parser.add_argument('-nf', '-noise_filter', help='Do not agregate into smudge k-mer pairs with frequency lower than this parameter', type=int, default=50)
+parser.add_argument('-d', '-distance', help='Manthattan distance of k-mer pairs that are considered neioboring for the local agregation purposes.', type=int, default=5)
+parser.add_argument('--mask_errors', help='instead of reporting assignments to individual smudges, just remove all monotonically decreasing points from the error line', action="store_true", default = False)
+args = parser.parse_args()
+
+### what should be aguments at some point
+# smu_file = 'data/ddSalArbu1/ddSalArbu1.k31_ploidy_converted.smu.txt'
+# distance = 5
+# noise_filter = 100
+
+smu_file = args.infile
+distance = args.d
+noise_filter = args.nf
+
+### load data
+# cov_tab = np.loadtxt(smu_file, dtype=int)
+cov_tab = read_csv(smu_file, names = ['covB', 'covA', 'freq'], sep='\t')
+cov_tab = cov_tab.sort_values('freq', ascending = False)
+L = min(cov_tab['covB']) # important only when --mask_errors is on
+
+# generate a dictionary that gives us for each combination of coverages a frequency
+cov2freq = defaultdict(int)
+cov2peak = defaultdict(int)
+# for idx, covB, covA, freq in cov_tab.itertuples():
+#     cov2freq[(covA, covB)] = freq
+# I can create this one when I iterate though the data though
+
+# plt.hist(means)
+# plt.hist([x for x in means if x < 100 and x > -100])
+# plt.show() 
+
+### clustering
+next_peak = 1
+for idx, covB, covA, freq in cov_tab.itertuples():
+    cov2freq[(covA, covB)] = freq # with this I can get rid of lines 23 24 that pre-makes this table
+    if freq < noise_filter:
+        break
+    highest_neigbour_coords = (0, 0)
+    highest_neigbour_freq = 0
+    # for each kmer pair I will retrieve all neibours (Manhattan distance)
+    for xA in range(covA - distance,covA + distance + 1):
+        # for explored A coverage in neiborhood, we explore all possible B coordinates
+        distanceB = distance - abs(covA - xA)
+        for xB in range(covB - distanceB,covB + distanceB + 1):
+            xB, xA = sorted([xA, xB]) # this is to make sure xB is smaller than xA
+            # iterating only though those that were assigned already
+            # and recroding only the one with highest frequency
+            if cov2peak[(xA, xB)] and cov2freq[(xA, xB)] > highest_neigbour_freq:
+                highest_neigbour_coords = (xA, xB)
+                highest_neigbour_freq = cov2freq[(xA, xB)]
+    if highest_neigbour_freq > 0:
+        cov2peak[(covA, covB)] = cov2peak[(highest_neigbour_coords)]
+    else:
+        # print("new peak:", (covA, covB))
+        if args.mask_errors:
+            if covB < L + args.d:
+                cov2peak[(covA, covB)] = 1 # error line
+            else:
+                cov2peak[(covA, covB)] = 0 # central smudges
+        else:
+            cov2peak[(covA, covB)] = next_peak # if I want to keep info about all locally agregated smudges
+            next_peak += 1
+
+cov_tab = cov_tab.sort_values(['covA', 'covB'], ascending = True)
+for idx, covB, covA, freq in cov_tab.itertuples():
+    print(covB, covA, freq, cov2peak[(covA, covB)])
+    # if idx > 20:
+    #     break
diff --git a/playground/fooling_around_impossible_locations.R b/playground/fooling_around_impossible_locations.R
deleted file mode 100644
index f27b829..0000000
--- a/playground/fooling_around_impossible_locations.R
+++ /dev/null
@@ -1,113 +0,0 @@
-####### SAMPLE
-
-library(smudgeplot)
-
-cov_tab_sample <- read.table('data/testing_Scer.tsv', col.names = c('covB', 'covA', 'freq'))
-cov_tab_sample[, 'total_pair_cov'] <- cov_tab_sample[, 1] + cov_tab_sample[, 2]
-cov_tab_sample[, 'minor_variant_rel_cov'] <- cov_tab_sample[, 1] / cov_tab_sample[, 'total_pair_cov']
-
-xlim <- c(0, 0.5)
-ylim <- c(0, 150)
-nbins <- 40
-
-smudge_container_sample <- get_smudge_container(cov_tab_sample, nbins, xlim, ylim)
-
-args <- list()
-args$col_ramp <- "viridis"
-args$invert_cols <- FALSE
-colour_ramp <- get_col_ramp(args) # get the default colour ramp (Spectral, 11)
-
-plot_smudgeplot(smudge_container_sample, 15.5, colour_ramp)
-
-######## SCER
-
-cov_tab <- read.table("data/Scer/PloidyPlot3_text.smu", col.names = c('covB', 'covA', 'freq')) #nolint
-cov_tab[, 'total_pair_cov'] <- cov_tab[, 1] + cov_tab[, 2]
-cov_tab[, 'minor_variant_rel_cov'] <- cov_tab[, 1] / cov_tab[, 'total_pair_cov']
-
-smudge_container <- get_smudge_container(cov_tab, nbins, xlim, ylim)
-plot_smudgeplot(smudge_container, 15.5, colour_ramp)
-rect(0.4375, smudge_container$y[6], 0.4500, smudge_container$y[7], col = 'white')
-rect(0, 15.5*2, 0.500, 15.5*2, col = 'grey')
-rect(0.49, 0, 0.49, 475, col = 'grey')
-rect(0.4935, 0, 0.49, 475, col = 'grey')
-
-########
-
-L <- 12 
-y_min <- 22.50  
-y_max <- 26.25
-
-
-bracket_size <- diff(smudge_container$y)[1]
-
-allowed_ranges <- lapply(smudge_container$y, function(ymin){if(ymin + bracket_size <= 2*L){ return(c()) }; seq(max(ceiling(ymin), 2*L), ymin + bracket_size, by = 1)})
-
-# this will generate a table of all possible k-mer combinations of covA and covB
-impossible_ratios <- function(allowed_range, L, x_brackets){
-  impossible <- rep(TRUE, length(smudge_container$x))
-  if (is.null(allowed_range)) {
-    return (impossible)
-  }
-  minicov_tab <- merge(L:floor(max(allowed_range) / 2), (ceiling(max(allowed_range) / 2):(max(allowed_range) - L)))
-  colnames(minicov_tab) <- c("covB", "covA")
-  minicov_tab <- minicov_tab[minicov_tab[, 'covB'] <= minicov_tab[, 'covA'], ] 
-  minicov_tab[, 'cov_ratio'] <- minicov_tab[, 'covB'] / (minicov_tab[, 'covB'] + minicov_tab[, 'covA'])
-  minicov_tab[, 'bracket'] <- findInterval(minicov_tab[, 'cov_ratio'], x_brackets, left.open = TRUE)
-  xcoords <- unique(minicov_tab[, 'bracket'])  
-  impossible[xcoords] <- FALSE
-  return (impossible)
-}
-
-rect(0.4375, smudge_container$y[6], 0.4500, smudge_container$y[7], col = 'white')
-
-rect(0, 0, 0.5, 10, col = 'white')
-rect(0, 0, 0.05, 250, col = 'white')
-
-allowed_range <- allowed_ranges[[8]]
-impossible_ratios(allowed_ranges[[7]], L, smudge_container$x)
-impossible_ratios(allowed_ranges[[8]], L, smudge_container$x)
-matrix_of_impossibles <- sapply(allowed_ranges, impossible_ratios, L, smudge_container$x)
-
-smudge_container <- get_smudge_container(cov_tab, nbins, xlim, ylim)
-smudge_container$z[matrix_of_impossibles] <- NA
-plot_smudgeplot(smudge_container, 15.5, colour_ramp)
-
-
-L <- 12
-cov_tab_mini <- merge(c(L:20), c(L:20))
-colnames(cov_tab_mini) <- c("covA", "covB")
-cov_tab_mini <- cov_tab_mini[cov_tab_mini$covB <= cov_tab_mini$covA, ] 
-# now cov_tab_mini is a realistic cov table
-
-cov_tab_mini$cov_sum <- cov_tab_mini$covB + cov_tab_mini$covA
-cov_tab_mini$cov_ratio <- cov_tab_mini$covB / cov_tab_mini$cov_sum
-
-cov_tab_mini_first_covsum_bracket <- cov_tab_mini[cov_tab_mini$cov_sum %in% allowed_range, ]
-table(cov_tab_mini_first_covsum_bracket$cov_ratio)
-
-
-library(smudgeplot)
-
-args <- ArgumentParser()$parse_args()
-args$output <- "data/Scer/sploidyplot_test"
-args$nbins <- 40
-args$kmer_size <- 21
-args$homozygous <- FALSE
-args$L <- c()
-args$col_ramp <- 'viridis'
-args$invert_cols <- TRUE
-
-
-  L <- 12
-  smudge_container$x
-  smudge_container$y
-  is_possible <- function(x_min, x_max, y_min, y_max, L){
-    y_min <- 22.50  
-    y_max <- 26.25
-    seq(ceiling(y_min), y_max, by = 1)
-    x_min <- 0.4500
-    x_max <- 0.4625
-    if (x_max < L / y_max)
-  }
-
diff --git a/playground/interactive_plot_strawberry_full_kmer_families_fooling_around.R b/playground/interactive_plot_strawberry_full_kmer_families_fooling_around.R
index 0525d32..6dc3211 100644
--- a/playground/interactive_plot_strawberry_full_kmer_families_fooling_around.R
+++ b/playground/interactive_plot_strawberry_full_kmer_families_fooling_around.R
@@ -1,7 +1,7 @@
 library("methods")
 library("argparse")
 library("smudgeplot")
-library("hexbin")
+# library("hexbin")
 
 # preprocessing
 # to get simply number of memebers / family (exploration)
@@ -13,159 +13,11 @@ library("hexbin")
 
 args <- ArgumentParser()$parse_args()
 args$homozygous <- F
-args$input <- './data/strawberry_iinumae/kmer_counts_L109_U.tsv'
-args$output = './data/strawberry_iinumae/straw'
+args$input <- 'data/Fiin/kmerpairs_k51_text.smu'
+args$output = './data/Fiin/testrun'
 args$title = 'F. iinumae'
 args$nbins <- 40
 args$L <- NULL
 args$n_cov <- NULL
 args$k <- 21
 
-family_members <- table(read.table('data/strawberry_iinumae/kmer_counts_L109_U_family_members.tsv')$V1)
-family_members / sum(family_members)
-
-#          2          3          4          5          6          7          8 
-# 0.60463486 0.18252481 0.08881352 0.05260407 0.03399928 0.02262155 0.01480190 
-
-tranf_cov <- read.table('data/strawberry_iinumae/kmer_counts_L109_U_sums_min_max.tsv', col.names = c('sum', 'min', 'max'))
-
-iterative_nbins <- T
-
-smudge_summary <- list()
-
-pairs <- c(tranf_cov$min + tranf_cov$max == tranf_cov$sum)
-cov <- tranf_cov[pairs,c(2,3)]
-minor_variant_rel_cov <- cov$min / (cov$min + cov$max)
-total_pair_cov <- cov$min + cov$max
-
-plot_smudge <- function(minor_variant_rel_cov, total_pair_cov, add_to_title = ''){
-	L <- ifelse( length(args$L) == 0, min(total_pair_cov) / 2, args$L)
-	smudge_summary$n_subset_est <- 144
-	draft_n <- ifelse(length(args$n_cov) == 0, smudge_summary$n_subset_est, args$n_cov)
-
-	ymax <- min(10*draft_n, max(total_pair_cov))
-	ymin <- min(total_pair_cov) - 1
-
-	dulpicit_structures <- T
-	repeat {
-	    smudge_container <- get_smudge_container(minor_variant_rel_cov, total_pair_cov, .nbins = args$nbins, .ylim = c(ymin, ymax))
-	    peak_points <- peak_agregation(smudge_container)
-	    peak_sizes <- get_peak_summary(peak_points, smudge_container, 0.02)
-
-	    the_smallest_n <- min(get_trinoploid_1n_est(peak_sizes), draft_n)
-	    smudge_summary$n_peak_est <- estimate_1n_coverage_highest_peak(peak_sizes, minor_variant_rel_cov, total_pair_cov, the_smallest_n)
-
-	    smudge_summary$n <- ifelse(length(args$n_cov) == 0, smudge_summary$n_peak_est, args$n_cov)
-
-	    peak_sizes$structure <- apply(peak_sizes, 1, function(x){ guess_genome_structure(x, smudge_summary$n)})
-
-	    dulpicit_structures <- any(table(peak_sizes$structure) > 1)
-	    if(dulpicit_structures & iterative_nbins){
-	        if(args$nbins > 20){
-	            args$nbins <- args$nbins - 5
-	        } else {
-	            args$nbins <- args$nbins - 2
-	        }
-	        smudge_warn(args$output, "detecting two smudges at the same positions, not enough data for this number of bins lowering number of bins to ", args$nbins)
-	    } else {
-	        break
-	    }
-	}
-
-	peak_sizes$corrected_minor_variant_cov <- sapply(peak_sizes$structure, function(x){round(mean(unlist(strsplit(x, split = '')) == 'B'), 2)})
-	peak_sizes$ploidy <- sapply(peak_sizes$structure, nchar)
-
-
-	peak_sizes$rel_size <- peak_sizes$rel_size / sum(peak_sizes$rel_size)
-	peak_sizes <- peak_sizes[order(peak_sizes$rel_size, decreasing = T),]
-	smudge_summary$peak_sizes <- peak_sizes
-
-	considered_ploidies <- unique(peak_sizes$ploidy)
-	ploidy_with_most_smudges <- which.max(sapply(considered_ploidies, function(x){ sum(peak_sizes[peak_sizes$ploidy == x,'rel_size']) }) )
-	smudge_summary$genome_ploidy <- considered_ploidies[ploidy_with_most_smudges]
-
-	generate_summary(args, smudge_summary)
-	fig_title <- paste(args$title, add_to_title)
-
-	layout(matrix(c(2,4,1,3), 2, 2, byrow=T), c(3,1), c(1,3))
-	# 1 smudge plot
-	plot_smudgeplot(smudge_container, smudge_summary$n, colour_ramp)
-	plot_expected_haplotype_structure(smudge_summary$n, peak_sizes, T, xmax = max(smudge_container$x))
-	# 2,3 hist
-	plot_histograms(minor_variant_rel_cov, total_pair_cov,
-	                ymax, smudge_summary, args$nbins, fig_title)
-	# 4 legend
-	plot_legend(smudge_container, colour_ramp)
-}
-
-plot_smudge(minor_variant_rel_cov, total_pair_cov, 'just pairs')
-## OK, the C++ software kind of works,
-# but the smudgeplot of the families of two DOES NOT look exactly the same as when produced by python kmer pairs script
-# it's hard to say from here where is the difference comes from, but... it remains true that the smudgeplots are different
-
-
-# Now, let look at the non diploid ones
-cov <- tranf_cov[!pairs,]
-minor_variant_rel_cov <- cov$min / cov$sum
-total_pair_cov <- cov$sum
-
-pdf('figures/multi_family_covsum_hist.pdf')
-	plot_hist <- hist(total_pair_cov, breaks = 2000, xlim = c(0,5000))
-	for(i in c(4, 6,8,10, 16,32) * 144){
-		lines(c(i,i), c(0, 100000), col = 'red')
-		text(i, max(plot_hist$counts), labels = i / 144, pos = 4)
-	}
-dev.off()
-
-pdf('figures/multi_family_smudgeplot.pdf')
-	plot_smudge(minor_variant_rel_cov, total_pair_cov, 'only_multi_families')
-dev.off()
-
-minor_variant_rel_cov <- cov$max / cov$sum
-total_pair_cov <- cov$sum
-
-low_cov_data <- c(total_pair_cov < 1300)
-minor_variant_rel_cov <- minor_variant_rel_cov[low_cov_data]
-total_pair_cov <- total_pair_cov[low_cov_data]
-
-# Create hexbin object and plot
-h <- hexbin(data.frame(max_rel_cov = minor_variant_rel_cov,total_pair_cov))
-
-pdf('figures/multi_family_max_rel_cov.pdf')
-	plot(h, colramp=viridis)
-dev.off()
-#plot_smudge(minor_variant_rel_cov, total_pair_cov, 'only_multi_families')
-
-tranf_cov$memebers <- read.table('data/strawberry_iinumae/kmer_counts_L109_U_family_members.tsv')$V1
-cov <- tranf_cov[tranf_cov$memebers == 3,]
-
-cov$mid <- cov$sum - (cov$min + cov$max)
-
-cov$min <- round(cov$min / 144)
-cov$mid <- round(cov$mid / 144)
-cov$max <- round(cov$max / 144)
-
-tri_family_tab <- table(paste(cov$max, cov$mid, cov$min))
-
-tri_represented <- tri_family_tab[tri_family_tab > 5000]
-tri_family_overview <- data.frame(name = names(tri_represented), count = as.numeric(tri_represented), stringsAsFactors = F)
-
-tri_family_overview$As <- sapply(strsplit(tri_family_overview$name, ' '), function(x)( as.numeric(x[1]) ) )
-tri_family_overview$Bs <- sapply(strsplit(tri_family_overview$name, ' '), function(x)( as.numeric(x[2]) ) )
-tri_family_overview$Cs <- sapply(strsplit(tri_family_overview$name, ' '), function(x)( as.numeric(x[3]) ) )
-
-tri_family_overview$ploidy <- rowSums(tri_family_overview[,c('As', 'Bs', 'Cs')])
-tri_family_overview$smudge <- paste0(sapply(tri_family_overview$As, function(x) { paste0(rep('A', x), collapse = '') }),
-									 sapply(tri_family_overview$Bs, function(x) { paste0(rep('B', x), collapse = '') }),
-									 sapply(tri_family_overview$Cs, function(x) { paste0(rep('C', x), collapse = '') }))
-
-tri_family_overview <- tri_family_overview[order(tri_family_overview$ploidy),]
-tri_barplot <- tri_family_overview$count
-tri_spaces <- c(0.5, ifelse(tri_family_overview$ploidy[-1] == tri_family_overview$ploidy[-length(tri_barplot)], 0.2, 0.5))
-names(tri_barplot) <- tri_family_overview$ploidy
-annot <- tri_family_overview$count > 5e4
-
-pdf('figures/triallelic_smudge_sizes.pdf')
-	bar_pos <- barplot(tri_barplot, space = tri_spaces)
-	text(bar_pos[annot], tri_family_overview$count[annot], tri_family_overview$smudge[annot])
-dev.off()
\ No newline at end of file
diff --git a/playground/popart.R b/playground/popart.R
new file mode 100644
index 0000000..dbd52db
--- /dev/null
+++ b/playground/popart.R
@@ -0,0 +1,56 @@
+library(smudgeplot)
+
+args <- ArgumentParser()$parse_args()
+args$output <- "data/Scer/sploidyplot_test"
+args$nbins <- 40
+args$kmer_size <- 21
+args$homozygous <- FALSE
+args$L <- c()
+args$col_ramp <- 'viridis'
+args$invert_cols <- TRUE
+
+cov_tab <- read.table("data/Scer/PloidyPlot3_text.smu", col.names = c('covB', 'covA', 'freq'), skip = 2) #nolint
+cov_tab[, 'total_pair_cov'] <- cov_tab[, 1] + cov_tab[, 2]
+cov_tab[, 'minor_variant_rel_cov'] <- cov_tab[, 1] / cov_tab[, 'total_pair_cov']
+
+cov_tab_1n_est <- round(estimate_1n_coverage_1d_subsets(cov_tab), 1)
+
+xlim <- c(0, 0.5)
+# max(total_pair_cov); 10*draft_n
+ylim <- c(0, 150)
+nbins <- 40
+
+smudge_container <- get_smudge_container(cov_tab, nbins, xlim, ylim)
+smudge_container$z <- smudge_container$dens
+
+plot_popart <- function(cov_tab, ylim, colour_ramp){
+    A_equals_B <- cov_tab[, 'covA'] == cov_tab[, 'covB']
+    cov_tab[A_equals_B, 'freq'] <- cov_tab[A_equals_B, 'freq'] * 2
+    cov_tab$col <- colour_ramp[1 + round(31 * cov_tab[, 'freq'] / max(cov_tab[, 'freq']))]
+
+    plot(NULL, xlim = c(0.1, 0.5), ylim = ylim, xaxt = "n", yaxt = "n", xlab = '', ylab = '', bty = 'n')
+    min_cov_to_plot <- max(ylim[1],min(cov_tab[, 'total_pair_cov']))
+    nothing <- sapply(min_cov_to_plot:ylim[2], plot_one_coverage, cov_tab)
+    return(0)
+}
+
+par(mfrow = c(2, 5))
+
+args$col_ramp <- "viridis"
+args$invert_cols <- FALSE
+colour_ramp <- get_col_ramp(args) # get the default colour ramp (Spectral, 11)
+# plot_smudgeplot(smudge_container, 15.5, colour_ramp)
+plot_popart(cov_tab, c(20, 120), colour_ramp)
+
+args$invert_cols <- TRUE
+colour_ramp <- get_col_ramp(args) # get the default colour ramp (Spectral, 11)
+# plot_smudgeplot(smudge_container, 15.5, colour_ramp)
+plot_popart(cov_tab, c(20, 120), colour_ramp)
+
+
+for (ramp in c("grey.colors", "magma", "plasma", "mako", "inferno", "rocket", "heat.colors", "cm.colors")){
+    args$col_ramp <- ramp
+    colour_ramp <- get_col_ramp(args) # get the default colour ramp (Spectral, 11)
+    plot_popart(cov_tab, c(20, 120), colour_ramp)
+}
+
diff --git a/tests/README.md b/tests/README.md
index 8880ac0..445b730 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -4,46 +4,61 @@ This is a place for manual tests that have not been automated (yet?).
 Don't forget to re-install the package/script before execution. Somehting like
 
 ```
-Rscript install.R
-install -C exec/smudgeplot /usr/local/bin
-install -C exec/smudgeplot_plot.R /usr/local/bin
+make install INSTALL_PREFIX=~
 ```
 
 should do the job.
 
 #### interface tests
 
-##### smudgeplot plot
-
-```
-./exec/smudgeplot plot tests/data/toy_middle_coverages.tsv -o tests/data/toy_middle_R -n 100 -nbins 20
-```
-
-generates a smudgeplot from the toy data.
+##### data prep
 
-##### smudgeplot hetkmers
+Download `SRR3265401` - nice teteraploid Sacharomyces run I use often for testing.
 
-two different methods to extract homologous kmers
+##### smudgeplot plot
 
-###### All
+Defaults:
 
 ```
-./exec/smudgeplot hetkmers -o tests/data/toy_all tests/data/toy_kmer_k21.dump
+smudgeplot.py all data/Scer/kmerpairs_default_e2_text.smu -o data/Scer/240918_trial
 ```
 
-###### Middle
+Testing parameters:
 
 ```
-./exec/smudgeplot hetkmers -o tests/data/toy_middle tests/data/toy_kmer_k21.dump --middle
+smudgeplot.py all data/Scer/kmerpairs_default_e2_text.smu -o data/Scer/240918_trial_params -t "Species 1" -c 20 -ylim 80 -col_ramp magma --invert_cols
 ```
 
-generates two files `tests/data/toy_middle_coverages.tsv` and `tests/data/toy_middle_sequences.tsv` with coverages and sequences of kmer pairs.
+##### smudgeplot hetkmers
+
+two different methods to extract homologous kmers
+
+TODO
 
+##### Dicots
 
-##### smudgeplot kmer extraction
+This is a large dataset of the first 540 dicot genomes sequenced by the Tree of Life. Some of them are completed, some of them are with insufficient coverage or otherwise qc failed data. The idea here is to be able to tell those apart, get reasonable defaults so the generated plot is meaningful for a reasonable number (i.e. nearly all) of them.
 
 ```
-exec/smudgeplot.py extract -cov tests/data/toy_all_coverages.tsv -seq tests/data/toy_all_sequences.tsv   -minc 400 -maxc 410 -minr 0.49 -maxr 0.5 | head
+time ./exec/smudgeplot.py plot data/dicots/smu_files/daAchMill1.k31_ploidy.smu.txt -o data/dicots/alt_plots/daAchMill1 --alt_plot -q 0.9
 ```
 
-would extract 80 lines made of 20 kmers pairs, but showing only the first ten.
\ No newline at end of file
+```bash
+for smu in data/dicots/smu_files/*.smu.txt; do
+    species=$(basename $smu)
+    echo $species $smu
+    time ./exec/smudgeplot.py plot $smu -o data/dicots/alt_plots/$species --alt_plot -q 0.9
+done
+
+for smu in data/dicots/smu_files/*.smu.txt; do
+    species=$(basename $smu .smu.txt)
+    echo $species $smu
+    time ./exec/smudgeplot.py plot $smu -c 10 -o data/dicots/alt_plots_c10/$species --alt_plot -q 0.9
+done
+
+for smu in $(ls data/dicots/smu_files/*.smu.txt | head -20); do
+    species=$(basename $smu .smu.txt)
+    echo $species $smu
+    smudgeplot.py all $smu -t $species -o data/dicots/automated_smudgeplots/$species
+done
+```
\ No newline at end of file
diff --git a/tests/testthat.R b/tests/testthat.R
deleted file mode 100644
index c687ba7..0000000
--- a/tests/testthat.R
+++ /dev/null
@@ -1,4 +0,0 @@
-library(testthat)
-library(smudgeplot)
-
-test_package("smudgeplot")
\ No newline at end of file
diff --git a/tests/testthat/fmlo_smaple_cov_file.tsv b/tests/testthat/fmlo_smaple_cov_file.tsv
deleted file mode 100644
index 68f82c0..0000000
--- a/tests/testthat/fmlo_smaple_cov_file.tsv
+++ /dev/null
@@ -1,1000 +0,0 @@
-203	452
-199	439
-390	484
-235	482
-229	334
-220	407
-197	394
-183	192
-195	211
-633	728
-193	552
-188	188
-197	414
-177	409
-217	455
-197	402
-181	359
-218	396
-244	249
-185	225
-217	409
-197	431
-231	242
-167	408
-215	379
-206	396
-198	637
-195	468
-398	635
-191	465
-178	193
-247	440
-203	424
-196	367
-185	434
-222	1409
-234	1083
-202	406
-212	433
-203	409
-412	526
-558	720
-195	412
-216	454
-204	411
-251	689
-242	475
-178	181
-178	580
-438	686
-166	212
-191	412
-208	452
-174	355
-221	634
-494	508
-202	351
-162	377
-229	445
-517	611
-183	384
-252	437
-199	397
-452	693
-172	202
-640	647
-223	379
-234	539
-421	454
-202	387
-222	785
-422	458
-213	233
-404	604
-264	465
-562	873
-201	472
-608	1263
-232	496
-256	358
-244	406
-222	436
-220	393
-178	447
-447	521
-198	609
-506	905
-204	205
-457	615
-209	402
-171	307
-218	429
-223	230
-349	416
-242	818
-211	414
-197	394
-180	383
-200	372
-151	348
-220	413
-622	642
-155	403
-193	475
-238	422
-171	365
-137	208
-200	374
-234	435
-201	366
-229	404
-200	404
-206	452
-177	407
-227	306
-213	481
-219	449
-193	301
-134	624
-183	560
-228	487
-190	399
-194	704
-183	388
-168	209
-202	415
-571	662
-175	373
-225	417
-203	367
-199	249
-206	426
-240	818
-145	282
-219	432
-165	204
-235	545
-195	240
-172	395
-151	722
-270	399
-226	242
-161	324
-217	438
-234	988
-216	640
-182	432
-224	396
-707	1331
-198	223
-214	404
-219	265
-213	407
-402	674
-178	215
-193	705
-190	262
-199	424
-178	195
-218	439
-232	526
-177	404
-207	441
-382	690
-221	227
-215	462
-140	285
-225	590
-215	402
-218	438
-257	435
-210	260
-202	443
-243	392
-202	405
-195	452
-192	361
-187	199
-231	450
-211	444
-225	471
-234	411
-216	580
-211	701
-158	353
-208	377
-138	235
-174	492
-222	419
-234	414
-397	540
-160	381
-214	420
-197	381
-199	399
-221	575
-577	704
-210	249
-166	355
-204	1379
-224	431
-174	398
-176	1405
-268	489
-377	621
-195	603
-226	1373
-209	604
-211	415
-222	495
-129	417
-190	397
-195	896
-228	604
-693	1236
-204	229
-195	617
-234	428
-245	402
-190	426
-146	484
-196	390
-187	440
-174	372
-225	478
-175	367
-565	594
-381	473
-210	455
-209	453
-1108	1452
-196	414
-207	369
-472	918
-220	470
-192	394
-239	446
-200	1252
-211	216
-187	470
-179	460
-164	370
-222	397
-194	396
-180	361
-409	753
-630	706
-407	671
-180	400
-579	649
-216	224
-235	253
-180	421
-638	641
-212	217
-227	400
-209	416
-234	393
-202	385
-244	451
-220	459
-202	424
-177	416
-191	371
-195	205
-216	435
-215	457
-343	747
-233	468
-198	464
-193	203
-220	479
-217	246
-219	505
-203	219
-257	749
-219	627
-187	388
-228	498
-177	650
-691	755
-1057	1075
-600	645
-188	336
-208	397
-198	395
-234	421
-209	447
-258	451
-171	359
-208	446
-207	381
-245	442
-189	331
-243	678
-203	402
-217	419
-179	295
-177	371
-188	424
-203	358
-192	413
-230	454
-237	641
-474	496
-204	428
-170	392
-207	865
-225	662
-420	625
-196	367
-267	454
-440	1428
-167	362
-193	1169
-208	447
-224	451
-249	468
-233	556
-235	1044
-278	445
-192	405
-592	639
-178	354
-409	582
-170	464
-427	833
-375	660
-171	383
-193	195
-443	618
-209	234
-190	205
-202	425
-537	584
-190	370
-219	424
-187	493
-195	196
-575	692
-247	359
-187	574
-228	382
-462	1467
-177	402
-248	390
-435	874
-182	379
-340	547
-215	417
-363	598
-171	704
-225	456
-198	415
-201	537
-203	404
-155	307
-211	381
-169	519
-392	478
-205	635
-257	477
-205	216
-178	388
-214	376
-204	425
-306	457
-191	327
-215	431
-575	1101
-587	601
-162	374
-285	459
-231	472
-200	676
-203	816
-609	623
-177	372
-214	428
-335	613
-189	710
-370	963
-221	237
-164	184
-220	410
-405	578
-203	403
-243	509
-195	347
-208	451
-193	433
-208	398
-197	424
-208	401
-224	434
-156	363
-175	441
-176	183
-253	420
-203	637
-191	416
-207	413
-228	362
-203	408
-226	683
-141	387
-160	377
-194	420
-229	440
-181	388
-226	419
-195	536
-206	579
-217	587
-201	361
-498	631
-170	181
-424	673
-204	431
-229	459
-185	416
-216	445
-192	381
-164	246
-232	446
-174	189
-195	440
-211	379
-202	369
-205	376
-456	1056
-481	553
-219	375
-372	588
-206	255
-139	197
-276	402
-1115	1486
-191	437
-178	580
-220	626
-247	433
-209	212
-392	431
-172	389
-163	557
-210	390
-398	944
-187	352
-258	446
-203	204
-202	378
-166	210
-405	499
-242	416
-179	377
-217	626
-223	510
-165	344
-664	735
-231	652
-219	438
-232	498
-170	184
-234	387
-743	750
-190	473
-213	1248
-230	486
-163	434
-608	621
-215	365
-365	659
-195	409
-187	430
-596	670
-247	454
-224	386
-199	408
-235	473
-220	476
-299	1189
-207	473
-215	432
-279	466
-177	191
-331	624
-372	416
-441	862
-191	699
-213	388
-225	306
-188	380
-180	356
-197	222
-194	230
-218	446
-562	722
-218	470
-240	424
-626	649
-228	536
-199	390
-226	589
-254	451
-200	220
-237	418
-190	195
-212	480
-228	399
-198	648
-222	508
-217	244
-202	364
-198	428
-236	485
-402	442
-207	762
-248	435
-241	434
-398	670
-396	450
-216	412
-201	463
-185	406
-234	472
-170	359
-196	395
-245	446
-221	424
-214	508
-159	179
-133	278
-212	240
-240	700
-197	429
-238	430
-231	432
-229	429
-198	422
-195	442
-198	443
-188	209
-410	685
-234	457
-250	466
-220	518
-167	544
-170	203
-476	494
-208	405
-280	316
-199	818
-207	468
-232	499
-219	442
-185	441
-419	825
-210	652
-186	375
-194	433
-202	416
-180	386
-177	403
-236	460
-186	394
-697	698
-218	657
-189	406
-220	440
-208	648
-631	642
-198	316
-241	483
-181	397
-201	650
-188	375
-191	201
-178	430
-241	456
-217	397
-223	445
-197	513
-176	574
-523	602
-190	472
-171	1047
-178	1136
-181	430
-221	437
-191	812
-454	772
-201	416
-206	382
-369	609
-247	484
-204	440
-197	377
-572	624
-208	220
-216	431
-218	432
-213	464
-340	1307
-200	448
-215	676
-188	359
-194	201
-181	390
-195	434
-196	407
-192	400
-208	373
-185	340
-214	425
-182	437
-187	627
-207	459
-163	278
-190	565
-188	401
-172	371
-219	469
-645	863
-397	808
-711	846
-208	441
-191	411
-197	373
-197	378
-170	265
-194	423
-207	436
-197	431
-447	776
-228	660
-551	613
-210	423
-590	726
-190	190
-193	405
-182	362
-226	399
-219	471
-228	237
-674	704
-214	1364
-491	495
-424	701
-199	225
-473	927
-211	392
-202	782
-206	387
-233	689
-448	730
-153	297
-140	349
-192	388
-212	327
-218	612
-254	422
-194	386
-202	438
-199	419
-233	350
-169	386
-185	512
-178	433
-442	618
-383	703
-359	600
-206	351
-209	251
-195	401
-395	627
-177	331
-261	594
-179	577
-315	420
-208	455
-205	462
-168	770
-169	424
-185	848
-423	627
-181	206
-217	429
-183	533
-226	390
-204	386
-183	300
-196	416
-192	220
-186	935
-164	369
-211	365
-697	878
-263	1129
-200	368
-627	909
-194	592
-191	339
-197	344
-209	453
-180	408
-247	723
-222	438
-514	563
-251	668
-236	599
-231	428
-218	432
-213	422
-208	764
-418	638
-175	398
-194	268
-210	433
-238	480
-190	369
-526	568
-147	238
-224	233
-220	439
-190	513
-194	206
-220	374
-214	426
-301	389
-194	217
-186	192
-160	507
-225	428
-183	375
-198	386
-224	466
-312	637
-165	196
-205	415
-423	512
-241	446
-181	226
-176	430
-218	427
-397	1136
-261	386
-213	469
-217	257
-648	684
-213	381
-207	360
-210	439
-238	454
-166	823
-203	238
-206	385
-183	484
-194	408
-236	466
-427	841
-171	424
-203	901
-211	425
-190	217
-192	497
-227	439
-228	617
-215	458
-579	654
-404	1274
-203	398
-195	375
-224	419
-230	518
-204	398
-268	895
-190	375
-203	598
-187	356
-227	454
-228	391
-229	409
-391	862
-542	604
-389	496
-166	184
-196	472
-239	250
-235	464
-613	1203
-206	987
-231	427
-464	555
-232	454
-399	678
-186	395
-177	327
-193	216
-179	416
-628	1070
-161	364
-182	437
-141	497
-195	410
-177	378
-560	605
-248	470
-405	670
-192	409
-594	656
-223	403
-206	437
-178	564
-216	401
-245	362
-205	463
-145	211
-216	255
-211	340
-210	418
-207	449
-211	389
-591	1231
-223	382
-213	215
-126	175
-381	456
-218	1003
-209	412
-231	444
-223	425
-131	227
-495	628
-187	190
-188	349
-180	483
-200	375
-179	383
-355	938
-203	387
-219	380
-178	433
-415	437
-200	233
-197	442
-218	405
-217	494
-218	229
-374	790
-188	421
-201	218
-628	682
-530	567
-215	442
-195	404
-241	452
-461	614
-217	361
-183	206
-185	401
-182	183
-234	408
-218	431
-404	1193
-576	621
-226	405
-218	471
-179	383
-169	377
-187	448
-189	387
-238	376
-220	423
-178	445
-183	417
-191	383
-232	442
-575	1143
-212	238
-414	640
-247	248
-230	323
-123	409
-199	951
-228	401
-166	432
-196	973
-227	386
-146	398
-214	610
-213	432
-343	399
-188	611
-238	690
-191	369
-169	215
-208	379
-168	369
-189	463
-190	434
-264	974
-191	649
-185	359
-167	205
-186	414
-184	653
-227	267
-430	626
-227	429
-201	398
-175	359
-191	203
-205	384
-257	467
-210	393
-260	520
-483	499
-251	478
-240	474
-202	410
-207	367
-222	425
-661	892
-192	1031
-231	507
-461	623
-262	412
-206	413
-190	470
-224	448
-196	211
-167	779
-191	405
-221	381
-383	570
-160	180
-173	455
-413	557
-229	423
-619	709
-206	383
-205	469
-218	634
-200	373
-180	388
-189	432
-201	221
-279	492
-830	833
-227	240
-208	441
-236	374
-224	448
-241	605
-165	414
-186	352
-394	887
-192	544
-181	189
-232	538
-216	404
-188	466
-191	1109
-660	668
-180	424
-215	463
-204	400
-429	864
-365	623
-276	537
-194	348
-174	379
-185	368
-540	592
-201	417
-196	204
-178	189
-208	459
-155	376
-224	234
-630	636
-173	373
-231	376
-193	346
-202	389
-220	226
-609	902
-190	204
-155	192
-197	645
-371	794
-210	461
-255	471
-243	403
-223	389
-192	407
-174	401
-400	789
-544	602
diff --git a/tests/testthat/test-estimate_1n_coverage_1d_subsets.R b/tests/testthat/test-estimate_1n_coverage_1d_subsets.R
deleted file mode 100644
index dda6eb8..0000000
--- a/tests/testthat/test-estimate_1n_coverage_1d_subsets.R
+++ /dev/null
@@ -1,18 +0,0 @@
-context("1d subsets fitting")
-
-cov <- read.table('fmlo_smaple_cov_file.tsv')
-
-minor_variant_rel_cov <- cov$V1 / (cov$V1 + cov$V2)
-total_pair_cov <- cov$V1 + cov$V2
-
-high_cov_filt <- quantile(total_pair_cov, 0.99) > total_pair_cov
-minor_variant_rel_cov <- minor_variant_rel_cov[high_cov_filt]
-total_pair_cov <- total_pair_cov[high_cov_filt]
-
-draft_n <- round(estimate_1n_coverage_1d_subsets(total_pair_cov, minor_variant_rel_cov), 1)
-expected_n <- 207 # from genomescope
-
-test_that("approximate coverage estimation", {
-        expect_true(abs(draft_n - expected_n) < 10)
-    }
-)
diff --git a/tests/testthat/test-fitting.R b/tests/testthat/test-fitting.R
deleted file mode 100644
index 708f5cd..0000000
--- a/tests/testthat/test-fitting.R
+++ /dev/null
@@ -1,46 +0,0 @@
-context("integration")
-
-cov <- read.table('fmlo_smaple_cov_file.tsv')
-
-minor_variant_rel_cov <- cov$V1 / (cov$V1 + cov$V2)
-total_pair_cov <- cov$V1 + cov$V2
-
-high_cov_filt <- quantile(total_pair_cov, 0.99) > total_pair_cov
-minor_variant_rel_cov <- minor_variant_rel_cov[high_cov_filt]
-total_pair_cov <- total_pair_cov[high_cov_filt]
-
-draft_n <- round(estimate_1n_coverage_1d_subsets(total_pair_cov, minor_variant_rel_cov), 1)
-ymax <- min(10 * draft_n, max(total_pair_cov))
-ymin <- min(total_pair_cov) - 1
-
-nbins <- 15
-smudge_container <- get_smudge_container(minor_variant_rel_cov, total_pair_cov, .nbins = nbins)
-
-test_that("2d fitting somehow works", {
-        expect_gt(sum(smudge_container$dens), 0)
-    }
-)
-
-# peak_points <- peak_agregation(smudge_container)
-# peak_sizes <- get_peak_summary(peak_points, smudge_container)
-# n <- estimate_1n_coverage_highest_peak(peak_sizes, minor_variant_rel_cov, total_pair_cov)
-# peak_sizes$structure <- apply(peak_sizes, 1,
-#                               function(x){ guess_genome_structure(x, n)})
-# peak_sizes$corrected_minor_variant_cov <- sapply(peak_sizes$structure, function(x){round(mean(unlist(strsplit(x, split = '')) == 'B'), 2)})
-# peak_sizes$ploidy <- sapply(peak_sizes$structure, nchar)
-#
-# genome_ploidy <- peak_sizes$ploidy[which.max(peak_sizes$rel_size)]
-#
-# test_that("M. flo is expected to be estimated as triploid", {
-#         expect_equal(genome_ploidy, 3)
-#     }
-# )
-
-# pal <- brewer.pal(11,'Spectral')
-# rf <- colorRampPalette(rev(pal[3:11]))
-# colour_ramp <- rf(32)
-# layout(matrix(c(2,4,1,3), 2, 2, byrow=T), c(3,1), c(1,3))
-# plot_smudgeplot(smudge_container, n, colour_ramp)
-# plot_expected_haplotype_structure(n, peak_sizes, T)
-# plot_histograms(minor_variant_rel_cov, total_pair_cov, ymax, "TEST", genome_ploidy)
-# plot_legend(smudge_container, total_pair_cov, colour_ramp)
diff --git a/tests/testthat/test-get_smudge_container.R b/tests/testthat/test-get_smudge_container.R
deleted file mode 100644
index 4c3b6c8..0000000
--- a/tests/testthat/test-get_smudge_container.R
+++ /dev/null
@@ -1,22 +0,0 @@
-context("smudge container generation")
-
-x <- c(2,2,2,3,3,3,3,3,3,3)
-y <- c(1,1,2,2,2,2,3,3,3,3)
-
-cont <- get_smudge_container(x, y, .nbins = 3, .xlim = c(0, 3), .ylim = c(0, 3))
-exp <- matrix(c(0,0,0,2,1,0,0,3,4), ncol = 3, byrow = T)
-
-test_that("generation of smudge matrix categorize as expected", {
-        expect_true(all(cont$dens == exp))
-    }
-)
-
-minor_cov <- c(0.5, 0.33, 0.32, 0.25, 0.125, 0.31)
-pair_cov <- c(40, 60, 61, 83, 164, 123)
-cont <- get_smudge_container(minor_cov, pair_cov, .nbins = 4)
-exp <- matrix(c(0,0,0,1, 0,0,1,0, 0,2,1,0, 1,0,0,0), ncol = 4, byrow = T)
-
-test_that("generation of smudge matrix categorize as expected", {
-        expect_true(all(cont$dens == exp))
-    }
-)
diff --git a/tests/testthat/test-get_summary.R b/tests/testthat/test-get_summary.R
deleted file mode 100644
index 5b8f1ec..0000000
--- a/tests/testthat/test-get_summary.R
+++ /dev/null
@@ -1,49 +0,0 @@
-context("generation of peak summary")
-
-cov <- read.table('fmlo_smaple_cov_file.tsv')
-
-minor_variant_rel_cov <- cov$V1 / (cov$V1 + cov$V2)
-total_pair_cov <- cov$V1 + cov$V2
-
-high_cov_filt <- quantile(total_pair_cov, 0.99) > total_pair_cov
-minor_variant_rel_cov <- minor_variant_rel_cov[high_cov_filt]
-total_pair_cov <- total_pair_cov[high_cov_filt]
-
-draft_n <- 205
-ymax <- min(10 * draft_n, max(total_pair_cov))
-ymin <- min(total_pair_cov) - 1
-
-nbins <- 30
-smudge_container <- get_smudge_container(minor_variant_rel_cov, total_pair_cov, .nbins = nbins)
-
-peak_points <- peak_agregation(smudge_container)
-# peak_points[peak_points$summit == T,]
-
-test_that("peak agregation creates more than one smudge", {
-        expect_true(sum(peak_points$summit) > 1)
-        expect_true(any(!is.na(peak_points$peak)))
-    }
-)
-
-peak_sizes <- get_peak_summary(peak_points, smudge_container, 0.05)
-the_smallest_n <- get_trinoploid_1n_est(peak_sizes)
-
-n <- estimate_1n_coverage_highest_peak(peak_sizes, minor_variant_rel_cov, total_pair_cov, the_smallest_n)
-
-test_that("test that polished estimate of coverage for mflo makes sense", {
-        expect_true(abs(n - 207) < 10)
-    }
-)
-
-peak_sizes$structure <- apply(peak_sizes, 1,
-                              function(x){ guess_genome_structure(x, n)})
-
-peak_sizes$corrected_minor_variant_cov <- sapply(peak_sizes$structure, function(x){round(mean(unlist(strsplit(x, split = '')) == 'B'), 2)})
-peak_sizes$ploidy <- sapply(peak_sizes$structure, nchar)
-
-genome_ploidy <- peak_sizes$ploidy[which.max(peak_sizes$rel_size)]
-
-test_that("mflo was estimated triploid", {
-        expect_true(genome_ploidy == 3)
-    }
-)