From 9a2d61dee2148912d0c7d8bfd2dd6319fd7dc0b2 Mon Sep 17 00:00:00 2001 From: Zena Lapp Date: Thu, 7 Sep 2023 08:05:49 -0600 Subject: [PATCH] Modularize str profile preprocessing --- NAMESPACE | 2 + R/bistro.R | 40 ++---- R/calc_log10_lrs.R | 5 + R/identify_matches.R | 6 +- R/preprocess_data.R | 145 ++++++++++++++++++++++ R/utils.R | 86 ------------- man/calc_log10_lrs.Rd | 5 +- man/prep_bloodmeal_profiles.Rd | 29 +++++ man/prep_human_profiles.Rd | 27 ++++ man/rm_dups.Rd | 2 +- man/rm_twins.Rd | 2 +- man/subset_ids.Rd | 2 +- tests/testthat/_snaps/bistro.md | 22 ---- tests/testthat/_snaps/identify_matches.md | 4 +- tests/testthat/_snaps/preprocess_data.md | 86 +++++++++++++ tests/testthat/_snaps/utils.md | 23 ---- tests/testthat/test-bistro.R | 25 ---- tests/testthat/test-identify_matches.R | 14 ++- tests/testthat/test-preprocess_data.R | 52 ++++++++ tests/testthat/test-utils.R | 32 ----- 20 files changed, 381 insertions(+), 228 deletions(-) create mode 100644 R/preprocess_data.R create mode 100644 man/prep_bloodmeal_profiles.Rd create mode 100644 man/prep_human_profiles.Rd create mode 100644 tests/testthat/_snaps/preprocess_data.md delete mode 100644 tests/testthat/_snaps/utils.md create mode 100644 tests/testthat/test-preprocess_data.R diff --git a/NAMESPACE b/NAMESPACE index e9f2359..57b908f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,8 @@ export(calc_allele_freqs) export(calc_log10_lrs) export(filter_peaks) export(identify_matches) +export(prep_bloodmeal_profiles) +export(prep_human_profiles) export(rm_dups) export(rm_twins) import(euroformix) diff --git a/R/bistro.R b/R/bistro.R index b126869..4f07c3f 100644 --- a/R/bistro.R +++ b/R/bistro.R @@ -114,39 +114,19 @@ bistro <- pop_allele_freqs <- calc_allele_freqs(human_profiles) } - if (rm_twins) { - human_profiles <- rm_twins(human_profiles) - } - message("Formatting bloodmeal profiles") - - if (is.null(bloodmeal_ids)) { - bloodmeal_ids <- unique(bloodmeal_profiles$SampleName) - } else { - bloodmeal_ids <- - subset_ids(bloodmeal_ids, bloodmeal_profiles$SampleName) - } - - bloodmeal_profiles <- bloodmeal_profiles |> - dplyr::filter(SampleName %in% bloodmeal_ids) - - check_heights(bloodmeal_profiles$Height, peak_thresh) - - bloodmeal_profiles <- bloodmeal_profiles |> - rm_dups() |> - filter_peaks(peak_thresh) + bloodmeal_profiles <- prep_bloodmeal_profiles( + bloodmeal_profiles, + peak_thresh, + bloodmeal_ids + ) message("Formatting human profiles") - - if (is.null(human_ids)) { - human_ids <- unique(human_profiles$SampleName) - } else { - human_ids <- subset_ids(human_ids, human_profiles$SampleName) - } - - human_profiles <- human_profiles |> - dplyr::filter(SampleName %in% human_ids) |> - rm_dups() + human_profiles <- prep_human_profiles( + human_profiles, + human_ids, + rm_twins + ) message("Calculating log10LRs") diff --git a/R/calc_log10_lrs.R b/R/calc_log10_lrs.R index bf2572b..a3e8bb9 100644 --- a/R/calc_log10_lrs.R +++ b/R/calc_log10_lrs.R @@ -116,6 +116,11 @@ calc_one_log10_lr <- #' Calculate log10_lrs for multiple bloodmeal-human pairs #' +#' Note that this function doesn't preprocess the bloodmeal and human profile +#' data. If you would like to preprocess it in the same way as is performed +#' internally in the `bistro()` function, you must run +#' `prep_bloodmeal_profiles()` and `prep_human_profiles()` first. +#' #' @inheritParams bistro #' #' @return A tibble with the same output as for [bistro()], except there is no diff --git a/R/identify_matches.R b/R/identify_matches.R index 8c9d763..1294a75 100644 --- a/R/identify_matches.R +++ b/R/identify_matches.R @@ -29,7 +29,11 @@ identify_one_match_set <- function(log10_lrs, bloodmeal_id) { ) if (all(is.na(log10_lrs$log10_lr) | is.infinite(log10_lrs$log10_lr))) { - matches_thresh <- matches + matches_thresh <- matches |> + dplyr::mutate(notes = ifelse(is.na(notes), + "all log10LRs NA or Inf", + paste0("all log10LRs NA or Inf", ";", notes) + )) } else if (max(log10_lrs$log10_lr[!is.infinite(log10_lrs$log10_lr)]) < 1.5 && is.na(notes)) { matches_thresh <- matches |> diff --git a/R/preprocess_data.R b/R/preprocess_data.R new file mode 100644 index 0000000..8533e16 --- /dev/null +++ b/R/preprocess_data.R @@ -0,0 +1,145 @@ +#' Preprocess bloodmeal profiles +#' +#' Removes duplicates and peaks below threshold, subsets ids +#' +#' @inheritParams bistro +#' +#' @return Dataframe with preprocessed bloodmeal profiles +#' @export +#' @keywords internal +prep_bloodmeal_profiles <- function(bloodmeal_profiles, + peak_thresh, + bloodmeal_ids = NULL) { + if (is.null(bloodmeal_ids)) { + bloodmeal_ids <- unique(bloodmeal_profiles$SampleName) + } else { + bloodmeal_ids <- + subset_ids(bloodmeal_ids, bloodmeal_profiles$SampleName) + } + + bloodmeal_profiles <- bloodmeal_profiles |> + dplyr::filter(SampleName %in% bloodmeal_ids) + + check_heights(bloodmeal_profiles$Height, peak_thresh) + + bloodmeal_profiles <- bloodmeal_profiles |> + rm_dups() |> + filter_peaks(peak_thresh) + + return(bloodmeal_profiles) +} + +#' Preprocess human profiles +#' +#' Removes duplicates and optionally twins, subsets ids +#' +#' @inheritParams bistro +#' +#' @return Dataframe with preprocessed human profiles +#' @export +#' @keywords internal +prep_human_profiles <- function(human_profiles, + human_ids = NULL, + rm_twins = TRUE) { + if (rm_twins) { + human_profiles <- rm_twins(human_profiles) + } + + if (is.null(human_ids)) { + human_ids <- unique(human_profiles$SampleName) + } else { + human_ids <- subset_ids(human_ids, human_profiles$SampleName) + } + + human_profiles <- human_profiles |> + dplyr::filter(SampleName %in% human_ids) |> + rm_dups() + + return(human_profiles) +} + +#' Remove duplicate rows with warning +#' +#' @param df Dataframe from which to remove duplicate rows +#' +#' @return Un-duplicated dataframe +#' @export +#' @keywords internal +rm_dups <- function(df) { + n_orig <- nrow(df) + if (n_orig != dplyr::n_distinct(df)) { + df <- df |> + dplyr::distinct() + warning(paste0( + "Detected and removed ", + n_orig - nrow(df), + " duplicate rows." + )) + } + return(df) +} + +#' Remove identical STR profiles +#' +#' @inheritParams bistro +#' +#' @return Data frame with twins removed +#' @export +#' @keywords internal +rm_twins <- function(human_profiles) { + not_twins <- human_profiles |> + dplyr::arrange(Marker, Allele) |> + dplyr::group_by(SampleName) |> + dplyr::summarize(all_alleles = stringr::str_c(paste0(Marker, Allele), + collapse = ";" + )) |> + dplyr::group_by(all_alleles) |> + dplyr::mutate(n = dplyr::n()) |> + dplyr::filter(n == 1) |> + dplyr::pull(SampleName) + + n_ident_profs <- + dplyr::n_distinct(human_profiles$SampleName) - length(not_twins) + + if (n_ident_profs > 0) { + message( + paste0( + "Identified ", + n_ident_profs, + " people whose profiles appear more than once", + " (likely identical twins). These are being removed." + ) + ) + human_profiles <- human_profiles |> + dplyr::filter(SampleName %in% not_twins) + } + + return(human_profiles) +} + +#' Subset to ids present in the dataset +#' +#' @param ids ids to check if in vec +#' @param vec vector of ids +#' @param vec_name name of vector +#' +#' @return list of IDs that are present +#' @keywords internal +subset_ids <- function(ids, vec, vec_name) { + id_absent <- setdiff(ids, vec) + if (length(id_absent) > 0) { + warning( + "Removing ", + length(id_absent), + " ", + vec_name, + " not found in the dataset: ", + id_absent + ) + } + ids <- intersect(ids, vec) + if (length(ids) == 0) { + stop("None of the provided ", vec_name, " are present in the dataset.") + } + return(ids) +} diff --git a/R/utils.R b/R/utils.R index 309cf0e..79c5997 100644 --- a/R/utils.R +++ b/R/utils.R @@ -30,89 +30,3 @@ utils::globalVariables( ignore_unused_imports <- function() { codetools::checkUsage } - -#' Remove duplicate rows with warning -#' -#' @param df Dataframe from which to remove duplicate rows -#' -#' @return Un-duplicated dataframe -#' @export -#' @keywords internal -rm_dups <- function(df) { - n_orig <- nrow(df) - if (n_orig != dplyr::n_distinct(df)) { - df <- df |> - dplyr::distinct() - warning(paste0( - "Detected and removed ", - n_orig - nrow(df), - " duplicate rows." - )) - } - return(df) -} - -#' Remove identical STR profiles -#' -#' @inheritParams bistro -#' -#' @return Data frame with twins removed -#' @export -#' @keywords internal -rm_twins <- function(human_profiles) { - not_twins <- human_profiles |> - dplyr::arrange(Marker, Allele) |> - dplyr::group_by(SampleName) |> - dplyr::summarize(all_alleles = stringr::str_c(paste0(Marker, Allele), - collapse = ";" - )) |> - dplyr::group_by(all_alleles) |> - dplyr::mutate(n = dplyr::n()) |> - dplyr::filter(n == 1) |> - dplyr::pull(SampleName) - - n_ident_profs <- - dplyr::n_distinct(human_profiles$SampleName) - length(not_twins) - - if (n_ident_profs > 0) { - message( - paste0( - "Identified ", - n_ident_profs, - " people whose profiles appear more than once", - " (likely identical twins). These are being removed." - ) - ) - human_profiles <- human_profiles |> - dplyr::filter(SampleName %in% not_twins) - } - - return(human_profiles) -} - -#' Subset to ids present in the dataset -#' -#' @param ids ids to check if in vec -#' @param vec vector of ids -#' @param vec_name name of vector -#' -#' @return list of IDs that are present -#' @keywords internal -subset_ids <- function(ids, vec, vec_name) { - id_absent <- setdiff(ids, vec) - if (length(id_absent) > 0) { - warning( - "Removing ", - length(id_absent), - " ", - vec_name, - " not found in the dataset: ", - id_absent - ) - } - ids <- intersect(ids, vec) - if (length(ids) == 0) { - stop("None of the provided ", vec_name, " are present in the dataset.") - } - return(ids) -} diff --git a/man/calc_log10_lrs.Rd b/man/calc_log10_lrs.Rd index b724bc6..0c38d5d 100644 --- a/man/calc_log10_lrs.Rd +++ b/man/calc_log10_lrs.Rd @@ -85,6 +85,9 @@ contributors used for \code{\link[euroformix:contLikSearch]{euroformix::contLikS value of 3. } \description{ -Calculate log10_lrs for multiple bloodmeal-human pairs +Note that this function doesn't preprocess the bloodmeal and human profile +data. If you would like to preprocess it in the same way as is performed +internally in the \code{bistro()} function, you must run +\code{prep_bloodmeal_profiles()} and \code{prep_human_profiles()} first. } \keyword{internal} diff --git a/man/prep_bloodmeal_profiles.Rd b/man/prep_bloodmeal_profiles.Rd new file mode 100644 index 0000000..aebdaa8 --- /dev/null +++ b/man/prep_bloodmeal_profiles.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/preprocess_data.R +\name{prep_bloodmeal_profiles} +\alias{prep_bloodmeal_profiles} +\title{Preprocess bloodmeal profiles} +\usage{ +prep_bloodmeal_profiles(bloodmeal_profiles, peak_thresh, bloodmeal_ids = NULL) +} +\arguments{ +\item{bloodmeal_profiles}{Tibble with alleles for all bloodmeals in reference +database including 4 columns: SampleName, Marker, Allele, Height. Height +must be numeric or coercible to numeric.} + +\item{peak_thresh}{Allele peak height threshold in RFUs. All peaks under this +threshold will be filtered out. If prior filtering was performed, this +number should be equal to or greater than that number. Also used for +\code{threshT} argument in \code{\link[euroformix:contLikSearch]{euroformix::contLikSearch()}}.} + +\item{bloodmeal_ids}{Vector of bloodmeal ids from the SampleName column in +\code{bloodmeal_profiles} for which to compute log10_lrs. If NULL, all ids in +the input dataframe will be used. Default: NULL} +} +\value{ +Dataframe with preprocessed bloodmeal profiles +} +\description{ +Removes duplicates and peaks below threshold, subsets ids +} +\keyword{internal} diff --git a/man/prep_human_profiles.Rd b/man/prep_human_profiles.Rd new file mode 100644 index 0000000..91c4531 --- /dev/null +++ b/man/prep_human_profiles.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/preprocess_data.R +\name{prep_human_profiles} +\alias{prep_human_profiles} +\title{Preprocess human profiles} +\usage{ +prep_human_profiles(human_profiles, human_ids = NULL, rm_twins = TRUE) +} +\arguments{ +\item{human_profiles}{Tibble with alleles for all humans in reference +database including three columns: SampleName, Marker, Allele.} + +\item{human_ids}{Vector of human ids from the SampleName column in +\code{human_profiles} for which to compute log10_lrs. If NULL, all ids in the +input dataframe will be used. Default: NULL} + +\item{rm_twins}{A boolean indicating whether or not to remove likely twins +(identical STR profiles) from the human database prior to identifying +matches. Default: TRUE} +} +\value{ +Dataframe with preprocessed human profiles +} +\description{ +Removes duplicates and optionally twins, subsets ids +} +\keyword{internal} diff --git a/man/rm_dups.Rd b/man/rm_dups.Rd index 21eff1a..0130d8a 100644 --- a/man/rm_dups.Rd +++ b/man/rm_dups.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/preprocess_data.R \name{rm_dups} \alias{rm_dups} \title{Remove duplicate rows with warning} diff --git a/man/rm_twins.Rd b/man/rm_twins.Rd index 2da1dcf..6c34eac 100644 --- a/man/rm_twins.Rd +++ b/man/rm_twins.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/preprocess_data.R \name{rm_twins} \alias{rm_twins} \title{Remove identical STR profiles} diff --git a/man/subset_ids.Rd b/man/subset_ids.Rd index fa97257..d1fb105 100644 --- a/man/subset_ids.Rd +++ b/man/subset_ids.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/preprocess_data.R \name{subset_ids} \alias{subset_ids} \title{Subset to ids present in the dataset} diff --git a/tests/testthat/_snaps/bistro.md b/tests/testthat/_snaps/bistro.md index 317924f..83fd9f7 100644 --- a/tests/testthat/_snaps/bistro.md +++ b/tests/testthat/_snaps/bistro.md @@ -20,28 +20,6 @@ 1 evid1 17 2 yes P1 21.8 passed al~ 21 ---- - - Code - bistro(bm_evid1, hu_p1, pop_allele_freqs = pop_allele_freqs, kit = "ESX17", - peak_thresh = 200, bloodmeal_ids = "evid1", human_ids = "P1") - Message - 1/17 markers in kit but not in pop_allele_freqs: AMEL - Formatting bloodmeal profiles - Removing 4 peaks under the threshold of 200 RFU. - Formatting human profiles - Calculating log10LRs - # bloodmeal ids: 1 - # human ids: 1 - Bloodmeal id 1/1 - Human id 1/1 - Identifying matches - Output - # A tibble: 1 x 8 - bloodmeal_id locus_count est_noc match human_id log10_lr notes thresh_low - - 1 evid1 17 2 yes P1 21.8 passed al~ 21 - --- Code diff --git a/tests/testthat/_snaps/identify_matches.md b/tests/testthat/_snaps/identify_matches.md index cfc5118..4967a5c 100644 --- a/tests/testthat/_snaps/identify_matches.md +++ b/tests/testthat/_snaps/identify_matches.md @@ -17,7 +17,7 @@ # A tibble: 1 x 8 bloodmeal_id locus_count est_noc match human_id log10_lr notes thresh_low - 1 evid2 1 2 no NA NA no shared~ NA + 1 evid2 1 2 no NA NA all log10~ NA --- @@ -50,6 +50,6 @@ 1 evid1 17 2 yes P1 21.8 passed al~ 9.5 2 evid1 17 2 yes P2 10.3 passed al~ 9.5 - 3 evid2 1 2 no NA no shared~ NA + 3 evid2 1 2 no NA all log10~ NA 4 evid3 8 1 no NA all log10~ NA diff --git a/tests/testthat/_snaps/preprocess_data.md b/tests/testthat/_snaps/preprocess_data.md new file mode 100644 index 0000000..6e4dcfb --- /dev/null +++ b/tests/testthat/_snaps/preprocess_data.md @@ -0,0 +1,86 @@ +# prep_bloodmeal_profiles works + + Code + prep_bloodmeal_profiles(bloodmeal_profiles, bloodmeal_ids = "evid1", + peak_thresh = 200) + Message + Removing 4 peaks under the threshold of 200 RFU. + Output + # A tibble: 50 x 4 + SampleName Marker Allele Height + + 1 evid1 AMEL X 2136 + 2 evid1 AMEL Y 1015 + 3 evid1 D10S1248 13 1856 + 4 evid1 D10S1248 15 1045 + 5 evid1 D12S391 18 297 + 6 evid1 D12S391 18.3 1446 + 7 evid1 D12S391 19 751 + 8 evid1 D12S391 22 1370 + 9 evid1 D16S539 10 312 + 10 evid1 D16S539 11 743 + # i 40 more rows + +# prep_human_profiles works + + Code + prep_human_profiles(human_profiles) + Output + # A tibble: 96 x 3 + SampleName Marker Allele + + 1 00-JP0001-14_20142342311_NO-3241 AMEL X + 2 00-JP0001-14_20142342311_NO-3241 AMEL Y + 3 00-JP0001-14_20142342311_NO-3241 D10S1248 12 + 4 00-JP0001-14_20142342311_NO-3241 D10S1248 13 + 5 00-JP0001-14_20142342311_NO-3241 D12S391 17 + 6 00-JP0001-14_20142342311_NO-3241 D12S391 18 + 7 00-JP0001-14_20142342311_NO-3241 D16S539 10 + 8 00-JP0001-14_20142342311_NO-3241 D16S539 11 + 9 00-JP0001-14_20142342311_NO-3241 D18S51 13 + 10 00-JP0001-14_20142342311_NO-3241 D18S51 17 + # i 86 more rows + +--- + + Code + prep_human_profiles(human_profiles, human_ids = "P1") + Output + # A tibble: 33 x 3 + SampleName Marker Allele + + 1 P1 AMEL X + 2 P1 AMEL Y + 3 P1 D10S1248 13 + 4 P1 D10S1248 15 + 5 P1 D12S391 18.3 + 6 P1 D12S391 22 + 7 P1 D16S539 11 + 8 P1 D16S539 12 + 9 P1 D18S51 15 + 10 P1 D18S51 17 + # i 23 more rows + +# rm_twins works + + Code + rm_twins(dplyr::bind_rows(human_profiles, dplyr::mutate(dplyr::filter( + human_profiles, SampleName == "P1"), SampleName = "Pdup"))) + Message + Identified 2 people whose profiles appear more than once (likely identical twins). These are being removed. + Output + # A tibble: 63 x 3 + SampleName Marker Allele + + 1 00-JP0001-14_20142342311_NO-3241 AMEL X + 2 00-JP0001-14_20142342311_NO-3241 AMEL Y + 3 00-JP0001-14_20142342311_NO-3241 D10S1248 12 + 4 00-JP0001-14_20142342311_NO-3241 D10S1248 13 + 5 00-JP0001-14_20142342311_NO-3241 D12S391 17 + 6 00-JP0001-14_20142342311_NO-3241 D12S391 18 + 7 00-JP0001-14_20142342311_NO-3241 D16S539 10 + 8 00-JP0001-14_20142342311_NO-3241 D16S539 11 + 9 00-JP0001-14_20142342311_NO-3241 D18S51 13 + 10 00-JP0001-14_20142342311_NO-3241 D18S51 17 + # i 53 more rows + diff --git a/tests/testthat/_snaps/utils.md b/tests/testthat/_snaps/utils.md deleted file mode 100644 index aa94c88..0000000 --- a/tests/testthat/_snaps/utils.md +++ /dev/null @@ -1,23 +0,0 @@ -# rm_twins works - - Code - rm_twins(dplyr::bind_rows(human_profiles, dplyr::mutate(dplyr::filter( - human_profiles, SampleName == "P1"), SampleName = "Pdup"))) - Message - Identified 2 people whose profiles appear more than once (likely identical twins). These are being removed. - Output - # A tibble: 63 x 3 - SampleName Marker Allele - - 1 00-JP0001-14_20142342311_NO-3241 AMEL X - 2 00-JP0001-14_20142342311_NO-3241 AMEL Y - 3 00-JP0001-14_20142342311_NO-3241 D10S1248 12 - 4 00-JP0001-14_20142342311_NO-3241 D10S1248 13 - 5 00-JP0001-14_20142342311_NO-3241 D12S391 17 - 6 00-JP0001-14_20142342311_NO-3241 D12S391 18 - 7 00-JP0001-14_20142342311_NO-3241 D16S539 10 - 8 00-JP0001-14_20142342311_NO-3241 D16S539 11 - 9 00-JP0001-14_20142342311_NO-3241 D18S51 13 - 10 00-JP0001-14_20142342311_NO-3241 D18S51 17 - # i 53 more rows - diff --git a/tests/testthat/test-bistro.R b/tests/testthat/test-bistro.R index 25f833a..cebde79 100644 --- a/tests/testthat/test-bistro.R +++ b/tests/testthat/test-bistro.R @@ -14,18 +14,6 @@ test_that("bistro works", { ) ) - expect_snapshot( - bistro( - bm_evid1, - hu_p1, - pop_allele_freqs = pop_allele_freqs, - kit = "ESX17", - peak_thresh = 200, - bloodmeal_ids = "evid1", - human_ids = "P1" - ) - ) - expect_snapshot(bistro( bm_evid1, hu_p1, @@ -43,17 +31,4 @@ test_that("bistro works", { ), "If `calc_allele_freqs = FALSE`, then `pop_allele_freqs` is required." ) - - expect_message(expect_message(expect_message( - expect_error( - bistro( - bloodmeal_profiles %>% dplyr::filter(Height < 200), - human_profiles, - pop_allele_freqs = pop_allele_freqs, - kit = "ESX17", - peak_thresh = 200 - ), - "All bloodmeal peak heights below threshold of 200." - ) - ))) }) diff --git a/tests/testthat/test-identify_matches.R b/tests/testthat/test-identify_matches.R index 5f2c734..eee47f3 100644 --- a/tests/testthat/test-identify_matches.R +++ b/tests/testthat/test-identify_matches.R @@ -33,7 +33,15 @@ test_that("identify_matches works", { expect_snapshot(identify_matches(lrs)) - expect_no_error(lrs |> - dplyr::mutate(log10_lr = Inf) |> - identify_matches()) + expect_equal( + lrs |> + dplyr::mutate(log10_lr = Inf) |> + identify_matches() |> + dplyr::pull(notes), + c( + "all log10LRs NA or Inf", + "all log10LRs NA or Inf;no shared alleles;euroformix error", + "all log10LRs NA or Inf" + ) + ) }) diff --git a/tests/testthat/test-preprocess_data.R b/tests/testthat/test-preprocess_data.R new file mode 100644 index 0000000..02acfc1 --- /dev/null +++ b/tests/testthat/test-preprocess_data.R @@ -0,0 +1,52 @@ +test_that("prep_bloodmeal_profiles works", { + expect_error( + prep_bloodmeal_profiles( + bloodmeal_profiles %>% dplyr::filter(Height < 200), + peak_thresh = 200 + ), + "All bloodmeal peak heights below threshold of 200." + ) + + expect_snapshot(prep_bloodmeal_profiles( + bloodmeal_profiles, + bloodmeal_ids = "evid1", + peak_thresh = 200 + )) +}) + +test_that("prep_human_profiles works", { + expect_snapshot(prep_human_profiles(human_profiles)) + + expect_snapshot(prep_human_profiles(human_profiles, human_ids = "P1")) +}) + +test_that("rm_dups works", { + expect_no_message(rm_dups(tibble::tibble(test = 1:2))) + + expect_warning( + rm_dups(tibble::tibble(test = rep(1, 2))), + "Detected and removed 1 duplicate rows." + ) +}) + +test_that("rm_twins works", { + expect_snapshot(rm_twins( + dplyr::bind_rows( + human_profiles, + human_profiles |> + dplyr::filter(SampleName == "P1") |> + dplyr::mutate(SampleName = "Pdup") + ) + )) +}) + +test_that("subset_ids works", { + expect_equal(subset_ids("a", c("a", "b"), "ids"), "a") + expect_error( + expect_warning( + subset_ids("c", c("a", "b"), "ids"), + "Removing 1 ids not found in the dataset: c" + ), + "None of the provided ids are present in the dataset." + ) +}) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index b535e2d..da50b77 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -1,35 +1,3 @@ test_that("ignore_unused_imports works", { expect_no_error(ignore_unused_imports()) }) - - -test_that("rm_dups works", { - expect_no_message(rm_dups(tibble::tibble(test = 1:2))) - - expect_warning( - rm_dups(tibble::tibble(test = rep(1, 2))), - "Detected and removed 1 duplicate rows." - ) -}) - -test_that("rm_twins works", { - expect_snapshot(rm_twins( - dplyr::bind_rows( - human_profiles, - human_profiles |> - dplyr::filter(SampleName == "P1") |> - dplyr::mutate(SampleName = "Pdup") - ) - )) -}) - -test_that("subset_ids works", { - expect_equal(subset_ids("a", c("a", "b"), "ids"), "a") - expect_error( - expect_warning( - subset_ids("c", c("a", "b"), "ids"), - "Removing 1 ids not found in the dataset: c" - ), - "None of the provided ids are present in the dataset." - ) -})