Skip to content

Commit

Permalink
Add function to create human profile database from bloodmeals resolves
Browse files Browse the repository at this point in the history
  • Loading branch information
zenalapp committed Nov 20, 2023
1 parent e58cd34 commit 10ba077
Show file tree
Hide file tree
Showing 26 changed files with 297 additions and 85 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
export(bistro)
export(calc_allele_freqs)
export(calc_log10_lrs)
export(create_db_from_bloodmeals)
export(filter_peaks)
export(identify_matches)
export(match_exact)
Expand Down
33 changes: 33 additions & 0 deletions R/checks.R
Original file line number Diff line number Diff line change
Expand Up @@ -291,3 +291,36 @@ check_heights <- function(heights, peak_thresh) {
stop("All bloodmeal peak heights below threshold of ", peak_thresh, ".")
}
}

#' Check input to `create_db_from_bloodmeals()``
#'
#' @inheritParams bistro
#'
#' @return number of alleles in a complete profile
check_create_db_input <- function(bloodmeal_profiles, kit, peak_thresh, rm_markers) {
check_colnames(
bloodmeal_profiles,
c("SampleName", "Marker", "Allele")
)
check_ids(rm_markers)
check_peak_thresh(peak_thresh)
kit_df <- check_kit(kit)
kit_markers <- kit_df$Marker |>
unique() |>
toupper()
bm_prof_markers <- bloodmeal_profiles$Marker |>
unique() |>
toupper()
if (!is.null(rm_markers)) {
rm_markers <- toupper(rm_markers)
bm_prof_markers <- bm_prof_markers[!bm_prof_markers %in% rm_markers]
kit_markers <- kit_markers[!kit_markers %in% rm_markers]
}
check_setdiff_markers(
bm_prof_markers,
kit_markers,
"bloodmeal_profiles",
"kit"
)
length(kit_markers)
}
46 changes: 46 additions & 0 deletions R/create_db_from_bloodmeals.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#' Create human profile database from bloodmeal profiles
#'
#' @inheritParams bistro
#'
#' @return Human database created from complete single-source bloodmeals.
#' Complete is defined as the number of markers in the kit minus the number of markers in `rm_markers`.
#' @export
#'
#' @examples
#' \dontrun{
#' # load example data
#' path_to_data <- paste0("https://raw.githubusercontent.com/duke-malaria-collaboratory/",
#' "bistro_validation/main/data/provedit/provedit_samples_mass200thresh.csv")
#' samples <- readr::read_csv(path_to_data)
#' create_db_from_bloodmeals(samples, kit = "identifiler", peak_thresh = 200)
#' }
#'
create_db_from_bloodmeals <- function(bloodmeal_profiles,
kit,
peak_thresh,
rm_markers = c("AMEL")) {
check_pkg_version("tidyr", utils::packageVersion("tidyr"), "1.3.0")
n_markers_in_kit <- check_create_db_input(bloodmeal_profiles, kit, peak_thresh, rm_markers)
bloodmeal_profiles |>
filter_peaks(peak_thresh = peak_thresh) |>
rm_markers(markers = rm_markers) |>
dplyr::group_by(SampleName) |>
dplyr::mutate(n_markers = dplyr::n_distinct(Marker)) |>
dplyr::group_by(SampleName, Marker) |>
dplyr::mutate(n_alleles = dplyr::n_distinct(Allele)) |>
dplyr::ungroup() |>
dplyr::filter(n_markers == n_markers_in_kit) |>
dplyr::group_by(SampleName) |>
dplyr::filter(all(n_alleles <= 2)) |>
dplyr::ungroup() |>
dplyr::select(SampleName, Marker, Allele) |>
dplyr::mutate(SampleName = paste0("H", as.numeric(factor(SampleName)))) |>
dplyr::arrange(Marker, Allele) |>
dplyr::group_by(SampleName) |>
dplyr::summarize(all_alleles = stringr::str_c(paste0(Marker, "__", Allele), collapse = ";")) |>
dplyr::group_by(all_alleles) |>
dplyr::summarize() |>
dplyr::mutate(SampleName = paste0("H", dplyr::row_number()), .before = 1) |>
tidyr::separate_longer_delim(all_alleles, delim = ";") |>
tidyr::separate(all_alleles, sep = "__", into = c("Marker", "Allele"))
}
4 changes: 3 additions & 1 deletion R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ utils::globalVariables(
"profile",
"alleles",
"est_noc",
"locus_count"
"locus_count",
"n_markers",
"n_alleles"
)
)

Expand Down
20 changes: 10 additions & 10 deletions man/bistro.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/calc_allele_freqs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 10 additions & 10 deletions man/calc_log10_lrs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 10 additions & 10 deletions man/calc_one_log10_lr.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 10 additions & 10 deletions man/check_bistro_inputs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 33 additions & 0 deletions man/check_create_db_input.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions man/check_if_allele_freqs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

49 changes: 49 additions & 0 deletions man/create_db_from_bloodmeals.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/filter_peaks.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions man/format_allele_freqs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/format_bloodmeal_profiles.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 10ba077

Please sign in to comment.