Skip to content

Commit

Permalink
Update documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
zenalapp committed Sep 5, 2023
1 parent b999cc7 commit e459bc8
Show file tree
Hide file tree
Showing 16 changed files with 170 additions and 165 deletions.
6 changes: 3 additions & 3 deletions R/bistro.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,12 @@
#' these are included as separate rows.
#'
#' * `bloodmeal_id`: bloodmeal id
#' * `bloodmeal_locus_count`: number of loci successfully typed in the bloodmeal
#' * `locus_count`: number of loci successfully typed in the bloodmeal
#' * `est_noc`: estimated number of contributors to the bloodmeal
#' * `match`: whether a match was identified for a given bloodmeal (yes or no)
#' * `human_id`: If match, human id (NA otherwise)
#' * `log10_lr`: If match, log10 likelihood ratio (NA otherwise)
#' * `note`: Why the bloodmeal does or doesn't have a match
#' * `notes`: Why the bloodmeal does or doesn't have a match
#'
#'
#' @export
Expand Down Expand Up @@ -155,7 +155,7 @@ bistro <-
dplyr::filter(SampleName %in% human_ids) |>
rm_dups()

message("Calculating log10_lrs")
message("Calculating log10LRs")

log10_lrs <-
calc_log10_lrs(
Expand Down
20 changes: 10 additions & 10 deletions R/calc_log10_lrs.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
#' which to compute the log10_lr
#'
#' @return tibble with log10_lr for bloodmeal-human pair including bloodmeal_id,
#' human_id, bloodmeal_locus_count (number of STR loci used for matching),
#' human_id, locus_count (number of STR loci used for matching),
#' est_noc (estimated number of contributors), efm_noc (number of contributors
#' used in euroformix), log10_lr (log10 likelihood ratio), note
#' used in euroformix), log10_lr (log10 likelihood ratio), notes
#' @inheritParams bistro
#'
#' @keywords internal
Expand All @@ -33,34 +33,34 @@ calc_one_log10_lr <-
human_profile <- human_profiles |>
dplyr::filter(SampleName == human_id)

bloodmeal_locus_count <-
locus_count <-
dplyr::n_distinct(bloodmeal_profile$Marker, na.rm = TRUE)
est_noc <-
ifelse(bloodmeal_locus_count == 0, 0, ceiling(max(table(
ifelse(locus_count == 0, 0, ceiling(max(table(
bloodmeal_profile$Marker
) / 2)))
efm_noc <- min(est_noc, 3)

output_df <- tibble::tibble(
bloodmeal_id = bloodmeal_id,
human_id = human_id,
bloodmeal_locus_count = bloodmeal_locus_count,
locus_count = locus_count,
est_noc = est_noc,
efm_noc = efm_noc,
log10_lr = NA,
note = NA
notes = NA
)

if (nrow(bloodmeal_profile) == 0) {
output_df$note <- "no peaks above threshold"
output_df$notes <- "no peaks above threshold"
} else if (nrow(
dplyr::inner_join(
bloodmeal_profile |> dplyr::select(-SampleName),
human_profile |> dplyr::select(-SampleName),
by = dplyr::join_by(Marker, Allele)
)
) == 0) {
output_df$note <- "no shared alleles"
output_df$notes <- "no shared alleles"
} else {
bloodmeal_profile_list <-
format_bloodmeal_profiles(bloodmeal_profile)
Expand Down Expand Up @@ -105,9 +105,9 @@ calc_one_log10_lr <-
if (is.numeric(efm_out)) {
output_df$log10_lr <- efm_out
} else if (class(efm_out)[1] == "simpleError") {
output_df$note <- "euroformix error"
output_df$notes <- "euroformix error"
} else if (class(efm_out)[1] == "TimeoutException") {
output_df$note <- "timed out"
output_df$notes <- "timed out"
}
}

Expand Down
40 changes: 26 additions & 14 deletions R/checks.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,21 @@ check_bistro_inputs <-

kit_df <- check_kit(kit)

bm_prof_markers <- toupper(unique(bloodmeal_profiles$Marker))
hu_prof_markers <- toupper(unique(human_profiles$Marker))
kit_markers <- toupper(unique(kit_df$Marker))

check_setdiff_markers(
toupper(unique(human_profiles$Marker)),
toupper(unique(kit_df$Marker)),
bm_prof_markers,
kit_markers,
"bloodmeal_profiles",
"kit"
)
check_setdiff_markers(
toupper(unique(human_profiles$Marker)),
toupper(unique(kit_df$Marker)), "human_profiles", "kit"
hu_prof_markers,
kit_markers,
"human_profiles",
"kit"
)

check_calc_allele_freqs(calc_allele_freqs)
Expand Down Expand Up @@ -143,11 +149,16 @@ check_if_allele_freqs <-
"If `calc_allele_freqs = FALSE`, ",
"then `pop_allele_freqs` is required."
)
} else if (!is.null(pop_allele_freqs) && calc_allele_freqs == FALSE) {
} else if (!is.null(pop_allele_freqs) &&
calc_allele_freqs == FALSE) {
check_colnames(pop_allele_freqs, c("Allele"))

pop_freq_markers <-
toupper(names(pop_allele_freqs)[!names(pop_allele_freqs) == "Allele"])
kit_markers <- toupper(unique(kit_df$Marker))
check_setdiff_markers(
toupper(names(pop_allele_freqs)[!names(pop_allele_freqs) == "Allele"]),
toupper(unique(kit_df$Marker)),
pop_freq_markers,
kit_markers,
"pop_allele_freqs",
"kit"
)
Expand All @@ -169,11 +180,14 @@ check_setdiff_markers <-
markers1_name,
markers2_name) {
in_markers1_only <- setdiff(toupper(markers1), toupper(markers2))
in_markers1_only <- in_markers1_only[!is.na(in_markers1_only)]
n_in_markers1_only <- length(in_markers1_only)
in_markers2_only <- setdiff(toupper(markers2), toupper(markers1))
in_markers2_only <-
setdiff(toupper(markers2), toupper(markers1))
in_markers2_only <- in_markers2_only[!is.na(in_markers2_only)]
n_in_markers2_only <- length(in_markers2_only)
if (n_in_markers1_only > 0) {
warning(
message(
n_in_markers1_only,
"/",
length(markers1),
Expand All @@ -182,12 +196,11 @@ check_setdiff_markers <-
" but not in ",
markers2_name,
": ",
paste0(in_markers1_only, collapse = ","),
"\n"
paste0(in_markers1_only, collapse = ",")
)
}
if (n_in_markers2_only > 0) {
warning(
message(
n_in_markers2_only,
"/",
length(markers2),
Expand All @@ -196,8 +209,7 @@ check_setdiff_markers <-
" but not in ",
markers1_name,
": ",
paste0(in_markers2_only, collapse = ","),
"\n"
paste0(in_markers2_only, collapse = ",")
)
}
}
Expand Down
42 changes: 21 additions & 21 deletions R/identify_matches.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
#' @param log10_lrs output from [calc_log10_lrs()]
#'
#' @return tibble with matches for bloodmeal-human pairs including bloodmeal_id,
#' bloodmeal_locus_count (number of STR loci used for matching), est_noc
#' locus_count (number of STR loci used for matching), est_noc
#' (estimated number of contributors), match, human_id (if match), log10_lr
#' (log10 likelihood ratio), note
#' (log10 likelihood ratio), notes
#' @inheritParams calc_one_log10_lr
#' @keywords internal
identify_one_match_set <- function(log10_lrs, bloodmeal_id) {
Expand All @@ -14,26 +14,26 @@ identify_one_match_set <- function(log10_lrs, bloodmeal_id) {
dplyr::filter(bloodmeal_id == bm_id)

est_noc <- unique(log10_lrs$est_noc)
bloodmeal_locus_count <- unique(log10_lrs$bloodmeal_locus_count)
notes <- stringr::str_c(unique(log10_lrs$note), collapse = ";")
locus_count <- unique(log10_lrs$locus_count)
notes <- stringr::str_c(unique(log10_lrs$notes), collapse = ";")

matches <- tibble::tibble(
bloodmeal_id = bloodmeal_id,
locus_count = locus_count,
est_noc = est_noc,
bloodmeal_locus_count = bloodmeal_locus_count,
match = "no",
human_id = NA,
log10_lr = NA,
note = notes,
notes = notes,
thresh_low = NA
)

if (all(is.na(log10_lrs$log10_lr))) {
matches_thresh <- matches
} else if (max(log10_lrs$log10_lr[!is.infinite(log10_lrs$log10_lr)]) < 1.5 &&
is.na(notes)) {
is.na(notes)) {
matches_thresh <- matches |>
dplyr::mutate(note = "all log10_lr < 1.5")
dplyr::mutate(notes = "all log10LRs < 1.5")
} else {
# matches can only have log10_lrs > 1
log10_lrs <- log10_lrs |>
Expand All @@ -48,24 +48,24 @@ identify_one_match_set <- function(log10_lrs, bloodmeal_id) {
matches_thresh <- log10_lrs |>
dplyr::filter(log10_lr >= thresh) |>
dplyr::mutate(
note = ifelse(
notes = ifelse(
dplyr::n() > est_noc,
"> min NOC matches",
"passed all filters"
),
match = ifelse(note == "passed all filters", "yes", "no"),
human_id = ifelse(note == "> min NOC matches", NA, human_id),
log10_lr = ifelse(note == "> min NOC matches", NA, log10_lr),
match = ifelse(notes == "passed all filters", "yes", "no"),
human_id = ifelse(notes == "> min NOC matches", NA, human_id),
log10_lr = ifelse(notes == "> min NOC matches", NA, log10_lr),
thresh_low = thresh
) |>
dplyr::select(
bloodmeal_id,
locus_count,
est_noc,
bloodmeal_locus_count,
match,
human_id,
log10_lr,
note,
notes,
thresh_low
) |>
dplyr::distinct() # do we need this?
Expand All @@ -80,21 +80,21 @@ identify_one_match_set <- function(log10_lrs, bloodmeal_id) {
dplyr::pull(human_id)

if ((identical(mht, mlt) &&
nrow(matches_thresh) == est_noc) ||
matches_thresh$note[1] == "> min NOC matches" ||
thresh == 0.5) {
nrow(matches_thresh) == est_noc) ||
matches_thresh$notes[1] == "> min NOC matches" ||
thresh == 0.5) {
break
}

mht <- mlt
}

if (nrow(matches_thresh) < est_noc ||
matches_thresh$note[1] == "> min NOC matches" ||
matches_thresh$thresh_low[1] == 1) {
matches_thresh$notes[1] == "> min NOC matches" ||
matches_thresh$thresh_low[1] == 1) {
# are 1st and last both required above?
temp <- matches |>
dplyr::filter(note != "> min NOC matches") |>
dplyr::filter(notes != "> min NOC matches") |>
dplyr::group_by(thresh_low) |>
dplyr::mutate(
n_samps = dplyr::n_distinct(human_id),
Expand All @@ -105,7 +105,7 @@ identify_one_match_set <- function(log10_lrs, bloodmeal_id) {
dplyr::distinct() |>
dplyr::arrange(thresh_low) |>
dplyr::mutate(next_same = human_id == dplyr::lead(human_id) &
note == dplyr::lead(note)) |>
notes == dplyr::lead(notes)) |>
dplyr::filter(next_same) |>
dplyr::filter(n_samps == suppressWarnings(max(n_samps))) |>
dplyr::slice_max(thresh_low) |>
Expand Down
4 changes: 2 additions & 2 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ utils::globalVariables(
"all_alleles",
"n",
"log10_lr",
"note",
"notes",
"human_id",
"thresh_low",
"next_same",
Expand All @@ -26,7 +26,7 @@ utils::globalVariables(
# import euroformix (required to get kit)
#' @import euroformix

# no note for codetools
# no notes for codetools
ignore_unused_imports <- function() {
codetools::checkUsage
}
Expand Down
2 changes: 1 addition & 1 deletion README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,4 @@ detools::install_github("duke-malaria-collaboratory/bistro")

## Usage

Check out `vignette('bistro')` for more information.
Check out the [vignette](https://duke-malaria-collaboratory.github.io/bistro/articles/bistro.html) for more information.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,6 @@ detools::install_github("duke-malaria-collaboratory/bistro")

## Usage

Check out `vignette('bistro')` for more information.
Check out the
[vignette](https://duke-malaria-collaboratory.github.io/bistro/articles/bistro.html)
for more information.
4 changes: 2 additions & 2 deletions man/bistro.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/calc_one_log10_lr.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/identify_one_match_set.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit e459bc8

Please sign in to comment.