diff --git a/.Rbuildignore b/.Rbuildignore index 49e423eb..49a1daa5 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -18,7 +18,7 @@ ^vignettes/cache$ ^vignettes/fig_output$ ^doc$ -^Meta$ ^.*\.xlsx$ -^.*\.rds$ +^[^/]*\.rds$ ^\.github$ +^cran-comments.md$ diff --git a/.gitignore b/.gitignore index 773cc47b..3619e6c9 100644 --- a/.gitignore +++ b/.gitignore @@ -47,4 +47,5 @@ vignettes/**/*.pdf !/docs/* inst/doc doc +docs Meta diff --git a/DESCRIPTION b/DESCRIPTION index fc3801fa..4fbcbc04 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: isoreader -Title: Read IRMS Data Files -Description: R interface to IRMS (isotope ratio mass spectrometry) file formats typically used in stable isotope geochemistry. +Title: Read Stable Isotope Data Files +Description: R interface to isotope ratio mass spectrometry file formats used in stable isotope geochemistry. Version: 1.2.3 Authors@R: person("Sebastian", "Kopf", email = "sebastian.kopf@colorado.edu", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-2044-0201")) URL: https://github.com/isoverse/isoreader diff --git a/R/aggregate_data.R b/R/aggregate_data.R index 69059fe2..212c3fd8 100644 --- a/R/aggregate_data.R +++ b/R/aggregate_data.R @@ -9,34 +9,34 @@ check_iso_file_param <- function(iso_file) { # Data summary information ===== #' Get data summary -#' -#' Summarize the data information from one or multiple iso files. +#' +#' Summarize the data information from one or multiple iso files. 
#' @inheritParams iso_read_files #' @param iso_files single iso file or collection of iso_file objects #' @return a \code{\link[tibble]{tibble}} that summarizes the data in the \code{iso_files} #' @export iso_get_data_summary <- function(iso_files, quiet = default(quiet)) { - + # global vars file_subpath <- file_path_ <- NULL - + iso_files <- iso_as_file_list(iso_files) - + if (!quiet) { - glue("Info: aggregating data summary from {length(iso_files)} data file(s)") %>% + glue("Info: aggregating data summary from {length(iso_files)} data file(s)") %>% message() } - + if (length(iso_files) == 0) return(tibble()) - + # aggregate all the info tibble( file_id = names(iso_files), file_path_ = map_chr( - iso_files, + iso_files, ~if (col_in_df(.x$file_info, "file_path")) { .x$file_info$file_path } else { NA_character_ }), file_subpath = map_chr( - iso_files, + iso_files, ~if (col_in_df(.x$file_info, "file_subpath")) { .x$file_info$file_subpath } else { NA_character_ }) ) %>% left_join(get_raw_data_info(iso_files), by = "file_id") %>% @@ -47,24 +47,24 @@ iso_get_data_summary <- function(iso_files, quiet = default(quiet)) { if (!iso_is_scan(iso_files)) left_join(., get_vendor_data_table_info(iso_files), by = "file_id") else . 
- } %>% + } %>% mutate(file_path = ifelse(!is.na(file_subpath), glue("{file_path_}|{file_subpath}"), file_path_)) %>% select(-file_path_, -file_subpath) } # summary of raw data info get_raw_data_info <- function(iso_files) { - + # make sure to convert to file list iso_files <- iso_as_file_list(iso_files) - + # make sure to not process empty list if (length(iso_files) == 0) return(tibble(file_id = character(), raw_data = character())) raw_data_not_read <- "raw data not read" - + # retrieve the raw data info - raw_data_sum <- + raw_data_sum <- tibble( file_id = names(iso_files), read_raw_data = map_lgl(iso_files, ~.x$read_options$raw_data), @@ -73,34 +73,34 @@ get_raw_data_info <- function(iso_files) { full_ions = map2_chr(.data$all_ions, .data$n_ions, ~if(.y > 0) { collapse(.x, sep = ", ") } else {""}), ions = .data$full_ions %>% str_replace_all("[^0-9,]", "") ) - + if (iso_is_continuous_flow(iso_files)) { - raw_data_sum <- raw_data_sum %>% + raw_data_sum <- raw_data_sum %>% mutate( n_tps = map_int(iso_files, ~nrow(.x$raw_data)), label = case_when( read_raw_data & stringr::str_detect(.data$full_ions, "[iIvV]C") ~ glue("{n_tps} time points, {n_ions} channels ({ions})"), - read_raw_data ~ glue("{n_tps} time points, {n_ions} ions ({ions})"), + read_raw_data ~ glue("{n_tps} time points, {n_ions} ions ({ions})"), TRUE ~ "raw data not read" ) ) } else if (iso_is_dual_inlet(iso_files)) { - raw_data_sum <- raw_data_sum %>% + raw_data_sum <- raw_data_sum %>% mutate( n_cycles = map_int(iso_files, ~as.integer(floor(nrow(.x$raw_data)/2))), label = case_when( read_raw_data & stringr::str_detect(full_ions, "[iIvV]C") ~ glue("{n_cycles} cycles, {n_ions} channels ({ions})"), - read_raw_data ~ glue("{n_cycles} cycles, {n_ions} ions ({ions})"), + read_raw_data ~ glue("{n_cycles} cycles, {n_ions} ions ({ions})"), TRUE ~ "raw data not read" ) ) } else if (iso_is_scan(iso_files)) { - raw_data_sum <- raw_data_sum %>% + raw_data_sum <- raw_data_sum %>% mutate( n_tps = map_int(iso_files, 
~nrow(.x$raw_data)), label = case_when( read_raw_data & stringr::str_detect(full_ions, "[iIvV]C") ~ glue("{n_tps} measurements, {n_ions} channels ({ions})"), - read_raw_data ~ glue("{n_tps} measurements, {n_ions} ions ({ions})"), + read_raw_data ~ glue("{n_tps} measurements, {n_ions} ions ({ions})"), TRUE ~ "raw data not read" ) ) @@ -111,16 +111,16 @@ get_raw_data_info <- function(iso_files) { # should not get here glue("cannot process '{class(iso_files[[1]])[1]}' in get_raw_data_info") %>% stop(call. = FALSE) } - + return(dplyr::select(raw_data_sum, .data$file_id, raw_data = .data$label)) } # summary of file info get_file_info_info <- function(iso_files) { - + # make sure to convert to file list iso_files <- iso_as_file_list(iso_files) - + # make sure to not process empty list if (length(iso_files) == 0) { tibble(file_id = character(), file_info = character()) @@ -136,10 +136,10 @@ get_file_info_info <- function(iso_files) { # summary of method info get_method_info_info <- function(iso_files) { - + # make sure to convert to file list - iso_files <- iso_as_file_list(iso_files) - + iso_files <- iso_as_file_list(iso_files) + # make sure to not process empty list if (length(iso_files) == 0) { tibble(file_id = character(), method_info = character()) @@ -156,16 +156,16 @@ get_method_info_info <- function(iso_files) { has_standards ~ "standards", has_resistors ~ "resistors", TRUE ~ "no method info" - ) + ) ) %>% select(.data$file_id, .data$method_info) } - + } # summary of vendor data table get_vendor_data_table_info <- function(iso_files) { # make sure to convert to file list - iso_files <- iso_as_file_list(iso_files) + iso_files <- iso_as_file_list(iso_files) # make sure to not process empty list if (length(iso_files) == 0) { @@ -181,7 +181,7 @@ get_vendor_data_table_info <- function(iso_files) { !read_vendor_data_table ~ "vendor data table not read", .data$rows > 0 & .data$cols > 0 ~ sprintf("%d rows, %d columns", .data$rows, .data$cols), TRUE ~ "no vendor data 
table" - ) + ) ) %>% select(.data$file_id, .data$vendor_data_table) } } @@ -189,10 +189,10 @@ get_vendor_data_table_info <- function(iso_files) { # Specific data aggregation calls ===== #' DEPRECATED -#' +#' #' Please use \link{iso_get_all_data} instead. #' @param ... forwarded to \link{iso_get_all_data} -#' +#' #' @export iso_get_data <- function(...) { warning("'iso_get_data()' is deprecated in favor of the more descriptive 'iso_get_all_data()'. Please use 'iso_get_all_data()' directly to avoid this warning.", immediate. = TRUE, call. = FALSE) @@ -200,9 +200,9 @@ iso_get_data <- function(...) { } #' Aggregate all isofiles data -#' +#' #' This function aggregates all isofiles data and returns it in a large data frame with nested columns for each type of information (file_info, raw_data, etc.). For targeted retrieval of specific data \code{\link{iso_get_raw_data}}, \code{\link{iso_get_file_info}}, \code{\link{iso_get_vendor_data_table}}, etc. are much faster and easier to work with. This function is primarily useful for downstream processing pipelines that want to carry all information along. To \code{\link[tidyr:nest]{unnest}} any of the specific data types (e.g. \code{raw_data}), make sure to filter first for the files that have this data type available (e.g. \code{filter(has_raw_data)}). Exclude specific types of information by setting its \code{include...} parameter to \code{NULL} (Note: for historical reasons, setting it to \code{FALSE} will also include the information). -#' +#' #' @inheritParams iso_get_raw_data #' @inheritParams iso_get_standards #' @inheritParams iso_get_vendor_data_table @@ -215,26 +215,26 @@ iso_get_data <- function(...) 
{ #' @family data retrieval functions #' @export iso_get_all_data <- function( - iso_files, - include_file_info = everything(), include_raw_data = everything(), - include_standards = everything(), include_resistors = everything(), - include_vendor_data_table = everything(), + iso_files, + include_file_info = everything(), include_raw_data = everything(), + include_standards = everything(), include_resistors = everything(), + include_vendor_data_table = everything(), include_problems = NULL, - gather = FALSE, with_explicit_units = with_units, + gather = FALSE, with_explicit_units = with_units, with_units = FALSE, with_ratios = NULL, quiet = default(quiet)) { - + # info iso_files <- iso_as_file_list(iso_files) if (!quiet) sprintf("Info: aggregating all data from %d data file(s)", length(iso_files)) %>% message() - + # deprecated parameter if (!missing(with_ratios)) { warning("the 'with_ratios' parameter is deprecated, please use the column selection parameter 'include_standards' to explicitly include or exclude ratio columns", immediate. = TRUE, call. = FALSE) } - + # is di or cf? di_or_cf <- iso_is_continuous_flow(iso_files) || iso_is_dual_inlet(iso_files) - + # select expressions include_file_info_exp <- rlang::enexpr(include_file_info) include_file_info <- !rlang::as_label(include_file_info_exp) %in% c("NULL", "FALSE") @@ -248,18 +248,18 @@ iso_get_all_data <- function( include_vendor_data_table <- di_or_cf && !rlang::as_label(include_vendor_data_table_exp) %in% c("NULL", "FALSE") include_problems_exp <- rlang::enexpr(include_problems) include_problems <- !rlang::as_label(include_problems_exp) %in% c("NULL", "FALSE") - + # file class - file_class <- + file_class <- tibble( file_id = names(iso_files), file_type = map_chr(iso_files, ~class(.x)[1]) %>% unname() ) - + # all file data # note that this uses the iso_get_... functions to have some built in error # checking although a straight up map(~.x$...) 
would be faster - + # data merge function merge_with_file_class <- function(new_df, col_name) { nested_df <- nest(new_df[c(),], !!col_name := c(-.data$file_id)) @@ -268,46 +268,46 @@ iso_get_all_data <- function( nested_df <- bind_rows(nested_df, tibble(file_id = setdiff(file_class$file_id, nested_df$file_id), !!col_name := list(tibble()))) left_join(file_class, nested_df, by = "file_id") } - + # file info if (include_file_info) { - file_class <- iso_get_file_info(iso_files, select = !!include_file_info_exp, quiet = TRUE) %>% + file_class <- iso_get_file_info(iso_files, select = !!include_file_info_exp, quiet = TRUE) %>% merge_with_file_class("file_info") } - + # raw data if (include_raw_data) { - file_class <- iso_get_raw_data(iso_files, select = !!include_raw_data_exp, gather = gather, quiet = TRUE) %>% + file_class <- iso_get_raw_data(iso_files, select = !!include_raw_data_exp, gather = gather, quiet = TRUE) %>% merge_with_file_class("raw_data") } - + # standards if (include_standards) { - file_class <- iso_get_standards(iso_files, select = !!include_standards_exp, quiet = TRUE) %>% + file_class <- iso_get_standards(iso_files, select = !!include_standards_exp, quiet = TRUE) %>% merge_with_file_class("standards") } - + # resistors if (include_resistors) { - file_class <- iso_get_resistors(iso_files, select = !!include_resistors_exp, quiet = TRUE) %>% + file_class <- iso_get_resistors(iso_files, select = !!include_resistors_exp, quiet = TRUE) %>% merge_with_file_class("resistors") } - + # vendor data table (only cflow and dual inlet) if (include_vendor_data_table) { file_class <- iso_get_vendor_data_table( - iso_files, - with_explicit_units = with_explicit_units, - select = !!include_vendor_data_table_exp, quiet = TRUE) %>% + iso_files, + with_explicit_units = with_explicit_units, + select = !!include_vendor_data_table_exp, quiet = TRUE) %>% merge_with_file_class("vendor_data_table") } - + # problems if (include_problems) { - file_class <- 
iso_get_problems(iso_files, select = !!include_problems_exp) %>% + file_class <- iso_get_problems(iso_files, select = !!include_problems_exp) %>% merge_with_file_class("problems") } - + return(file_class) } @@ -317,17 +317,17 @@ iso_get_all_data <- function( #' #' @inheritParams iso_get_raw_data #' @inheritParams iso_select_file_info -#' @param select which columns to select - use \code{c(...)} to select multiple, supports all \link[dplyr]{select} syntax including renaming columns. File id is always included and cannot be renamed. +#' @param select which columns to select - use \code{c(...)} to select multiple, supports all \link[dplyr]{select} syntax including renaming columns. File id is always included and cannot be renamed. #' @param simplify if set to TRUE (the default), nested value columns in the file info will be unnested as long as they are compatible across file types. Note that file info entries with multiple values still remain nested multi-value (=list) columns even with \code{simplify=TRUE}. These can be unnested using \link[tidyr:nest]{unnest}. #' @family data retrieval functions -#' @note this function used to allow selecting/renaming different file_info_columns in different files to the same column. This was a significant speed impediment and only covered very rare use cases. It is still available in the related function \code{\link{iso_select_file_info}} with a special flag but is no longer the default and not incouraged for use in the frequently called \code{iso_get_file_info}. +#' @note this function used to allow selecting/renaming different file_info_columns in different files to the same column. This was a significant speed impediment and only covered very rare use cases. It is still available in the related function \code{\link{iso_select_file_info}} with a special flag but is no longer the default and not encouraged for use in the frequently called \code{iso_get_file_info}. 
#' @export iso_get_file_info <- function(iso_files, select = everything(), file_specific = FALSE, simplify = TRUE, quiet = default(quiet)) { - + # make sure it's an iso file list iso_files <- iso_as_file_list(iso_files) select_exp <- rlang::enexpr(select) - + if (!quiet) { glue::glue( "Info: aggregating file info from {length(iso_files)} data file(s)", @@ -335,38 +335,38 @@ iso_get_file_info <- function(iso_files, select = everything(), file_specific = message() } check_read_options(iso_files, "file_info") - + # retrieve info - file_info <- iso_files %>% - { + file_info <- iso_files %>% + { if (rlang::as_label(select_exp) != "everything()") # select columns - iso_select_file_info(., !!select_exp, file_specific = file_specific, quiet = TRUE) + iso_select_file_info(., !!select_exp, file_specific = file_specific, quiet = TRUE) else . # much faster (if selecting everything) - } %>% + } %>% # retrieve file info - map(~.x$file_info) %>% - # combine in data frame (use safe bind to make sure different data column + map(~.x$file_info) %>% + # combine in data frame (use safe bind to make sure different data column # types of the same name don't trip up the combination) - safe_bind_rows() - + safe_bind_rows() + # check if empty if(nrow(file_info) == 0) return(tibble(file_id = character(0))) - + # simplify by disaggregated columns if (simplify) # unnest aggregated columns file_info <- unnest_aggregated_data_frame(file_info) - + return(file_info) } # note: consider providing a separate iso_gather_raw_data method that works just on the raw data table and could be used in other contexts #' Aggregate raw data -#' +#' #' Aggregate the raw ion data from the provided iso_files. Can aggregate either in a wide table (for easy overview) or a gathered long table (for plotting and further data processing). The raw data is only available if the iso_files were read with parameter \code{read_raw_data=TRUE}. 
-#' +#' #' @inheritParams iso_read_files #' @param iso_files collection of iso_file objects #' @param select which data columns to select - use \code{c(...)} to select multiple, supports all \link[dplyr]{select} syntax. By default, all columns are selected. @@ -376,57 +376,57 @@ iso_get_file_info <- function(iso_files, select = everything(), file_specific = #' @family data retrieval functions #' @export iso_get_raw_data <- function(iso_files, select = everything(), gather = FALSE, include_file_info = NULL, quiet = default(quiet)) { - + # global raw_data <- NULL - + iso_files <- iso_as_file_list(iso_files) select_exp <- rlang::enexpr(select) include_file_info_quo <- enquo(include_file_info) - if (!quiet) { + if (!quiet) { glue::glue( "Info: aggregating raw data from {length(iso_files)} data file(s)", "{get_info_message_concat(select_exp, prefix = ', selecting data columns ', empty = 'everything()')}", "{get_info_message_concat(include_file_info_quo, prefix = ', including file info ')}") %>% message() } check_read_options(iso_files, "raw_data") - + # check whether there are any if (length(iso_files) == 0) return(tibble()) - + # fetch data data <- # fetch data tibble( file_id = names(iso_files), raw_data = map(iso_files, ~.x$raw_data) - ) %>% + ) %>% # make sure to include only existing raw data - filter(!map_lgl(raw_data, is.null)) %>% + filter(!map_lgl(raw_data, is.null)) %>% # unnest unnest(raw_data) - + # check for rows if (nrow(data) == 0) return(dplyr::select(data, .data$file_id)) - + # selecting columns select_cols <- get_column_names(data, select = select_exp, n_reqs = list(select = "*"), cols_must_exist = FALSE)$select - if (!"file_id" %in% select_cols) + if (!"file_id" %in% select_cols) select_cols <- c("file_id", select_cols) # file id always included - data <- data %>% + data <- data %>% # focus on selected columns only (also takes care of the rename) - dplyr::select(!!!select_cols) - + dplyr::select(!!!select_cols) + # if gathering if (gather) { 
data_cols_re <- "^([^0-9]+)(\\d+/?\\d*)(\\.(.+))?$" gather_cols <- stringr::str_subset(names(data), data_cols_re) - data <- data %>% + data <- data %>% # gather all masses and ratios - tidyr::pivot_longer(gather_cols, names_to = "column", values_to = "value", values_drop_na = TRUE) %>% + tidyr::pivot_longer(gather_cols, names_to = "column", values_to = "value", values_drop_na = TRUE) %>% # extract unit information - extract(.data$column, into = c("prefix", "data", "extra_parens", "units"), regex = data_cols_re) %>% - dplyr::select(-.data$extra_parens) %>% + extract(.data$column, into = c("prefix", "data", "extra_parens", "units"), regex = data_cols_re) %>% + dplyr::select(-.data$extra_parens) %>% mutate( # units cleanup units = ifelse(is.na(units) | nchar(units) == 0, NA_character_, units), @@ -442,10 +442,10 @@ iso_get_raw_data <- function(iso_files, select = everything(), gather = FALSE, i .data$category == "delta" ~ paste0("d", .data$data), TRUE ~ paste0(.data$prefix, .data$data) ) - ) %>% + ) %>% dplyr::select(-.data$prefix) - } - + } + # if file info if (!quo_is_null(include_file_info_quo)) { info <- iso_get_file_info(iso_files, select = !!include_file_info_quo, quiet = TRUE) @@ -456,71 +456,71 @@ iso_get_raw_data <- function(iso_files, select = everything(), gather = FALSE, i #' Aggregate background data -#' +#' #' Aggregate the background data from the provided iso_files. Can aggregate either in a wide table (for easy overview) or a gathered long table (for plotting and further data processing). The background data is only available if the iso_files were read with parameter \code{read_raw_data=TRUE}. 
-#' +#' #' @inheritParams iso_get_raw_data #' @family data retrieval functions #' @export iso_get_bgrd_data <- function(iso_files, select = everything(), gather = FALSE, include_file_info = NULL, quiet = default(quiet)) { - + # global vars bgrd_data <- NULL - + iso_files <- iso_as_file_list(iso_files) if (!all(map_lgl(iso_files, iso_is_dual_inlet))) stop("background data is only available in dual inlet data files", call. = FALSE) select_exp <- rlang::enexpr(select) include_file_info_quo <- enquo(include_file_info) - if (!quiet) { + if (!quiet) { glue( "Info: aggregating background data from {length(iso_files)} data file(s)", "{get_info_message_concat(select_exp, prefix = ', selecting data columns ', empty = 'everything()')}", "{get_info_message_concat(include_file_info_quo, prefix = ', including file info ')}") %>% message() } check_read_options(iso_files, "raw_data") - + # check whether there are any if (length(iso_files) == 0) return(tibble()) - + # fetch data data <- # fetch data tibble( file_id = names(iso_files), bgrd_data = map(iso_files, ~.x$bgrd_data) - ) %>% + ) %>% # make sure to include only existing raw data - filter(!map_lgl(bgrd_data, is.null)) %>% + filter(!map_lgl(bgrd_data, is.null)) %>% # unnest unnest(bgrd_data) - + # check for rows if (nrow(data) == 0) return(dplyr::select(data, .data$file_id)) - + # selecting columns select_cols <- get_column_names(data, select = select_exp, n_reqs = list(select = "*"), cols_must_exist = FALSE)$select - if (!"file_id" %in% select_cols) + if (!"file_id" %in% select_cols) select_cols <- c("file_id", select_cols) # file info always included - data <- data %>% + data <- data %>% # focus on selected columns only (also takes care of the rename) - dplyr::select(!!!select_cols) - + dplyr::select(!!!select_cols) + # if gathering if (gather) { column <- value <- extra_parens <- category <- NULL # global vars masses_ratios_re <- "^([vir])(\\d+/?\\d*)(\\.(.+))?$" - data <- data %>% + data <- data %>% # gather all masses 
and ratios - gather(column, value, matches(masses_ratios_re)) %>% + gather(column, value, matches(masses_ratios_re)) %>% # extract unit information - extract(.data$column, into = c("category", "data", "extra_parens", "units"), regex = masses_ratios_re) %>% - dplyr::select(-.data$extra_parens) %>% + extract(.data$column, into = c("category", "data", "extra_parens", "units"), regex = masses_ratios_re) %>% + dplyr::select(-.data$extra_parens) %>% # remove unknown data - filter(!is.na(.data$value)) %>% + filter(!is.na(.data$value)) %>% # assign category mutate(category = ifelse(.data$category == "r", "ratio", "mass")) - } - + } + # if file info if (!quo_is_null(include_file_info_quo)) { info <- iso_get_file_info(iso_files, select = !!include_file_info_quo, quiet = TRUE) @@ -530,10 +530,10 @@ iso_get_bgrd_data <- function(iso_files, select = everything(), gather = FALSE, } #' DEPRECATED -#' +#' #' Please use \link{iso_get_standards} instead. #' @param ... forwarded to \link{iso_get_standards} -#' +#' #' @export iso_get_standards_info <- function(...) { warning("'iso_get_standards_info()' is deprecated in favor of the simpler 'iso_get_standards()'. Please use 'iso_get_standards()' directly to avoid this warning.", immediate. = TRUE, call. = FALSE) @@ -542,39 +542,39 @@ iso_get_standards_info <- function(...) { #' Aggregate standards from methods info #' -#' Aggregates the isotopic standard information recovered from the provided iso_files. Can aggregate just the standards' delta values or combine the delta values with the recovered ratios (if any). Use paramter \code{select} to exclude/include the ratios. All standards info is only available if the iso_files were read with parameter \code{read_method_info=TRUE}. +#' Aggregates the isotopic standard information recovered from the provided iso_files. Can aggregate just the standards' delta values or combine the delta values with the recovered ratios (if any). Use parameter \code{select} to exclude/include the ratios. 
All standards info is only available if the iso_files were read with parameter \code{read_method_info=TRUE}. #' #' @inheritParams iso_get_raw_data #' @param select which data columns to select - use \code{c(...)} to select multiple, supports all \link[dplyr]{select} syntax. By default, everything is included (both standards and ratios). To omit the ratios, change to \code{select = file_id:reference}. -#' @param with_ratios deprecated, please use the \code{select} paramter to explicitly include or exclude ratio columns +#' @param with_ratios deprecated, please use the \code{select} parameter to explicitly include or exclude ratio columns #' @family data retrieval functions #' @export iso_get_standards <- function(iso_files, select = everything(), include_file_info = NULL, with_ratios = NULL, quiet = default(quiet)) { - + iso_files <- iso_as_file_list(iso_files) - + # safety checks if (iso_is_scan(iso_files)) stop("scan files don't have standards information", call. = FALSE) else if (!iso_is_continuous_flow(iso_files) && !iso_is_dual_inlet(iso_files)) stop("only dual inlet and continuous flow files can have standards information", call. = FALSE) - + include_file_info_quo <- enquo(include_file_info) - if (!quiet) { + if (!quiet) { sprintf("Info: aggregating standards info from %d data file(s)%s", length(iso_files), get_info_message_concat(include_file_info_quo, prefix = ", including file info ")) %>% message() } - + # deprecated parameter if (!missing(with_ratios)) { warning("the 'with_ratios' parameter is deprecated, please use the column selection parameter 'select' to explicitly include or exclude ratio columns", immediate. = TRUE, call. 
= FALSE) } - + check_read_options(iso_files, "method_info") - + # check whether there are any if (length(iso_files) == 0) return(tibble()) - + # fetch data data <- # fetch data @@ -582,33 +582,33 @@ iso_get_standards <- function(iso_files, select = everything(), include_file_inf file_id = names(iso_files), standards = map(iso_files, ~.x$method_info$standard), ref_ratios = map(iso_files, ~.x$method_info$reference_ratios) - ) - + ) + # check for rows if (nrow(data) == 0) return(dplyr::select(data, .data$file_id)) - + # merge info - standards <- data %>% - dplyr::select(.data$file_id, standards) %>% - dplyr::filter(!map_lgl(.data$standards, is.null)) %>% unnest(.data$standards) - ref_ratios <- data %>% dplyr::select(.data$file_id, .data$ref_ratios) %>% - dplyr::filter(!map_lgl(.data$ref_ratios, is.null)) %>% - tidyr::unnest(.data$ref_ratios) + standards <- data %>% + dplyr::select(.data$file_id, standards) %>% + dplyr::filter(!map_lgl(.data$standards, is.null)) %>% unnest(.data$standards) + ref_ratios <- data %>% dplyr::select(.data$file_id, .data$ref_ratios) %>% + dplyr::filter(!map_lgl(.data$ref_ratios, is.null)) %>% + tidyr::unnest(.data$ref_ratios) if ("reference" %in% names(ref_ratios)) data <- dplyr::left_join(standards, ref_ratios, by = c("file_id", "reference")) else data <- standards - + # select columns (only warn if it's not the default and cols don't exist) select_exp <- rlang::enexpr(select) warn <- rlang::as_label(select_exp) != rlang::as_label(formals(iso_get_standards)$select) select_cols <- get_column_names(data, select = select_exp, n_reqs = list(select = "*"), cols_must_exist = FALSE, warn = warn)$select - if (!"file_id" %in% select_cols) + if (!"file_id" %in% select_cols) select_cols <- c("file_id", select_cols) # file info always included - + # focus on selected columns only (also takes care of the rename) data <- dplyr::select(data, !!!select_cols) %>% unique() - + # if file info if (!quo_is_null(include_file_info_quo)) { info <- 
iso_get_file_info(iso_files, select = !!include_file_info_quo, quiet = TRUE) @@ -619,11 +619,11 @@ iso_get_standards <- function(iso_files, select = everything(), include_file_inf #' DEPRECATED -#' +#' #' Please use \link{iso_get_resistors} instead. -#' +#' #' @param ... forwarded to \link{iso_get_resistors} -#' +#' #' @export iso_get_resistors_info <- function(...) { warning("'iso_get_resistors_info()' is deprecated in favor of the simpler 'iso_get_resistors()'. Please use 'iso_get_resistors()' directly to avoid this warning.", immediate. = TRUE, call. = FALSE) @@ -632,51 +632,51 @@ iso_get_resistors_info <- function(...) { #' Aggregate resistors from methods info #' -#' Aggregates the resistor information recovered from the provided iso_files. This information is only available if the iso_files were read with parameter \code{read_method_info=TRUE} and only linked to specific masses if the iso_files were additionally read with parametr \code{read_raw_data=TRUE}. +#' Aggregates the resistor information recovered from the provided iso_files. This information is only available if the iso_files were read with parameter \code{read_method_info=TRUE} and only linked to specific masses if the iso_files were additionally read with parameter \code{read_raw_data=TRUE}. 
#' #' @inheritParams iso_get_raw_data #' @family data retrieval functions #' @export iso_get_resistors <- function(iso_files, select = everything(), include_file_info = NULL, quiet = default(quiet)) { - + # global vars resistors <- NULL - + iso_files <- iso_as_file_list(iso_files) include_file_info_quo <- enquo(include_file_info) - if (!quiet) { + if (!quiet) { sprintf("Info: aggregating resistors info from %d data file(s)%s", length(iso_files), get_info_message_concat(include_file_info_quo, prefix = ", including file info ")) %>% message() } - + check_read_options(iso_files, "method_info") - + # check whether there are any files if (length(iso_files) == 0) return(tibble()) - + # fetch data data <- # fetch data tibble( file_id = names(iso_files), resistors = map(iso_files, ~.x$method_info$resistors) - ) %>% + ) %>% # make sure to include only existing raw data - dplyr::filter(!map_lgl(resistors, is.null)) %>% + dplyr::filter(!map_lgl(resistors, is.null)) %>% # unnest tidyr::unnest(resistors) # check for rows if (nrow(data) == 0) return(dplyr::select(data, .data$file_id)) - + # select columns select_cols <- get_column_names(data, select = enquo(select), n_reqs = list(select = "*"), cols_must_exist = FALSE)$select - if (!"file_id" %in% select_cols) + if (!"file_id" %in% select_cols) select_cols <- c("file_id", select_cols) # file info always included - + # focus on selected columns only (also takes care of the rename) - data <- dplyr::select(data, !!!select_cols) - + data <- dplyr::select(data, !!!select_cols) + # if file info if (!quo_is_null(include_file_info_quo)) { info <- iso_get_file_info(iso_files, select = !!include_file_info_quo, quiet = TRUE) @@ -686,9 +686,9 @@ iso_get_resistors <- function(iso_files, select = everything(), include_file_in } #' Aggregate vendor computed table data -#' +#' #' Aggregate data from the vendor-computed data table. This information is only available if the iso_files were read with parameter \code{read_vendor_data_table=TRUE}. 
-#' +#' #' @inheritParams iso_get_raw_data #' @inheritParams iso_get_file_info #' @param with_units this parameter has been DEPRECATED with the introduction of unit-data types (see \code{\link{iso_double_with_units}}) and will be removed in future versions of isoreader. Please use \code{with_explicit_units} instead if you really want columns to have units explicitly in the column name. Alternatively, consider working with the new implicit unit system and convert vendor data tables as needed with \code{\link{iso_make_units_explicit}} and \code{\link{iso_make_units_implicit}}. @@ -696,80 +696,80 @@ iso_get_resistors <- function(iso_files, select = everything(), include_file_in #' @family data retrieval functions #' @export iso_get_vendor_data_table <- function( - iso_files, with_units = FALSE, - select = everything(), include_file_info = NULL, - with_explicit_units = with_units, + iso_files, with_units = FALSE, + select = everything(), include_file_info = NULL, + with_explicit_units = with_units, quiet = default(quiet)) { - + # globals dt <- has_units <- NULL iso_files <- iso_as_file_list(iso_files) - + # safety checks if (iso_is_scan(iso_files)) stop("scan files don't have vendor data tables", call. = FALSE) else if (!iso_is_continuous_flow(iso_files) && !iso_is_dual_inlet(iso_files)) stop("only dual inlet and continuous flow files can have vendor data tables", call. 
= FALSE) - + # process include_file_info_quo <- enquo(include_file_info) - if (!quiet) { - sprintf("Info: aggregating vendor data table%s from %d data file(s)%s", + if (!quiet) { + sprintf("Info: aggregating vendor data table%s from %d data file(s)%s", if (with_explicit_units) " with explicit units" else "", length(iso_files), get_info_message_concat(include_file_info_quo, prefix = ", including file info ")) %>% message() } check_read_options(iso_files, "vendor_data_table") - + # units if (!missing(with_units)) { warning( "The 'use_units' parameter has been DEPRECATED with the introduction of unit-data types (see ?iso_double_with_units) and will be removed in future versions of isoreader. Please use parameter 'with_explicit_units' instead if you really want columns to have units explicitly in the column name. Alternatively, simply remove all units with ?iso_strip_units or consider working with the new implicit unit system and convert vendor data tables as needed with ?iso_make_units_explicit", call. = FALSE, immediate. 
= TRUE) } - + # check whether there are any files if (length(iso_files) == 0) return(tibble()) - + # get vendor data column <- units <- NULL # global vars - + # fetch data vendor_data_table <- # fetch data tibble( file_id = names(iso_files), dt = map(iso_files, ~.x$vendor_data_table) - ) %>% + ) %>% # make sure to include only existing data filter(map_lgl(dt, ~!is.null(.x) & nrow(.x) > 0)) - + # check for any rows if (nrow(vendor_data_table) == 0) return(dplyr::select(vendor_data_table, .data$file_id)) - + # make units explicit if wanted if (with_explicit_units) { - vendor_data_table <- vendor_data_table %>% + vendor_data_table <- vendor_data_table %>% mutate(dt = map(dt, iso_make_units_explicit)) } - + # unnest vendor_data_table <- dplyr::select(vendor_data_table, .data$file_id, .data$dt) %>% unnest(.data$dt) # get include information select_cols <- get_column_names(vendor_data_table, select = enquo(select), n_reqs = list(select = "*"), cols_must_exist = FALSE)$select - if (!"file_id" %in% select_cols) + if (!"file_id" %in% select_cols) select_cols <- c("file_id", select_cols) # file info always included - + # focus on selected columns only (also takes care of the rename) - vendor_data_table <- dplyr::select(vendor_data_table, !!!select_cols) - + vendor_data_table <- dplyr::select(vendor_data_table, !!!select_cols) + # include file info if (!quo_is_null(include_file_info_quo)) { info <- iso_get_file_info(iso_files, select = !!include_file_info_quo, quiet = TRUE) vendor_data_table <- right_join(info, vendor_data_table, by = "file_id") } - + return(vendor_data_table) } @@ -778,9 +778,9 @@ check_read_options <- function(iso_files, option) { iso_files <- iso_as_file_list(iso_files) option_values <- map(iso_files, "read_options") %>% map_lgl(option) if (!all(option_values)) { - warning(sum(!option_values), "/", length(iso_files), - " files were read without extracting the ", str_replace_all(option, "_", " "), - " (parameter '", str_c("read_", option), + 
warning(sum(!option_values), "/", length(iso_files), + " files were read without extracting the ", str_replace_all(option, "_", " "), + " (parameter '", str_c("read_", option), "=FALSE') and will have missing values", call. = FALSE, immediate. = TRUE) } @@ -791,29 +791,29 @@ check_read_options <- function(iso_files, option) { # helper function to convert file_path into rooted path for unrooted legacy files convert_file_path_to_rooted <- function(iso_files, root = ".", ...) { - + stopifnot(iso_is_file_list(iso_files)) - + # the ones needing updating needs_conversion <- map_lgl(iso_files, ~is.null(.x$file_info[["file_root"]]) || is.na(.x$file_info$file_root)) - + if (any(needs_conversion)) { - + # get paths - paths <- - map_chr(iso_files[needs_conversion], ~.x$file_info$file_path) %>% + paths <- + map_chr(iso_files[needs_conversion], ~.x$file_info$file_path) %>% iso_root_paths(root = root, check_existence = FALSE) - + # prepare file info updates file_info_update <- with(paths, map2(root, path, ~list(file_info = list(file_root = .x, file_path = .y)))) names(file_info_update) <- names(iso_files[needs_conversion]) - + # make sure to keep format - iso_files <- as.list(iso_files) %>% - modifyList(file_info_update) %>% + iso_files <- as.list(iso_files) %>% + modifyList(file_info_update) %>% iso_as_file_list(...) 
} - + return(iso_files) } @@ -853,16 +853,16 @@ unnest_aggregated_data_frame <- function(df) { # global vars column <- min_length <- main_class <- has_identical_class <- max_length <- identical_class <- is_missing <- NULL - + # safety stopifnot(is.data.frame(df)) if (nrow(df) == 0) return(df) - + # NA defaults NA_defaults <- list(character = NA_character_, numeric = NA_real_, integer = NA_integer_, logical = NA) - + # get information about the data frame columns - cols <- + cols <- tibble( column = names(df), id = 1:length(column), @@ -877,69 +877,69 @@ unnest_aggregated_data_frame <- function(df) { unnest_single_value = max_length == 1 & has_identical_class, renest_missing_value = min_length == 0 & max_length > 1 & has_identical_class & identical_class %in% names(NA_defaults) ) - + # warning message about inconsistent data columns with multiple data types if (any(!cols$has_identical_class)) { glue("encountered different value types within the same column(s), they cannot be automatically unnested: ", - "'{collapse(filter(cols, !has_identical_class)$column, sep = \"', '\")}'") %>% + "'{collapse(filter(cols, !has_identical_class)$column, sep = \"', '\")}'") %>% warning(immediate. = TRUE, call. 
= FALSE) } - + # unnest data for (i in 1:nrow(cols)) { - + if (cols$unnest_single_value[i]) { # unnest single values if (cols$identical_class[i] == "character") - df <- mutate(df, !!cols$column[i] := - map2_chr(!!sym(cols$column[i]), cols$is_missing[[i]], + df <- mutate(df, !!cols$column[i] := + map2_chr(!!sym(cols$column[i]), cols$is_missing[[i]], ~if(.y) { NA_character_ } else {.x[1]})) else if(cols$identical_class[i] == "numeric") - df <- mutate(df, !!cols$column[i] := - map2_dbl(!!sym(cols$column[i]), cols$is_missing[[i]], + df <- mutate(df, !!cols$column[i] := + map2_dbl(!!sym(cols$column[i]), cols$is_missing[[i]], ~if(.y) { NA_real_ } else {.x[1]})) else if (cols$identical_class[i] == "logical") - df <- mutate(df, !!cols$column[i] := - map2_lgl(!!sym(cols$column[i]), cols$is_missing[[i]], + df <- mutate(df, !!cols$column[i] := + map2_lgl(!!sym(cols$column[i]), cols$is_missing[[i]], ~if(.y) { NA } else {.x[1]})) else if (cols$identical_class[i] == "integer") - df <- mutate(df, !!cols$column[i] := - map2_int(!!sym(cols$column[i]), cols$is_missing[[i]], + df <- mutate(df, !!cols$column[i] := + map2_int(!!sym(cols$column[i]), cols$is_missing[[i]], ~if(.y) { NA_integer_ } else {.x[1]})) - else if (cols$identical_class[i] == "POSIXct") - df <- mutate(df, !!cols$column[i] := + else if (cols$identical_class[i] == "POSIXct") + df <- mutate(df, !!cols$column[i] := # have to switch to int b/c there is no map2_datetime yet - map2_int(!!sym(cols$column[i]), cols$is_missing[[i]], + map2_int(!!sym(cols$column[i]), cols$is_missing[[i]], # NA_integer_ is okay here because of the as_datetime wrapper afterwards - ~if (.y) { NA_integer_ } else { as.integer(.x[1]) }) %>% + ~if (.y) { NA_integer_ } else { as.integer(.x[1]) }) %>% as_datetime(tz = Sys.timezone())) - else if (cols$identical_class[i] == "iso_double_with_units") - df <- mutate(df, !!cols$column[i] := + else if (cols$identical_class[i] == "iso_double_with_units") + df <- mutate(df, !!cols$column[i] := do.call( - 
vctrs::vec_c, - map2(!!sym(cols$column[i]), cols$is_missing[[i]], + vctrs::vec_c, + map2(!!sym(cols$column[i]), cols$is_missing[[i]], ~if(.y) { NA } else {.x[1]}) ) ) else { - glue("cannot unnest file info column {cols$column[i]}, encountered unusual class {cols$identical_class[i]}") %>% + glue("cannot unnest file info column {cols$column[i]}, encountered unusual class {cols$identical_class[i]}") %>% warning(immediate. = TRUE, call. = FALSE) } } else if (cols$renest_missing_value[i]) { # replace NA values in columns that have too many values for unnesting (so unnesting is easy for the user) - df <- mutate(df, !!cols$column[i] := - map2(!!sym(cols$column[i]), cols$is_missing[[i]], + df <- mutate(df, !!cols$column[i] := + map2(!!sym(cols$column[i]), cols$is_missing[[i]], ~if(.y) { NA_defaults[[cols$identical_class[i]]] } else {.x})) } } - + return(df) } # helper function to concatenate list columns for export file formats that cannot handle the embedded data collapse_list_columns <- function(df, sep = ", ") { collapse_function <- function(x) collapse(x, sep = sep) - df %>% + df %>% mutate_if(.predicate = is.list, .funs = map_chr, collapse_function) } diff --git a/R/cleanup.R b/R/cleanup.R index 68a5822f..aa849d53 100644 --- a/R/cleanup.R +++ b/R/cleanup.R @@ -17,30 +17,30 @@ readr::parse_double readr::parse_datetime #' Overview of text data extraction functions -#' -#' The following functions are intened to make it easy to extract relevant information from textual data. -#' These functions are primarily intended for use in \code{\link{iso_mutate_file_info}} and inside the filtering conditions passed to \code{\link{iso_filter_files}}. However, they can of course also be used stand-alone and in regular \code{\link[dplyr]{mutate}} or \code{\link[dplyr]{filter}} calls on the data frames returned by the data retrievel functions (\code{\link{iso_get_raw_data}}, \code{\link{iso_get_file_info}}, \code{\link{iso_get_vendor_data_table}}, etc.). 
Not that all the \code{parse_} functions are used in \code{\link{iso_parse_file_info}} for easy type conversions. -#' +#' +#' The following functions are intended to make it easy to extract relevant information from textual data. +#' These functions are primarily intended for use in \code{\link{iso_mutate_file_info}} and inside the filtering conditions passed to \code{\link{iso_filter_files}}. However, they can of course also be used stand-alone and in regular \code{\link[dplyr]{mutate}} or \code{\link[dplyr]{filter}} calls on the data frames returned by the data retrieval functions (\code{\link{iso_get_raw_data}}, \code{\link{iso_get_file_info}}, \code{\link{iso_get_vendor_data_table}}, etc.). Not that all the \code{parse_} functions are used in \code{\link{iso_parse_file_info}} for easy type conversions. +#' #' For simultaneous extraction of pure text data into multiple columns, please see the \code{\link[tidyr]{extract}} function from the \link{tidyr} package. -#' +#' #' \itemize{ -#' \item \code{\link{extract_substring}} is a generic convience function to extract parts of textual data (based on regular expression matches). +#' \item \code{\link{extract_substring}} is a generic convenience function to extract parts of textual data (based on regular expression matches). #' Can be used in combination with the parsing functions to turn extracted substrings into numerical or logical data. #' -#' \item \code{\link{extract_word}} is a more specific convenience function to extract the 1st/2nd/3rd word from textual data. -#' -#' \item \code{\link[readr:parse_atomic]{parse_number}} is a convenience function to extract a number even if it is surrouded by text (re-exported from the \link{readr} package). 
-#' -#' \item \code{\link[readr:parse_atomic]{parse_double}} parses text that holds double (decimal) numerical values without any extraneous text around - +#' \item \code{\link{extract_word}} is a more specific convenience function to extract the 1st/2nd/3rd word from textual data. +#' +#' \item \code{\link[readr:parse_atomic]{parse_number}} is a convenience function to extract a number even if it is surrounded by text (re-exported from the \link{readr} package). +#' +#' \item \code{\link[readr:parse_atomic]{parse_double}} parses text that holds double (decimal) numerical values without any extraneous text around - #' use \code{\link[readr:parse_atomic]{parse_number}} instead if this is not the case (re-exported from the \link{readr} package) -#' -#' \item \code{\link[readr:parse_atomic]{parse_integer}} parses text that holds integer (whole number) numerical values without any extraneous text around - +#' +#' \item \code{\link[readr:parse_atomic]{parse_integer}} parses text that holds integer (whole number) numerical values without any extraneous text around - #' use \code{\link[readr:parse_atomic]{parse_number}} instead if this is not the case (re-exported from the \link{readr} package) -#' +#' #' \item \code{\link[readr:parse_atomic]{parse_logical}} parses text that holds logical (boolean, i.e. TRUE/FALSE) values (re-exported from the \link{readr} package) -#' +#' #' \item \code{\link[readr:parse_atomic]{parse_datetime}} parses text that holds date and time information (re-exported from the \link{readr} package) -#' +#' #' } #' @name extract_data #' @family data extraction functions @@ -48,29 +48,29 @@ NULL #' Extract a substring from text -#' -#' This is a convenience function to capture substrings from textual data. +#' +#' This is a convenience function to capture substrings from textual data. 
#' Uses \code{\link[stringr:str_match]{str_match_all}} internally but instead of returning everything, always returns only one single part of the match, depending on parameters \code{capture_n} and \code{capture_group}. -#' +#' #' @param string string to extract #' @param pattern regular expression pattern to search for #' @param capture_n within each string, which match of the \code{pattern} should be extracted? e.g. if the pattern searches for words, should the first, second or third word be captured? -#' @param capture_bracket for the captured match, which capture group should be extracted? i.e. which parentheses-enclosed segment of the \code{pattern}? -#' by default captures the whole pattern (\code{capture_bracket = 0}). +#' @param capture_bracket for the captured match, which capture group should be extracted? i.e. which parentheses-enclosed segment of the \code{pattern}? +#' by default captures the whole pattern (\code{capture_bracket = 0}). #' @param missing what to replace missing values with? Note that values can be missing because there are not enough captured matches or because the actual capture_bracket is empty. #' @return character vector of same length as \code{string} with the extracted substrings #' @family data extraction functions #' @export extract_substring <- function(string, pattern, capture_n = 1, capture_bracket = 0, missing = NA_character_) { - + # safety checks if (missing(string)) stop("no string supplied", call. = FALSE) if (missing(pattern)) stop("no extraction pattern supplied", call. = FALSE) if (length(string) == 0) return(c()) - + # find matches matches <- str_match_all(string, pattern) - + # safety checks on capture backets (are there enough?) if (ncol(matches[[1]]) < (capture_bracket+1)) stop(glue("regexp capture group {capture_bracket} requested but only {ncol(matches[[1]])-1} groups captured"), call. 
= FALSE) @@ -82,10 +82,10 @@ extract_substring <- function(string, pattern, capture_n = 1, capture_bracket = } #' Extract words from text -#' -#' This extracts words from text, by default looks for continuous sequences of numbers and/or letters. +#' +#' This extracts words from text, by default looks for continuous sequences of numbers and/or letters. #' Can adjust whether characters such as "_", "-", " ", and "." should be counted as part of a word or separate them and whether numbers should be included. -#' +#' #' @inheritParams extract_substring #' @param capture_n which word to extract? 1st, 2nd, 3rd? #' @param include_numbers whether to include numbers (0-9) as part of the word (if FALSE, numbers will work as a word separator) @@ -95,7 +95,7 @@ extract_substring <- function(string, pattern, capture_n = 1, capture_bracket = #' @param include_colon whether to include the colon character (.) as part of a word (if FALSE, it will work as a word separator) #' @family data extraction functions #' @examples -#' x_text <- extract_word(c("sample number16.2", "sample number7b"), +#' x_text <- extract_word(c("sample number16.2", "sample number7b"), #' capture_n = 2, include_colon = TRUE) #' # "number16.2" "number7b" #' x_num <- parse_number(x_text) diff --git a/R/export.R b/R/export.R index e81e989c..47ccce3b 100644 --- a/R/export.R +++ b/R/export.R @@ -1,9 +1,9 @@ ## Export functions ======= #' Export data to R Data Archive (.rda) (deprecated) -#' +#' #' This function is deprecated. Please use \code{\link{iso_save}} instead to save collections of isofiles. -#' +#' #' @inheritParams iso_get_raw_data #' @param filepath the path (folder and filename) to the export file. The correct file extension is automatically added if not already in the filename, i.e. filename can be provided with or without extension. 
#' @family export functions @@ -18,9 +18,9 @@ iso_export_to_rda <- function(iso_files, filepath, quiet = default(quiet)) { } #' Export data to Excel -#' -#' This function exports the passed in iso_files to Excel. The different kinds of data (raw data, file info, methods info, etc.) are exported to separate tabs within the excel file. Use the various \code{include_...} parameters to specifiy what information to include. Note that in rare instances where vectorized data columns exist in the file information (e.g. measurement_info), they are concatenated with ', ' in the excel export. -#' +#' +#' This function exports the passed in iso_files to Excel. The different kinds of data (raw data, file info, methods info, etc.) are exported to separate tabs within the excel file. Use the various \code{include_...} parameters to specify what information to include. Note that in rare instances where vectorized data columns exist in the file information (e.g. measurement_info), they are concatenated with ', ' in the excel export. 
+#' #' @inheritParams iso_save #' @inheritParams iso_get_all_data #' @param include_method_info deprecated in favor of the more specific include_standards and include_resistors @@ -28,23 +28,23 @@ iso_export_to_rda <- function(iso_files, filepath, quiet = default(quiet)) { #' @return returns the iso_files object invisibly for use in pipelines #' @export iso_export_to_excel <- function( - iso_files, filepath, - include_file_info = everything(), include_raw_data = everything(), - include_standards = !!enexpr(include_method_info), include_resistors = !!enquo(include_method_info), - include_vendor_data_table = everything(), include_problems = everything(), + iso_files, filepath, + include_file_info = everything(), include_raw_data = everything(), + include_standards = !!enexpr(include_method_info), include_resistors = !!enquo(include_method_info), + include_vendor_data_table = everything(), include_problems = everything(), with_explicit_units = FALSE, include_method_info = everything(), with_ratios = NULL, quiet = default(quiet)) { - + # safety checks if(!iso_is_object(iso_files)) stop("can only export iso files or lists of iso files", call. = FALSE) export_iso_files <- iso_as_file_list(iso_files) filepath <- get_excel_export_filepath(export_iso_files, filepath) - + # info message if (!quiet) { - sprintf("Info: exporting data from %d iso_files into Excel '%s'", length(export_iso_files), + sprintf("Info: exporting data from %d iso_files into Excel '%s'", length(export_iso_files), str_replace(filepath, "^\\.(/|\\\\)", "")) %>% message() } @@ -52,12 +52,12 @@ iso_export_to_excel <- function( if (!missing(include_method_info)) { warning("the 'include_method_info' parameter was deprecated in favor of the more specific 'include_resistors' and 'include_standards' parameters. Please use those directly instead in the future.", immediate. = TRUE, call. 
= FALSE) } - + # deprecated parameter if (!missing(with_ratios)) { warning("the 'with_ratios' parameter is deprecated, please use the column selection parameter 'include_standards' to explicitly include or exclude ratio columns", immediate. = TRUE, call. = FALSE) } - + # get all data all_data <- iso_get_all_data( export_iso_files, @@ -70,52 +70,52 @@ iso_export_to_excel <- function( with_explicit_units = with_explicit_units, quiet = FALSE ) - + # make excel workbook wb <- createWorkbook() - + # file info if ("file_info" %in% names(all_data)) { # note: collapse_list_columns takes care of nested vectors, they get concatenated with ', ' - file_info <- - all_data %>% select(.data$file_id, .data$file_info) %>% - unnest(.data$file_info) %>% + file_info <- + all_data %>% select(.data$file_id, .data$file_info) %>% + unnest(.data$file_info) %>% collapse_list_columns() add_excel_sheet(wb, "file info", file_info) } - + # raw data if ("raw_data" %in% names(all_data)) { raw_data <- all_data %>% select(.data$file_id, .data$raw_data) %>% unnest(.data$raw_data) add_excel_sheet(wb, "raw data", raw_data) } - + # standards if ("standards" %in% names(all_data)) { standards <- all_data %>% select(.data$file_id, standards) %>% unnest(standards) add_excel_sheet(wb, "standards", standards) - } - + } + # resistors if ("resistors" %in% names(all_data)) { resistors <- all_data %>% select(.data$file_id, .data$resistors) %>% unnest(.data$resistors) add_excel_sheet(wb, "resistors", resistors) - } - + } + # vendor data table if ("vendor_data_table" %in% names(all_data)) { - vendor_data <- all_data %>% select(.data$file_id, .data$vendor_data_table) %>% + vendor_data <- all_data %>% select(.data$file_id, .data$vendor_data_table) %>% unnest(.data$vendor_data_table) %>% iso_strip_units() add_excel_sheet(wb, "vendor data table", vendor_data) } - + # problems if ("problems" %in% names(all_data)) { problems <- all_data %>% select(.data$file_id, .data$problems) %>% unnest(.data$problems) 
add_excel_sheet(wb, "problems", problems) } saveWorkbook(wb, filepath, overwrite = TRUE) - + return(invisible(iso_files)) } @@ -124,11 +124,11 @@ iso_export_to_excel <- function( # @param dbl_digits how many digits to export for dbls # @param col_max_width maximum column width add_excel_sheet <- function(wb, sheet_name, ..., dbl_digits = 2, col_max_width = 75) { - + # sheet addWorksheet(wb, sheet_name) hs <- createStyle(textDecoration = "bold") # header style - + # data sheet_data_sets <- list(...) start_row <- 1L @@ -160,14 +160,14 @@ add_excel_sheet <- function(wb, sheet_name, ..., dbl_digits = 2, col_max_width = start_row <- start_row + nrow(sheet_data) + 2L } } - + # calculate header widths - header_widths <- - sheet_data_sets %>% + header_widths <- + sheet_data_sets %>% # account for bold width purrr::map(~nchar(names(.x))) max_n_cols <- purrr::map_int(header_widths, length) %>% max() - + # calculate data widths if (max_n_cols > 0) { calculate_data_width <- function(x) { @@ -177,9 +177,9 @@ add_excel_sheet <- function(wb, sheet_name, ..., dbl_digits = 2, col_max_width = return(max(c(0, nchar(x)), na.rm = TRUE)) } data_widths <- - sheet_data_sets %>% + sheet_data_sets %>% purrr::map( - ~dplyr::summarise_all(.x, list(calculate_data_width)) %>% + ~dplyr::summarise_all(.x, list(calculate_data_width)) %>% unlist(use.names = FALSE) ) max_widths <- purrr::map2(header_widths, data_widths , ~{ @@ -190,13 +190,13 @@ add_excel_sheet <- function(wb, sheet_name, ..., dbl_digits = 2, col_max_width = col_widths <- do.call(pmax, args = max_widths) openxlsx::setColWidths(wb, sheet_name, cols = 1:length(col_widths), widths = col_widths) } - + } #' Export to feather -#' +#' #' This function exports the passed in iso_files to the Python and R shared feather file format. The different kinds of data (raw data, file info, methods info, etc.) are exported to separate feather files that are saved with the provided \code{filepath_prefix} as prefix. 
All are only exported if the corresponding \code{include_} parameter is set to \code{TRUE} and only for data types for which this type of data is available and was read (see \code{\link{iso_read_dual_inlet}}, \code{\link{iso_read_continuous_flow}} for details on read parameters). Note that in rare instances where vectorized data columns exist in the file information (e.g. measurement_info), they are concatenated with ', ' in feather output. -#' +#' #' @inheritParams iso_save #' @inheritParams iso_export_to_excel #' @param filepath_prefix what to use as the prefix for the feather file names (e.g. name of the data collection or current date) @@ -204,27 +204,27 @@ add_excel_sheet <- function(wb, sheet_name, ..., dbl_digits = 2, col_max_width = #' @return returns the iso_files object invisibly for use in pipelines #' @export iso_export_to_feather <- function( - iso_files, filepath_prefix, - include_file_info = everything(), include_raw_data = everything(), - include_standards = !!enexpr(include_method_info), include_resistors = !!enquo(include_method_info), - include_vendor_data_table = everything(), include_problems = everything(), + iso_files, filepath_prefix, + include_file_info = everything(), include_raw_data = everything(), + include_standards = !!enexpr(include_method_info), include_resistors = !!enquo(include_method_info), + include_vendor_data_table = everything(), include_problems = everything(), with_explicit_units = FALSE, include_method_info = everything(), quiet = default(quiet)) { - + # safety checks if(!iso_is_object(iso_files)) stop("can only export iso files or lists of iso files", call. = FALSE) export_iso_files <- iso_as_file_list(iso_files) filepaths <- get_feather_export_filepaths(export_iso_files, filepath_prefix) - + # include method info message if (!missing(include_method_info)) { warning("the 'include_method_info' parameter was deprecated in favor of the more specific 'include_resistors' and 'include_standards' parameters. 
Please use those directly instead in the future.", immediate. = TRUE, call. = FALSE) } - + # info if (!quiet) { - sprintf("Info: exporting data from %d iso_files into %s files at '%s'", length(iso_as_file_list(iso_files)), + sprintf("Info: exporting data from %d iso_files into %s files at '%s'", length(iso_as_file_list(iso_files)), filepaths[['ext']], str_replace(filepaths[['base']], "^\\.(/|\\\\)", "")) %>% message() } @@ -240,48 +240,48 @@ iso_export_to_feather <- function( with_explicit_units = with_explicit_units, quiet = FALSE ) - + # create feather files in temporary dir # file info if ("file_info" %in% names(all_data)) { # note: collapse_list_columns takes care of nested vectors, they get concatenated with ', ' - all_data %>% select(.data$file_id, .data$file_info) %>% - unnest(.data$file_info) %>% - collapse_list_columns() %>% + all_data %>% select(.data$file_id, .data$file_info) %>% + unnest(.data$file_info) %>% + collapse_list_columns() %>% write_feather(filepaths[['file_info']]) } - + # raw data if ("raw_data" %in% names(all_data)) { - all_data %>% select(.data$file_id, .data$raw_data) %>% unnest(.data$raw_data) %>% + all_data %>% select(.data$file_id, .data$raw_data) %>% unnest(.data$raw_data) %>% write_feather(filepaths[['raw_data']]) } - + # standards if ("standards" %in% names(all_data)) { - all_data %>% select(.data$file_id, .data$standards) %>% unnest(.data$standards) %>% + all_data %>% select(.data$file_id, .data$standards) %>% unnest(.data$standards) %>% write_feather(filepaths[['method_info_standards']]) - } - + } + # resistors if ("resistors" %in% names(all_data)) { - all_data %>% select(.data$file_id, .data$resistors) %>% unnest(.data$resistors) %>% + all_data %>% select(.data$file_id, .data$resistors) %>% unnest(.data$resistors) %>% write_feather(filepaths[['method_info_resistors']]) - } - + } + # vendor data table if ("vendor_data_table" %in% names(all_data)) { - all_data %>% select(.data$file_id, .data$vendor_data_table) %>% - 
unnest(.data$vendor_data_table) %>% iso_strip_units() %>% + all_data %>% select(.data$file_id, .data$vendor_data_table) %>% + unnest(.data$vendor_data_table) %>% iso_strip_units() %>% write_feather(filepaths[['vendor_data_table']]) } - + # problems if ("problems" %in% names(all_data)) { - all_data %>% select(.data$file_id, .data$problems) %>% unnest(.data$problems) %>% + all_data %>% select(.data$file_id, .data$problems) %>% unnest(.data$problems) %>% write_feather(filepaths[['problems']]) } - + return(invisible(iso_files)) } @@ -297,7 +297,7 @@ get_export_filepath <- function(filepath, ext) { if (!file.exists(folder)) stop("the folder '", folder, "' does not exist", call. = FALSE) if (!is.null(ext)) filename <- filename %>% str_replace(fixed(ext), "") %>% str_c(ext) # to make sure correct extension - return(file.path(folder, filename)) + return(file.path(folder, filename)) } # excel export filephat @@ -309,7 +309,7 @@ get_excel_export_filepath <- function(iso_files, filepath) { else if (iso_is_scan(iso_files)) ext <- ".scan.xlsx" else - stop("Excel export of this type of iso_files not yet supported", call. = FALSE) + stop("Excel export of this type of iso_files not yet supported", call. = FALSE) return(get_export_filepath(filepath, ext)) } @@ -322,8 +322,8 @@ get_feather_export_filepaths <- function(iso_files, filepath) { else if (iso_is_scan(iso_files)) ext <- ".scan.feather" else - stop("Feather export of this type of iso_files not yet supported", call. = FALSE) - + stop("Feather export of this type of iso_files not yet supported", call. = FALSE) + filepath <- get_export_filepath(filepath, NULL) return( c( diff --git a/R/file_info_operations.R b/R/file_info_operations.R index 0821dcb2..2054fa66 100644 --- a/R/file_info_operations.R +++ b/R/file_info_operations.R @@ -3,21 +3,21 @@ # select ================== #' Select file info columns -#' -#' Select which file info columns (\code{\link{iso_get_file_info}}) to keep within isofile objects. 
Works just like dplyr's \link[dplyr]{select} and can rename columns on-the-fly. You can also use \link[dplyr]{select} directly but it will not provide summary information on the operation. To rename columns without removing all other information, use \link{iso_rename_file_info} instead. Set \code{file_specific = TRUE} to select different columns in different iso_files depending on what exists in each file. This is very useful when working with data from multiple instruments that may have the same information (e.g. sample name) stored in different columns. -#' +#' +#' Select which file info columns (\code{\link{iso_get_file_info}}) to keep within isofile objects. Works just like dplyr's \link[dplyr]{select} and can rename columns on-the-fly. You can also use \link[dplyr]{select} directly but it will not provide summary information on the operation. To rename columns without removing all other information, use \link{iso_rename_file_info} instead. Set \code{file_specific = TRUE} to select different columns in different iso_files depending on what exists in each file. This is very useful when working with data from multiple instruments that may have the same information (e.g. sample name) stored in different columns. +#' #' @inheritParams iso_get_raw_data -#' @param ... dplyr-style \link[dplyr]{select} conditions applied based on each file's file_info (see \code{\link{iso_get_file_info}}). Note that the \code{file_id} column will always be kept, no matter the selection criteria, and cannot be renamed to protect from unexpected behaviour. +#' @param ... dplyr-style \link[dplyr]{select} conditions applied based on each file's file_info (see \code{\link{iso_get_file_info}}). Note that the \code{file_id} column will always be kept, no matter the selection criteria, and cannot be renamed to protect from unexpected behavior. #' @param file_specific whether to run the select criteria (\code{...}) specifically within each individual file rather than on all files jointly. 
This is a lot slower but makes it possible to select different columns in different iso_files depending on what exists in each file and is mostly of use when working with data from multiple instruments. #' @family file_info operations -#' @export +#' @export iso_select_file_info <- function(iso_files, ..., file_specific = FALSE, quiet = default(quiet)) { UseMethod("iso_select_file_info") } #' @export iso_select_file_info.default <- function(iso_files, ..., file_specific = FALSE, quiet = default(quiet)) { - stop("this function is not defined for objects of type '", + stop("this function is not defined for objects of type '", class(iso_files)[1], "'", call. = FALSE) } @@ -28,52 +28,52 @@ iso_select_file_info.iso_file <- function(iso_files, ..., file_specific = FALSE, #' @export iso_select_file_info.iso_file_list <- function(iso_files, ..., file_specific = FALSE, quiet = default(quiet)) { - + # info message select_exps <- rlang::enexprs(...) select_exp <- rlang::expr(c(!!!select_exps)) - if (!quiet) { + if (!quiet) { if (file_specific) { message( "Info: selecting/renaming the following file info:") } else{ glue::glue( "Info: selecting/renaming the following file info across {length(iso_files)} data file(s): ", - if (length(select_exps) == 0) "keeping only 'file_id'" + if (length(select_exps) == 0) "keeping only 'file_id'" else get_info_message_concat(select_exps, include_names = TRUE, names_sep = "->", flip_names_and_values = TRUE) ) %>% message() } } - + # perform selections if (file_specific) { - + # run select isofiles_select <- map(iso_files, function(isofile) { # get column names - select_cols <- + select_cols <- tryCatch( get_column_names( isofile$file_info, df_name = "file_info", - select = select_exp, n_reqs = list(select = "*"), + select = select_exp, n_reqs = list(select = "*"), cols_must_exist = FALSE)$select, warning = function(w) { w$message }) - + # check if there was an error error_msg <- NA_character_ if (is.null(names(select_cols))) { error_msg <- 
select_cols select_cols <- get_column_names( - isofile$file_info, df_name = "file_info", - select = select_exp, n_reqs = list(select = "*"), + isofile$file_info, df_name = "file_info", + select = select_exp, n_reqs = list(select = "*"), cols_must_exist = FALSE, warn = FALSE)$select } - + # make sure to include file_id if (!"file_id" %in% select_cols) - select_cols <- c(c(file_id = "file_id"), select_cols) - + select_cols <- c(c(file_id = "file_id"), select_cols) + # selected variables vars <- tibble( file_id = isofile$file_info$file_id, @@ -81,29 +81,29 @@ iso_select_file_info.iso_file_list <- function(iso_files, ..., file_specific = F to = names(select_cols), changed = .data$from != .data$to ) - + # select file_info columns isofile$file_info <- dplyr::select(isofile$file_info, !!!select_cols) - + # check for file id if (!"file_id" %in% names(isofile$file_info)) { - stop("renaming the 'file_id' column inside an isofile may lead to unpredictable behaviour and is therefore not allowed, sorry", call. = FALSE) + stop("renaming the 'file_id' column inside an isofile may lead to unpredictable behavior and is therefore not allowed, sorry", call. = FALSE) } - + #return both return(list(isofile = isofile, vars = vars, error = error_msg)) }) - + # get iso files updated_iso_files <- iso_as_file_list(map(isofiles_select, "isofile")) - + # summarize individual file updates if (!quiet) { - info <- map(isofiles_select, "vars") %>% + info <- map(isofiles_select, "vars") %>% bind_rows() %>% group_by(.data$file_id) %>% summarize( - label = + label = ifelse( .data$changed, sprintf("'%s'->'%s'", .data$from, .data$to), @@ -115,71 +115,71 @@ iso_select_file_info.iso_file_list <- function(iso_files, ..., file_specific = F arrange(desc(.data$n)) message(paste(info$label, collapse = "\n")) } - + # check if same error for all files errors <- map_chr(isofiles_select, "error") if (!any(is.na(errors)) && all(errors == errors[1])) { warning(errors[[1]], immediate. = TRUE, call. 
= FALSE) } - + } else { # across all files - fast but less flexible # retrieve info - file_info <- iso_files %>% + file_info <- iso_files %>% # retrieve file info - map(~.x$file_info) %>% - # combine in data frame (use safe bind to make sure different data column + map(~.x$file_info) %>% + # combine in data frame (use safe bind to make sure different data column # types of the same name don't trip up the combination) - safe_bind_rows() - + safe_bind_rows() + # check if there are any file_info if (nrow(file_info) > 0L) { - + # selecting columns select_cols <- get_column_names( - file_info, select = select_exp, - n_reqs = list(select = "*"), + file_info, select = select_exp, + n_reqs = list(select = "*"), cols_must_exist = FALSE)$select - - if (!"file_id" %in% select_cols) + + if (!"file_id" %in% select_cols) select_cols <- c("file_id", select_cols) # file id always included - + # final processing - file_info <- - file_info %>% + file_info <- + file_info %>% # focus on selected columns only (also takes care of the rename) - dplyr::select(!!!select_cols) - + dplyr::select(!!!select_cols) + # check for file id if (!"file_id" %in% names(file_info)) { - stop("renaming the 'file_id' column inside an isofile may lead to unpredictable behaviour and is therefore not allowed, sorry", call. = FALSE) + stop("renaming the 'file_id' column inside an isofile may lead to unpredictable behavior and is therefore not allowed, sorry", call. 
= FALSE) } - + # convert back to list format file_info <- file_info %>% # should still be list columns but doesn't hurt to check ensure_data_frame_list_columns() %>% # split by file info - split(seq(nrow(file_info))) %>% + split(seq(nrow(file_info))) %>% # clean back out the columns that were only added through the row bind map(~.x[!map_lgl(.x, ~is.list(.x) && all(map_lgl(.x, is.null)))]) - + # update updated_iso_files <- map2(iso_files, file_info, ~{ .x$file_info <- .y; .x }) %>% iso_as_file_list() - + } else { # no updates updated_iso_files <- iso_files } } - + # return updated iso files return(updated_iso_files) } - + #' @export select.iso_file <- function(.data, ...) { @@ -196,20 +196,20 @@ select.iso_file_list <- function(.data, ...) { # rename ================== #' Rename file info columns -#' -#' Rename file info columns (\code{\link{iso_get_file_info}}) within isofile objects. Works just like dplyr's \link[dplyr]{rename}. You can also use \link[dplyr]{rename} directly but it will not provide summary information on the operation. To select specific columns to keep (discarding all others), use \link{iso_select_file_info} instead. Set \code{file_specific = TRUE} to rename different columns in different iso_files depending on what exists in each file. This is very useful when working with data from multiple instruments that may have the same information (e.g. sample name) stored in different columns. -#' +#' +#' Rename file info columns (\code{\link{iso_get_file_info}}) within isofile objects. Works just like dplyr's \link[dplyr]{rename}. You can also use \link[dplyr]{rename} directly but it will not provide summary information on the operation. To select specific columns to keep (discarding all others), use \link{iso_select_file_info} instead. Set \code{file_specific = TRUE} to rename different columns in different iso_files depending on what exists in each file. 
This is very useful when working with data from multiple instruments that may have the same information (e.g. sample name) stored in different columns. +#' #' @inheritParams iso_select_file_info #' @param ... dplyr-style \link[dplyr]{rename} conditions applied based on each file's file_info (see \code{\link{iso_get_file_info}}) #' @family file_info operations -#' @export +#' @export iso_rename_file_info <- function(iso_files, ..., file_specific = FALSE, quiet = default(quiet)) { UseMethod("iso_rename_file_info") } #' @export iso_rename_file_info.default <- function(iso_files, ..., file_specific = FALSE, quiet = default(quiet)) { - stop("this function is not defined for objects of type '", + stop("this function is not defined for objects of type '", class(iso_files)[1], "'", call. = FALSE) } @@ -220,11 +220,11 @@ iso_rename_file_info.iso_file <- function(iso_files, ..., file_specific = FALSE, #' @export iso_rename_file_info.iso_file_list <- function(iso_files, ..., file_specific = FALSE, quiet = default(quiet)) { - + # info message rename_exps <- rlang::enexprs(...) 
rename_exp <- rlang::expr(c(!!!rename_exps)) - if (!quiet) { + if (!quiet) { if (file_specific) { message( "Info: renaming the following file info:") } else{ @@ -234,33 +234,33 @@ iso_rename_file_info.iso_file_list <- function(iso_files, ..., file_specific = F ) %>% message() } } - + # perform renames if (file_specific) { - + # run rename isofiles_rename <- map(iso_files, function(isofile) { # get column names - rename_cols <- + rename_cols <- tryCatch( get_column_names( isofile$file_info, df_name = "file_info", - rename = rename_exp, n_reqs = list(rename = "*"), + rename = rename_exp, n_reqs = list(rename = "*"), cols_must_exist = FALSE)$rename, warning = function(w) { w$message }) - + # check if there was an error error_msg <- NA_character_ if (is.null(names(rename_cols))) { error_msg <- rename_cols rename_cols <- get_column_names( isofile$file_info, df_name = "file_info", - rename = rename_exp, n_reqs = list(rename = "*"), + rename = rename_exp, n_reqs = list(rename = "*"), cols_must_exist = FALSE, warn = FALSE)$rename } - + # rename variables vars <- tibble( file_id = isofile$file_info$file_id, @@ -268,30 +268,30 @@ iso_rename_file_info.iso_file_list <- function(iso_files, ..., file_specific = F to = names(rename_cols), changed = .data$from != .data$to ) - + # rename file_info columns if (length(rename_cols) > 0) isofile$file_info <- dplyr::rename(isofile$file_info, !!!rename_cols) - + # check for file id if (!"file_id" %in% names(isofile$file_info)) { - stop("renaming the 'file_id' column inside an isofile may lead to unpredictable behaviour and is therefore not allowed, sorry", call. = FALSE) + stop("renaming the 'file_id' column inside an isofile may lead to unpredictable behavior and is therefore not allowed, sorry", call. 
= FALSE) } - + #return both return(list(isofile = isofile, vars = vars, error = error_msg)) }) - + # get iso files updated_iso_files <- iso_as_file_list(map(isofiles_rename, "isofile")) - + # summarize individual file updates if (!quiet) { - info <- map(isofiles_rename, "vars") %>% + info <- map(isofiles_rename, "vars") %>% bind_rows() %>% group_by(.data$file_id) %>% summarize( - label = + label = ifelse( .data$changed, sprintf("'%s'->'%s'", .data$from, .data$to), @@ -303,53 +303,53 @@ iso_rename_file_info.iso_file_list <- function(iso_files, ..., file_specific = F arrange(desc(.data$n)) message(paste(info$label, collapse = "\n")) } - + # check if same error for all files errors <- map_chr(isofiles_rename, "error") if (!any(is.na(errors)) && all(errors == errors[1])) { warning(errors[[1]], immediate. = TRUE, call. = FALSE) } - + } else { # across all files - fast but less flexible # retrieve info - file_info <- iso_files %>% + file_info <- iso_files %>% # retrieve file info - map(~.x$file_info) %>% - # combine in data frame (use safe bind to make sure different data column + map(~.x$file_info) %>% + # combine in data frame (use safe bind to make sure different data column # types of the same name don't trip up the combination) - safe_bind_rows() - + safe_bind_rows() + # renaming columns rename_cols <- get_column_names( file_info, df_name = "file_info", - rename = rename_exp, n_reqs = list(rename = "*"), + rename = rename_exp, n_reqs = list(rename = "*"), cols_must_exist = FALSE)$rename - + # then run the rename file_info <- dplyr::rename(file_info, !!!rename_cols) - + # check for file id if (!"file_id" %in% names(file_info)) { - stop("renaming the 'file_id' column inside an isofile may lead to unpredictable behaviour and is therefore not allowed, sorry", call. = FALSE) + stop("renaming the 'file_id' column inside an isofile may lead to unpredictable behavior and is therefore not allowed, sorry", call. 
= FALSE) } - + # convert back to list format file_info <- file_info %>% # should still be list columns but doesn't hurt to check ensure_data_frame_list_columns() %>% # split by file info - split(seq(nrow(file_info))) %>% + split(seq(nrow(file_info))) %>% # clean back out the columns that were only added through the row bind map(~.x[!map_lgl(.x, ~is.list(.x) && all(map_lgl(.x, is.null)))]) - + # update updated_iso_files <- map2(iso_files, file_info, ~{ .x$file_info <- .y; .x }) %>% iso_as_file_list() } - + # return updated iso files return(updated_iso_files) } @@ -368,20 +368,20 @@ rename.iso_file_list <- function(.data, ...) { #' Filter iso_files -#' +#' #' Filter for specific isofiles using file info columns (\code{\link{iso_get_file_info}}). Works just like dplyr's \link[dplyr]{filter} except that it provides the user with some information on what has been filtered. Returns \code{NULL} if none of the isofiles' file info matches the filter criteria. You can also use \link[dplyr]{filter} directly to filter collections of \code{iso_file} objects. -#' +#' #' @inheritParams iso_get_raw_data #' @param ... dplyr-style \link[dplyr]{filter} conditions applied based on each file's file_info (see \code{\link{iso_get_file_info}}) #' @family file_info operations -#' @export +#' @export iso_filter_files <- function(iso_files, ..., quiet = default(quiet)) { UseMethod("iso_filter_files") -} - +} + #' @export iso_filter_files.default <- function(iso_files, ..., quiet = default(quiet)) { - stop("this function is not defined for objects of type '", + stop("this function is not defined for objects of type '", class(iso_files)[1], "'", call. 
= FALSE) } @@ -397,13 +397,13 @@ iso_filter_files.iso_file_list <- function(iso_files, ..., quiet = default(quiet filtered_iso_files <- if (nrow(file_info) == 0) NULL else iso_files[names(iso_files) %in% file_info$file_id] - + # information if (!quiet) { - str_interp("Info: applying file filter, keeping $[d]{n} of $[d]{n_all} files", + str_interp("Info: applying file filter, keeping $[d]{n} of $[d]{n_all} files", list(n = length(filtered_iso_files), n_all = length(iso_files))) %>% message() } - + return(filtered_iso_files) } @@ -420,20 +420,20 @@ filter.iso_file_list <- function(.data, ..., .preserve = FALSE) { # mutate ================== #' Mutate file info -#' +#' #' Mutate the file info (\code{\link{iso_get_file_info}}) within isofile objects by changing existing columns or introducing new ones. Works just like dplyr's \link[dplyr]{mutate}. You can also use \link[dplyr]{mutate} directly but it will not provide summary information on the operation. Note that this will create missing columns that exist in some but not all of the passed in isofile objects in all isofile objects (filling them with NAs) the same way that \code{\link{iso_get_file_info}} does. -#' +#' #' @inheritParams iso_get_raw_data #' @param ... dplyr-style \link[dplyr]{mutate} conditions applied to the combined file info (see \code{\link{iso_get_file_info}}) #' @family file_info operations -#' @export +#' @export iso_mutate_file_info <- function(iso_files, ..., quiet = default(quiet)) { UseMethod("iso_mutate_file_info") -} +} #' @export iso_mutate_file_info.default <- function(iso_files, ..., quiet = default(quiet)) { - stop("this function is not defined for objects of type '", + stop("this function is not defined for objects of type '", class(iso_files)[1], "'", call. 
= FALSE) } @@ -444,29 +444,29 @@ iso_mutate_file_info.iso_file <- function(iso_files, ..., quiet = default(quiet) #' @export iso_mutate_file_info.iso_file_list <- function(iso_files, ..., quiet = default(quiet)) { - + # information if (!quiet) { - glue::glue("Info: mutating file info for {length(iso_files)} data file(s)") %>% + glue::glue("Info: mutating file info for {length(iso_files)} data file(s)") %>% message() } - + # mutate iso_files' file info - file_info <- - iso_get_file_info(iso_files, quiet = TRUE) %>% - dplyr::mutate(...) - + file_info <- + iso_get_file_info(iso_files, quiet = TRUE) %>% + dplyr::mutate(...) + # convert back to list format file_info <- - file_info %>% - ensure_data_frame_list_columns() %>% - split(seq(nrow(file_info))) - + file_info %>% + ensure_data_frame_list_columns() %>% + split(seq(nrow(file_info))) + # mutate mutated_iso_files <- - map2(iso_files, file_info, ~{ .x$file_info <- .y; .x }) %>% + map2(iso_files, file_info, ~{ .x$file_info <- .y; .x }) %>% iso_as_file_list() - + # return return(mutated_iso_files) } @@ -484,16 +484,16 @@ mutate.iso_file_list <- function(.data, ...) { # file root ===== #' Set iso file directory root -#' +#' #' Sets the root directory for a set of iso_files (property \code{file_root} in the file information), which is particularly useful for re-reading files (\link{reread_iso_files}) after they have changed location. Can optionally remove the previous root (\code{remove_embedded_root}) if it is still embedded in the isofiles' \code{file_path} instead of \code{file_root}. Will warn about any paths that cannot be simplified by removing the embedded root. -#' +#' #' @inheritParams iso_get_raw_data #' @param root new root directory for the isofiles. Can be relative to the current working directory (e.g. \code{"data"}) or an absolute path on the file system (e.g. \code{"/Users/..."} or \code{"C:/Data/.."}). Can be supplied as a vector of same length as the \code{iso_files} if the files have different roots. 
Use \code{root = "."} to set the root to the current working directory (the default). #' @param remove_embedded_root set this parameter to a root path that is embedded in the isofiles' \code{file_path}. Will warn about any paths that cannot be simplified by removing the specified \code{remove_embedded_root}. #' @family file_info operations #' @export iso_set_file_root <- function(iso_files, root = ".", remove_embedded_root = NULL, quiet = default(quiet)) { - + # safety check if (is.null(root) || is.na(root) || !length(root) %in% c(1L, length(iso_files))) { stop("must supply a value for the file root, either single value or a vector with the same length as iso_files", call. = FALSE) @@ -501,38 +501,38 @@ iso_set_file_root <- function(iso_files, root = ".", remove_embedded_root = NULL if (!is.null(remove_embedded_root) && length(remove_embedded_root) != 1) { stop("only a single value can be provided to remove an embedded root. If you want to remove different embedded roots, split your iso_files using iso_filter_files() and then remove the embedded root in the subsets.", call. = FALSE) } - + # single vs. 
multiple iso files single_file <- iso_is_file(iso_files) # to make sure return is the same as supplied iso_files <- iso_as_file_list(iso_files) - + # information if (!quiet) { glue::glue( "Info: setting file root for {length(iso_files)} data file(s)", if(length(root) == 1) {" to '{root}'"} else {""}, - if(!is.null(remove_embedded_root)) {" and removing embedded root '{remove_embedded_root}'"} else {""}) %>% + if(!is.null(remove_embedded_root)) {" and removing embedded root '{remove_embedded_root}'"} else {""}) %>% message() } - + # remove embedded root if (!is.null(remove_embedded_root)) { embedded_root_simplified <- iso_shorten_relative_paths(remove_embedded_root)$path original_paths <- map_chr(iso_files, ~.x$file_info$file_path) - paths <- - original_paths %>% - iso_root_paths(root = embedded_root_simplified, check_existence = FALSE) %>% + paths <- + original_paths %>% + iso_root_paths(root = embedded_root_simplified, check_existence = FALSE) %>% mutate(original_path = !!original_paths) - + no_match_paths <- filter(paths, root != !!embedded_root_simplified) if (nrow(no_match_paths) > 0) { sprintf( - "%d/%d file paths do not include the embedded root. The following paths could NOT be simplified:\n - %s", - nrow(no_match_paths), nrow(paths), + "%d/%d file paths do not include the embedded root. The following paths could NOT be simplified:\n - %s", + nrow(no_match_paths), nrow(paths), paste(no_match_paths$original_path, collapse = "\n - ") ) %>% warning(immediate. = TRUE, call. 
= FALSE) } - + # file info updates paths <- paths %>% mutate( path = ifelse(.data$root == !!embedded_root_simplified, .data$path, .data$original_path), @@ -540,7 +540,7 @@ iso_set_file_root <- function(iso_files, root = ".", remove_embedded_root = NULL ) file_info_update <- with(paths, map2(root, path, ~list(file_info = list(file_root = .x, file_path = .y)))) names(file_info_update) <- names(iso_files) - + } else { # just the root update file_info_update <- map(names(iso_files), ~list(file_info = list(file_root = root))) @@ -548,11 +548,11 @@ iso_set_file_root <- function(iso_files, root = ".", remove_embedded_root = NULL } # update - iso_files <- as.list(iso_files) %>% - modifyList(file_info_update) %>% + iso_files <- as.list(iso_files) %>% + modifyList(file_info_update) %>% iso_as_file_list() - - # return single (if passed in as single) + + # return single (if passed in as single) if (single_file && length(iso_files) == 1) return (iso_files[[1]]) return(iso_files) } @@ -561,9 +561,9 @@ iso_set_file_root <- function(iso_files, root = ".", remove_embedded_root = NULL # parse ====== #' Parse file info -#' +#' #' Convenience function to batch parse file info (\code{\link{iso_get_file_info}}) columns in isofile objects for the most common parsing calls. Uses the \code{parse_} functions exported from \link{readr} and described in \link{extract_data}. Note that for less common parsing calls or calls that require additional parameters to the parsing function, it is better to parse columns one-by-one using \code{\link{iso_mutate_file_info}} instead. -#' +#' #' @inheritParams iso_get_raw_data #' @param number dplyr-style \link[dplyr]{select} condition to choose columns that should be converted to a number using \link[readr:parse_atomic]{parse_number}. Use \code{c(...)} to select multiple columns. #' @param double dplyr-style \link[dplyr]{select} condition to choose columns that should be converted to a double using \link[readr:parse_atomic]{parse_double}. 
Use \code{c(...)} to select multiple columns. @@ -572,14 +572,14 @@ iso_set_file_root <- function(iso_files, root = ".", remove_embedded_root = NULL #' @param datetime dplyr-style \link[dplyr]{select} condition to choose columns that should be converted to a date-time using \link[readr:parse_atomic]{parse_datetime}. Use \code{c(...)} to select multiple columns. #' @param text dplyr-style \link[dplyr]{select} condition to choose columns that should be converted to text using \link[base]{as.character}. Use \code{c(...)} to select multiple columns. #' @family file_info operations -#' @export +#' @export iso_parse_file_info <- function(iso_files, number = c(), double = c(), integer = c(), logical = c(), datetime = c(), text = c(), quiet = default(quiet)) { UseMethod("iso_parse_file_info") -} +} #' @export iso_parse_file_info.default <- function(iso_files, ...) { - stop("this function is not defined for objects of type '", + stop("this function is not defined for objects of type '", class(iso_files)[1], "'", call. = FALSE) } @@ -590,12 +590,12 @@ iso_parse_file_info.iso_file <- function(iso_files, ...) 
{ #' @export iso_parse_file_info.iso_file_list <- function(iso_files, number = c(), double = c(), integer = c(), logical = c(), datetime = c(), text = c(), quiet = default(quiet)) { - + # get file info - file_info <- iso_get_file_info(iso_files, quiet = TRUE) - + file_info <- iso_get_file_info(iso_files, quiet = TRUE) + # conversion classes - classes <- + classes <- tribble( ~parse, ~new_class, ~func, "number", "numeric", "parse_number", @@ -605,56 +605,56 @@ iso_parse_file_info.iso_file_list <- function(iso_files, number = c(), double = "datetime", "POSIXct", "parse_datetime", "text", "character", "as.character" ) - + # determine variables vars <- list( - number = + number = names(file_info)[tidyselect::eval_select(rlang::enexpr(number), file_info)], - double = + double = names(file_info)[tidyselect::eval_select(rlang::enexpr(double), file_info)], - integer = + integer = names(file_info)[tidyselect::eval_select(rlang::enexpr(integer), file_info)], - logical = + logical = names(file_info)[tidyselect::eval_select(rlang::enexpr(logical), file_info)], - datetime = + datetime = names(file_info)[tidyselect::eval_select(rlang::enexpr(datetime), file_info)], - text = + text = names(file_info)[tidyselect::eval_select(rlang::enexpr(text), file_info)] - ) %>% - tibble::enframe(name = "parse", value = "column") %>% - tidyr::unnest(.data$column) %>% + ) %>% + tibble::enframe(name = "parse", value = "column") %>% + tidyr::unnest(.data$column) %>% # find out number of casts per column - group_by(.data$column) %>% mutate(n = n()) %>% ungroup() %>% + group_by(.data$column) %>% mutate(n = n()) %>% ungroup() %>% # get column info - left_join(classes, by = "parse") %>% + left_join(classes, by = "parse") %>% mutate( old_class = map_chr(.data$column, ~class(file_info[[.x]])[1]), already_cast = .data$new_class == .data$old_class, problem = !.data$already_cast & .data$new_class != "character" & .data$old_class != "character" ) - + # check on multi-casts if (any(vars$n > 1)) { - probs <- 
- vars %>% filter(.data$n > 1) %>% group_by(.data$column) %>% - summarize(convert_to = paste(unique(.data$parse), collapse = ", ")) %>% + probs <- + vars %>% filter(.data$n > 1) %>% group_by(.data$column) %>% + summarize(convert_to = paste(unique(.data$parse), collapse = ", ")) %>% mutate(label = sprintf(" - '%s' to %s", .data$column, .data$convert_to)) glue::glue("cannot convert the same column(s) to multiple formats:\n", - "{paste(probs$label, collapse = '\n')}") %>% + "{paste(probs$label, collapse = '\n')}") %>% stop(call. = FALSE) } - + # information if (!quiet) { - info <- - vars %>% filter(!.data$problem, !.data$already_cast) %>% - group_by(.data$parse) %>% - summarize(convert = paste(unique(.data$column), collapse = "', '")) %>% + info <- + vars %>% filter(!.data$problem, !.data$already_cast) %>% + group_by(.data$parse) %>% + summarize(convert = paste(unique(.data$column), collapse = "', '")) %>% mutate(label = sprintf(" - to %s: '%s'", .data$parse, .data$convert)) - already <- filter(vars, .data$already_cast)$column %>% - { if(length(.) > 0) - sprintf("\n - already the target data type (and thus ignored): '%s'", + already <- filter(vars, .data$already_cast)$column %>% + { if(length(.) 
> 0) + sprintf("\n - already the target data type (and thus ignored): '%s'", paste(., collapse = "', '")) else "" } @@ -662,26 +662,26 @@ iso_parse_file_info.iso_file_list <- function(iso_files, number = c(), double = "Info: parsing {nrow(filter(vars, !.data$problem, !.data$already_cast))} ", "file info columns for {length(iso_files)} data file(s)", if (nrow(info) > 0) ":\n{paste(info$label, collapse = '\n')}" else "", - "{already}") %>% + "{already}") %>% message() } - + # check on conversion problems if (any(vars$problem)) { - probs <- - vars %>% filter(.data$problem) %>% - mutate(label = - sprintf(" - cannot convert '%s' from %s to %s", + probs <- + vars %>% filter(.data$problem) %>% + mutate(label = + sprintf(" - cannot convert '%s' from %s to %s", .data$column, .data$old_class, .data$parse)) glue::glue( "missing automatic parsers for the following type conversions ", - "(columns are ignored):\n{paste(probs$label, collapse = '\n')}") %>% + "(columns are ignored):\n{paste(probs$label, collapse = '\n')}") %>% warning(immediate. = TRUE, call. = FALSE) } - + # cast - mutate_quos <- - vars %>% filter(!.data$problem, !.data$already_cast) %>% + mutate_quos <- + vars %>% filter(!.data$problem, !.data$already_cast) %>% # note for RMD check, since this is a with statement, does not take .data! 
{ rlang::set_names( @@ -689,19 +689,19 @@ iso_parse_file_info.iso_file_list <- function(iso_files, number = c(), double = .$column ) } - + # mutate file info - file_info <- - file_info %>% - mutate(!!!mutate_quos) %>% - ensure_data_frame_list_columns() %>% - split(seq(nrow(file_info))) - + file_info <- + file_info %>% + mutate(!!!mutate_quos) %>% + ensure_data_frame_list_columns() %>% + split(seq(nrow(file_info))) + # mutate mutated_iso_files <- - map2(iso_files, file_info, ~{ .x$file_info <- .y; .x }) %>% + map2(iso_files, file_info, ~{ .x$file_info <- .y; .x }) %>% iso_as_file_list() - + # return return(mutated_iso_files) } @@ -711,38 +711,38 @@ iso_parse_file_info.iso_file_list <- function(iso_files, number = c(), double = #' Add additional file information #' #' This function makes it easy to add additional file info (\code{\link{iso_get_file_info}}) to isofile objects and data frames by a single \code{\link[dplyr:mutate-joins]{left_join}} or multiple sequential \code{\link[dplyr:mutate-joins]{left_join}} operations. The function provides a detailed summary of the information that was added unless \code{quiet = TRUE}. Note that one-to-many joins are not permitted (and will fail with an informative error) since this would lead to likely unintended data duplication in the isofiles. However, one-to-one and many-to-one joins are fully supported and should cover all needed use cases for this function. Also note that for each join, only the \code{new_file_info} rows that have defined non-NA, non-empty ("") values in all \code{join_by} columns will be considered for the join and that only \code{new_file_info} columns that do NOT already exist in ANY file information will be added. For changing the values of existing file information, please use \code{\link{iso_mutate_file_info}} instead. 
-#' -#' Single \code{\link[dplyr:mutate-joins]{left_join}}: this is the most common use of this function and basically a simple left join operation (with some additional safety checks). Specify a single \code{join_by} in the \code{...}, such as e.g. \code{c("file_id")} to add additional file information joining by the \code{file_id} column. -#' +#' +#' Single \code{\link[dplyr:mutate-joins]{left_join}}: this is the most common use of this function and basically a simple left join operation (with some additional safety checks). Specify a single \code{join_by} in the \code{...}, such as e.g. \code{c("file_id")} to add additional file information joining by the \code{file_id} column. +#' #' Multiple sequential \code{\link[dplyr:mutate-joins]{left_join}}: this use case is for applying a set of increasingly more specific \code{join_by} rules. For example, \code{... = c("Identifier 1", "Identifier 2"), c("file_id")} would serve to first add one set of new file information for all isofiles based on their \code{Identifier 1} and \code{Identifier 2} columns and then overwrite the new information with more specific details for a subset of isofiles based on their \code{file_id} column, all based on a single overview \code{new_file_info} data frame. Basically, each set of \code{join_by} conditions specified in \code{...} must describe a valid \code{\link[dplyr:mutate-joins]{left_join}} \code{join_by} parameter to merge the \code{new_file_info} with the existing file info. Each set of \code{new_file_info} data can overwrite the previous \code{join_by} matches such that the last set of \code{join_by} column(s) provided in \code{...} will overwrite all previous matches for which it applies, even if they have already been a match for a previous column. #' @rdname iso_add_file_info #' @inheritParams iso_get_raw_data #' @param new_file_info data frame with new file information to add to the isofiles -#' @param ... 
each parameter specifies a set of \code{join_by} column(s) to add the \code{new_file_info} to the existing file information. The provided paramters are applied sequentially. At least one must be specified. +#' @param ... each parameter specifies a set of \code{join_by} column(s) to add the \code{new_file_info} to the existing file information. The provided parameters are applied sequentially. At least one must be specified. #' @return the original iso files or data frame with the new file info added in. #' @family file_info operations #' @export iso_add_file_info.iso_file_list <- function(iso_files, new_file_info, ..., quiet = default(quiet)) { # add to iso_files' file_info - file_info <- - iso_get_file_info(iso_files, quiet = TRUE) %>% + file_info <- + iso_get_file_info(iso_files, quiet = TRUE) %>% iso_add_file_info(new_file_info = new_file_info, ..., quiet = quiet) - + # safety check if (!identical(names(iso_files), file_info$file_id)) { stop("file IDs of added file information does not match original file IDs, this should not be possible to happen and suggests there is a bug in the iso_add_file_info function, please report how this happened at https://github.com/isoverse/isoreader/issues", call. = FALSE) } - + # convert back to list format file_info <- - file_info %>% - ensure_data_frame_list_columns() %>% - split(seq(nrow(file_info))) - + file_info %>% + ensure_data_frame_list_columns() %>% + split(seq(nrow(file_info))) + # mutate updated_iso_files <- - map2(iso_files, file_info, ~{ .x$file_info <- .y; .x }) %>% + map2(iso_files, file_info, ~{ .x$file_info <- .y; .x }) %>% iso_as_file_list() return(updated_iso_files) @@ -758,10 +758,10 @@ iso_add_file_info.data.frame <- function(df, new_file_info, ..., quiet = default if (missing(new_file_info)) stop("no new_file_info supplied", call. = FALSE) if (length(join_bys) == 0) stop("must specify at least one set of join_by column(s) in ...", call. 
= FALSE) if (!"file_id" %in% names(df)) stop("file_id column must be part of the data frame", call. = FALSE) - + # new columns new_cols <- setdiff(names(new_file_info), names(df)) - + # information n_data_files <- length(unique(df$file_id)) if (!quiet) { @@ -771,10 +771,10 @@ iso_add_file_info.data.frame <- function(df, new_file_info, ..., quiet = default "joining by '{purrr::map_chr(join_bys, paste, collapse = \"'+'\") %>% paste(collapse = \"' then '\")}'...") %>% message() } - + # additional safety checks if (length(new_cols) == 0) { - glue::glue("no new information columns that don't already exist, returning data unchanged") %>% + glue::glue("no new information columns that don't already exist, returning data unchanged") %>% warning(immediate. = TRUE, call. = FALSE) return(df) } @@ -785,44 +785,44 @@ iso_add_file_info.data.frame <- function(df, new_file_info, ..., quiet = default "all join_by (...) columns must exist in both the existing file ", "information and the new_file_info", if(length(missing_df) > 0) "\n - missing in existing file info: '{paste(missing_df, collapse = \"', '\")}'" else "", - if (length(missing_new_fi) > 0) "\n - missing in new file info: '{paste(missing_new_fi, collapse = \"', '\")}'" else "") %>% + if (length(missing_new_fi) > 0) "\n - missing in new file info: '{paste(missing_new_fi, collapse = \"', '\")}'" else "") %>% stop(call. 
= FALSE) } - + # figure out which new file info columns that are in the join_bys have data in which rows - join_by_cols <- + join_by_cols <- tibble( join_by_col = join_bys, ..priority = 1:length(.data$join_by_col) ) - + new_data_rows <- - join_by_cols %>% - unnest(.data$join_by_col) %>% + join_by_cols %>% + unnest(.data$join_by_col) %>% mutate( new_data_idx = map(.data$join_by_col, ~which(!is.na(new_file_info[[.x]]) & nchar(as.character(new_file_info[[.x]])) > 0)) - ) %>% - group_by(.data$..priority) %>% - summarize(new_data_idx = list(Reduce(intersect, .data$new_data_idx))) %>% + ) %>% + group_by(.data$..priority) %>% + summarize(new_data_idx = list(Reduce(intersect, .data$new_data_idx))) %>% right_join(join_by_cols, by = "..priority") - + # prep for joins shared_cols <- intersect(names(new_file_info), names(df)) %>% { rlang::set_names(., paste0("..ni_temp_", .)) } df <- mutate(df, ..df_id = dplyr::row_number()) new_file_info <- mutate(new_file_info, ..ni_id = dplyr::row_number()) - + # join new file info based on the join by and new row indices join_new_file_info <- function(join_by, new_rows, shared_cols) { if (length(join_by) > 0 && length(new_rows) > 0) { dplyr::inner_join(df, rename(new_file_info[new_rows, ], !!!shared_cols), by = join_by) - } else { + } else { tibble() } } - + # generate joined data - joined_data <- - new_data_rows %>% + joined_data <- + new_data_rows %>% mutate( n_ni_considered = map_int(.data$new_data_idx, length), shared_cols = map(.data$join_by_col, ~shared_cols[!shared_cols %in% .x]), @@ -830,33 +830,33 @@ iso_add_file_info.data.frame <- function(df, new_file_info, ..., quiet = default n_ni_matches = map_int(.data$data, ~length(unique(.x$..ni_id))), n_df_matches = map_int(.data$data, ~length(unique(.x$file_id))) ) - + # select data based on priority final_data <- - joined_data %>% - select(.data$..priority, .data$data) %>% + joined_data %>% + select(.data$..priority, .data$data) %>% # avoid problems with the temp columns during unnest - 
mutate(data = map(.data$data, ~select(.x, -starts_with("..ni_temp_")))) %>% - unnest(.data$data) %>% - select(-starts_with("..ni_temp_")) %>% - group_by(.data$..df_id) %>% - filter(.data$..priority == max(.data$..priority)) %>% + mutate(data = map(.data$data, ~select(.x, -starts_with("..ni_temp_")))) %>% + unnest(.data$data) %>% + select(-starts_with("..ni_temp_")) %>% + group_by(.data$..df_id) %>% + filter(.data$..priority == max(.data$..priority)) %>% ungroup() - + # make sure all data is present (even those not matched by any join) - final_data <- final_data %>% - vctrs::vec_rbind(filter(df, !.data$..df_id %in% final_data$..df_id)) %>% + final_data <- final_data %>% + vctrs::vec_rbind(filter(df, !.data$..df_id %in% final_data$..df_id)) %>% arrange(.data$..df_id) - + # safety checks dup_data <- final_data %>% group_by(.data$..df_id) %>% mutate(n = n()) %>% filter(.data$n > 1L) if (nrow(dup_data) > 0) { - error_data <- dup_data %>% - left_join(joined_data, by = "..priority") %>% - group_by(.data$..priority) %>% + error_data <- dup_data %>% + left_join(joined_data, by = "..priority") %>% + group_by(.data$..priority) %>% summarize( label = sprintf( - "'%s' join: %d/%d new info rows match %d/%d data files but would lead to the duplication of %d data files.", + "'%s' join: %d/%d new info rows match %d/%d data files but would lead to the duplication of %d data files.", paste(.data$join_by_col[[1]], collapse = "'+'"), .data$n_ni_matches[1], .data$n_ni_considered[1], @@ -868,34 +868,34 @@ iso_add_file_info.data.frame <- function(df, new_file_info, ..., quiet = default glue::glue( "join operation(s) would create duplicate entries:\n - ", - "{paste(error_data$label, collapse = '\n - ')}") %>% + "{paste(error_data$label, collapse = '\n - ')}") %>% stop(call. 
= FALSE) } - + # info summary - info_sum <- - final_data %>% group_by(.data$..priority) %>% + info_sum <- + final_data %>% group_by(.data$..priority) %>% summarize( n_ni_actual = length(unique(.data$..ni_id)), n_df_actual = length(unique(.data$file_id)) ) %>% - right_join(joined_data, by = "..priority") %>% + right_join(joined_data, by = "..priority") %>% mutate( n_ni_actual = ifelse(is.na(.data$n_ni_actual), 0L, .data$n_ni_actual), n_df_actual = ifelse(is.na(.data$n_df_actual), 0L, .data$n_df_actual), label = sprintf( - "'%s' join: %d/%d new info rows matched %d/%d data files%s", + "'%s' join: %d/%d new info rows matched %d/%d data files%s", purrr::map_chr(.data$join_by_col, paste, collapse = "'+'"), .data$n_ni_matches, .data$n_ni_considered, .data$n_df_matches, n_data_files, - ifelse(.data$n_ni_actual != .data$n_ni_matches | .data$n_df_actual != .data$n_df_matches, - sprintf(" - %d of these was/were also matched by subsequent joins which took precedence", + ifelse(.data$n_ni_actual != .data$n_ni_matches | .data$n_df_actual != .data$n_df_matches, + sprintf(" - %d of these was/were also matched by subsequent joins which took precedence", .data$n_df_matches - .data$n_df_actual), "" ) - # NOTE: the column overwrite leads to more confusing behaviour than probably worth it + # NOTE: the column overwrite leads to more confusing behavior than probably worth it # --> new columns should be universal and will be part of overall message # ifelse(n_df_actual > 0, # sprintf(", columns added: '%s'", purrr::map_chr(new_cols, paste, collapse = "', '")), @@ -906,7 +906,7 @@ iso_add_file_info.data.frame <- function(df, new_file_info, ..., quiet = default if (!quiet && nrow(info_sum) > 0) { message(" - ", paste(info_sum$label, collapse = "\n - ")) } - + return(select(final_data, -.data$..df_id, -.data$..ni_id, -.data$..priority)) } @@ -920,7 +920,7 @@ iso_add_file_info <- function(...) { #' @export iso_add_file_info.default <- function(x, ...) 
{ if (missing(x)) - stop("this function cannot be called without parameters", call. = FALSE) + stop("this function cannot be called without parameters", call. = FALSE) stop("this function is not defined for objects of type '", class(x)[1], "'", call. = FALSE) } @@ -928,4 +928,4 @@ iso_add_file_info.default <- function(x, ...) { #' @export iso_add_file_info.iso_file <- function(iso_files, ...) { iso_add_file_info(iso_as_file_list(iso_files), ...)[[1]] -} \ No newline at end of file +} diff --git a/R/isodata_structures.R b/R/isodata_structures.R index 949a7f41..e65abfd4 100644 --- a/R/isodata_structures.R +++ b/R/isodata_structures.R @@ -8,13 +8,13 @@ make_iso_file_data_structure <- function(file_id = NA_character_) { read_options = list( # records read options+defaults file_info = FALSE, # whether file info was read method_info = FALSE, # whether method info was read - raw_data = FALSE # whether mass data was read - ), + raw_data = FALSE # whether mass data was read + ), file_info = tibble::tibble( file_id = file_id, # unique identifer file_root = NA_character_, # root directory for file path file_path = NA_character_, # path to file (file extension is key for processing) - file_subpath = NA_character_, # sub path in case file is an archieve + file_subpath = NA_character_, # sub path in case file is an archive file_datetime = lubridate::as_datetime(NA), # the run date and time of the file file_size = NA_integer_ # the size of the file in bytes ), @@ -22,7 +22,7 @@ make_iso_file_data_structure <- function(file_id = NA_character_) { raw_data = tibble::tibble() # all mass data ), class = c("iso_file") - ) %>% + ) %>% initialize_problems_attribute() } @@ -61,7 +61,7 @@ make_scan_data_structure <- function(file_id = NA_character_) { # get last structure update get_last_structure_update_version <- function() { # last version which included any structure updates - # determines + # determines # - whether the file version warning will be shown during file read # - whether 
cached files are re-read (if reread_outdated_cache_files is active) # - backwards compatibility checks are run during collection reading @@ -70,14 +70,14 @@ get_last_structure_update_version <- function() { # get version for all objects get_iso_object_versions <- function(iso_obj) { - iso_obj %>% iso_as_file_list() %>% + iso_obj %>% iso_as_file_list() %>% purrr::map(~if (!is.null(.x$version)) { .x$version } else { as.package_version("0.0.0") }) } # get outdated boolean vector get_iso_object_outdated <- function(iso_obj) { - iso_obj %>% - get_iso_object_versions() %>% + iso_obj %>% + get_iso_object_versions() %>% purrr::map_lgl(~.x < get_last_structure_update_version()) } @@ -91,8 +91,8 @@ is_iso_object_outdated <- function(iso_obj) { #' Isoreader data structure functions -#' -#' @description \code{iso_is_file} tests if the object is an iso_file +#' +#' @description \code{iso_is_file} tests if the object is an iso_file #' #' @param x an object to test whether it has the specific class #' @rdname iso_data_structure @@ -138,28 +138,28 @@ iso_is_scan <- function(x) { # Iso file list ---- -#' @description \code{iso_as_file_list} concatenates iso_file and iso_file list object(s) into one combined iso_file list (equivalent to calling \code{c(...)}), flattens all passed lists into one list structure, all individual objects and objects within iso_file lists have to be the same type of iso_file, issues warnings if there are duplicate file ids and summarizes all problems in the iso_file list. If duplicates are allowed (\code{discard_duplicates = FALSE}), their file IDs will append a #1, #2, #3, etc. to preserve unique file IDs (important for many data aggregation operations). 
+#' @description \code{iso_as_file_list} concatenates iso_file and iso_file list object(s) into one combined iso_file list (equivalent to calling \code{c(...)}), flattens all passed lists into one list structure, all individual objects and objects within iso_file lists have to be the same type of iso_file, issues warnings if there are duplicate file ids and summarizes all problems in the iso_file list. If duplicates are allowed (\code{discard_duplicates = FALSE}), their file IDs will append a #1, #2, #3, etc. to preserve unique file IDs (important for many data aggregation operations). #' @param ... iso_file and iso_file_list objects to concatenate -#' @param discard_duplicates whether to automatically discard files with duplicate file IDs (i.e. duplicate file names). If \code{TRUE} (the default), only the first files are kept and any files with the same file ID are discarded. If \code{FALSE}, all duplicate files are kept but their file IDs are appended with suffix \code{#1}, \code{#2}, etc. +#' @param discard_duplicates whether to automatically discard files with duplicate file IDs (i.e. duplicate file names). If \code{TRUE} (the default), only the first files are kept and any files with the same file ID are discarded. If \code{FALSE}, all duplicate files are kept but their file IDs are appended with suffix \code{#1}, \code{#2}, etc. #' @rdname iso_data_structure #' @export iso_as_file_list <- function(..., discard_duplicates = TRUE) { # global vars has_duplicates <- NULL - + # dots passed in iso_objs <- list(...) 
- + # return iso file list right away if it's the only thing passed in if (length(iso_objs) == 1 && iso_is_file_list(..1)) return (..1) - + # allow simple list to be passed in if (length(iso_objs) == 1 && !iso_is_object(..1) && is.list(..1)) iso_objs <- ..1 - + # list classes list_classes <- "iso_file_list" - + if (length(iso_objs) == 0) { # empty list iso_list <- list() @@ -168,44 +168,44 @@ iso_as_file_list <- function(..., discard_duplicates = TRUE) { # check if everything is an iso object if(!all(is_iso <- map_lgl(iso_objs, iso_is_object))) { stop("can only process iso_file and iso_file_list objects, encountered incompatible data type(s): ", - unlist(lapply(iso_objs[!is_iso], class)) %>% unique() %>% str_c(collapse = ", "), + unlist(lapply(iso_objs[!is_iso], class)) %>% unique() %>% str_c(collapse = ", "), call. = FALSE) } # flatten isofiles and isofile lists to make one big isofile list iso_list <- map(iso_objs, ~if(iso_is_file_list(.x)) { .x } else { list(.x) }) %>% unlist(recursive = FALSE) - + # reset file ids file_ids <- map_chr(iso_list, ~.x$file_info$file_id) if (any(is.na(file_ids))) - stop("encountered undefined (NA) file ID(s). This is prohibited because it can lead to unexpected behaviour in iso files collections.", + stop("encountered undefined (NA) file ID(s). This is prohibited because it can lead to unexpected behavior in iso files collections.", call. = FALSE) names(iso_list) <- file_ids - + # check if al elements are the same data type - classes <- map_chr(iso_list, ~class(.x)[1]) + classes <- map_chr(iso_list, ~class(.x)[1]) if (!all(classes == classes[1])) { wrong_dt <- classes[classes != classes[1]] %>% unique %>% collapse(", ") - glue("can only process iso_file objects with the same data type (first: {classes[1]}), encountered: {wrong_dt}") %>% + glue("can only process iso_file objects with the same data type (first: {classes[1]}), encountered: {wrong_dt}") %>% stop(call. 
= FALSE) } list_classes <- c(paste0(classes[1], "_list"), list_classes) - + # check for file_id duplicates - dups <- + dups <- tibble( idx = 1:length(iso_list), file_id = names(iso_list) - ) %>% - group_by(.data$file_id) %>% - mutate(n = 1:n(), has_duplicates = any(n > 1)) %>% - ungroup() %>% + ) %>% + group_by(.data$file_id) %>% + mutate(n = 1:n(), has_duplicates = any(n > 1)) %>% + ungroup() %>% filter(has_duplicates) - + # process duplicates if (nrow(dups) > 0) { msg <- if(discard_duplicates) "duplicate files encountered, only first kept" else "duplicate files kept but with recoded file IDs" - + # work on duplicates for (i in 1:nrow(dups)) { # register warnings @@ -220,25 +220,25 @@ iso_as_file_list <- function(..., discard_duplicates = TRUE) { names(iso_list)[idx] <- recode_id } } - - # finalize duplicates + + # finalize duplicates if (discard_duplicates) { # discard all but first duplicate iso_list[filter(dups, n > 1)$idx] <- NULL - } + } } - + # propagate problems - all_problems <- map(iso_list, ~get_problems(.x) %>% mutate(file_id = .x$file_info$file_id)) %>% + all_problems <- map(iso_list, ~get_problems(.x) %>% mutate(file_id = .x$file_info$file_id)) %>% bind_rows() %>% dplyr::select(.data$file_id, everything()) } - + # problems if (nrow(all_problems) > 0) { # remove duplicate entries all_problems <- unique(all_problems) } - + # generate structure structure( iso_list, @@ -250,22 +250,22 @@ iso_as_file_list <- function(..., discard_duplicates = TRUE) { # Printing ---- #' Isofile printing -#' +#' #' Print summary of individual iso_files (dual inlet or continuous flow) or collection of iso_files. #' @param x Object to show. #' @param ... additional parameters passed to print.default #' @rdname iso_printing #' @export print.iso_file_list <- function(x, ...) 
{ - + # what type of iso files if (length(x) == 0) data_type <- "unknown" else data_type <- class(x[[1]]) %>% { .[.!="iso_file"][1] } %>% str_replace("_", " ") - + # print summary glue("Data from {length(x)} {data_type} iso files:") %>% cat("\n") print(iso_get_data_summary(x, quiet = TRUE)) - + if (n_problems(x) > 0) { cat("\nProblem summary:\n", sep = "") print(iso_get_problems_summary(x), ...) @@ -317,7 +317,7 @@ set_ds_file_path <- function(ds, file_root, file_path, file_id = basename(file_p ds$file_info$file_path <- file_path ds$file_info$file_id <- file_id ds$file_info$file_subpath <- file_subpath - if (!file.exists(get_ds_file_path(ds))) + if (!file.exists(get_ds_file_path(ds))) stop("file/folder does not exist: ", file_path, call. = FALSE) return(ds) } @@ -331,12 +331,12 @@ get_ds_file_root <- function(ds) { get_ds_file_path <- function(ds, include_root = TRUE) { if (!col_in_df(ds$file_info, "file_path")) stop("file_path column does not exist in file info (lost during rename?), cannot proceed", call. 
= FALSE) - + if (include_root) { file_root <- get_ds_file_root(ds) if (!is.na(file_root)) return(file.path(file_root, ds$file_info$file_path)) } - + return(ds$file_info$file_path) } @@ -344,7 +344,7 @@ get_ds_file_path <- function(ds, include_root = TRUE) { update_read_options <- function(ds, read_options) { # remove read_ prefix in function parameters if(!is.list(read_options)) read_options <- as.list(read_options) - names(read_options) <- names(read_options) %>% str_replace("^read_", "") + names(read_options) <- names(read_options) %>% str_replace("^read_", "") update <- read_options[names(read_options) %in% names(ds$read_options)] # update all that exist in the read options ds$read_options <- modifyList(ds$read_options, update) @@ -358,23 +358,23 @@ set_ds_file_size <- function(ds) { # legacy file that doesnt have file root info yet return(ds) } - + col_exists <- col_in_df(ds$file_info, "file_size") if (col_exists && !is.na(ds$file_info$file_size)) { # already set return(ds) } - - # setting file size + + # setting file size file_path <- get_ds_file_path(ds) - if (file.exists(file_path)) + if (file.exists(file_path)) file_size <- as.integer(round(file.size(file_path))) else file_size <- NA_integer_ - + # update file size ds$file_info <- dplyr::mutate(ds$file_info, file_size = !!file_size) - + # make sure file size is at the proper position if it is introduced for the first time if (!col_exists) { ds$file_info <- dplyr::select(ds$file_info, starts_with("file_"), everything()) diff --git a/R/isoread.R b/R/isoread.R index 61fef534..3d17ec4e 100644 --- a/R/isoread.R +++ b/R/isoread.R @@ -1,16 +1,16 @@ # file types & reader ================= #' Register file readers -#' +#' #' Register file extensions and reader functions for different data files. Isoreader automatically registers all built-in file readers so this function is usually only needed when registering additional readers provided for testing purposes from outside of the isoreader package. 
Note that file extensions are case-insensitive, i.e. a reader for \code{.ext} will also recognize \code{.Ext} and \code{.EXT} -#' +#' #' @details \code{iso_register_dual_inlet_file_reader}: use this function to register file readers for dual inlet files. -#' +#' #' @rdname file_readers #' @param extension the file extension (e.g. \code{.dxf}) of the data file. Must be unique otherwise different files can not automatically be matched with the appropriate file reader based on their extension. #' @param func the name of the function that should be used a filter reader. All file reader functions must accept a data structure argument as the first argument and return the same data structure with added data. #' @param description what is this file type about? -#' @param software what is the software program that creates this filetype? +#' @param software what is the software program that creates this file type? #' @param cacheable whether this file type is cacheable. If \code{TRUE} (the default), user requests to cache the file will be honored. If \code{FALSE}, this file type will never be cached no matter what the user requests. #' @param post_read_check whether isoreader should conduct a data integrity check after reading the file. Should always be \code{TRUE} unless there is independent data integrity checking already taking place inside the reader. #' @param overwrite whether to overwrite an existing file reader for the same extension @@ -45,42 +45,42 @@ register_file_reader <- function(type, call, extension, func, description, softw if (!is.character(func)) stop("please provide the function name rather than the function itself to register it", call. = FALSE) - + if (length(env) == 0) - stop("could not find function '", func, "' in any environment - please make sure that it is defined", + stop("could not find function '", func, "' in any environment - please make sure that it is defined", call. 
= FALSE) - + if (length(env) > 1) glue::glue("function '{func}' exists in more than one environment ", - "({paste(env, collapse = ', ')})", - ", please specify parameter 'env' to clarify") %>% + "({paste(env, collapse = ', ')})", + ", please specify parameter 'env' to clarify") %>% stop(call. = FALSE) - + frs <- default("file_readers", allow_null = TRUE) - + new_fr <- tibble::tibble( type = type, call = call, extension = extension, - func = func, cacheable = cacheable, + func = func, cacheable = cacheable, post_read_check = post_read_check, description = description, software = software, env = env ) - + if (!is.null(frs) && extension %in% frs$extension) { if (identical(new_fr, dplyr::filter(frs, extension == !!extension))) { # already exists and is identical, nothing more to do return(frs) } - + if (!overwrite) { # already exists but don't overwrite --> error glue::glue( "file reader for extension '{extension}' already exists, specify overwrite = TRUE to replace the existing file reader" ) %>% stop(call. = FALSE) - } - + } + # already exists and will be overwritten glue::glue("file reader for extension '{extension}' already exists and will be overwritten") %>% warning(immediate. = TRUE, call. = FALSE) @@ -97,9 +97,9 @@ find_func <- function(func) { } #' Supported file types -#' +#' #' Get an overview of all the file types currently supported by the isoreader package. To register additional file readers, use the \code{\link{iso_register_dual_inlet_file_reader}} and \code{\link{iso_register_continuous_flow_file_reader}} functions. -#' +#' #' @family file_types #' @export iso_get_supported_file_types <- function() { @@ -121,7 +121,7 @@ get_supported_scan_files <- function() { # file reading =========== #' Read isotope data file -#' +#' #' This function from the original isoread package is deprecated, please use \link{iso_read_dual_inlet}, \link{iso_read_continuous_flow} and \link{iso_read_scan} instead. #' #' @param ... 
original isoread parameters @@ -133,7 +133,7 @@ isoread <- function(...) { } #' Load dual inlet data -#' +#' #' @inheritParams iso_read_files #' @param ... one or multiple file/folder paths. All files must have a supported file extension. All folders are expanded and searched for files with supported file extensions (which are then included in the read). #' @param read_raw_data whether to read the raw mass/ion data from the file @@ -144,16 +144,16 @@ isoread <- function(...) { #' @family isoread functions for different types of IRMS data #' @export iso_read_dual_inlet <- function( - ..., + ..., root = ".", - read_raw_data = default(read_raw_data), read_file_info = default(read_file_info), + read_raw_data = default(read_raw_data), read_file_info = default(read_file_info), read_method_info = default(read_method_info), read_vendor_data_table = default(read_vendor_data_table), nu_masses = c(), - discard_duplicates = TRUE, + discard_duplicates = TRUE, parallel = FALSE, parallel_plan = future::multisession, parallel_cores = future::availableCores(), cache = default(cache), read_cache = default(cache), reread_outdated_cache = FALSE, quiet = default(quiet), cache_files_with_errors = TRUE) { - + # cache files with errors deprecation warning if (!missing(cache_files_with_errors)) { warning( @@ -161,7 +161,7 @@ iso_read_dual_inlet <- function( immediate. = TRUE, call. 
= FALSE ) } - + # process data iso_read_files( unlist_paths(list(...)), @@ -187,20 +187,20 @@ iso_read_dual_inlet <- function( } #' Load continuous flow data -#' +#' #' @inheritParams iso_read_dual_inlet #' @family isoread functions for different types of IRMS data #' @export iso_read_continuous_flow <- function( - ..., + ..., root = ".", - read_raw_data = default(read_raw_data), read_file_info = default(read_file_info), - read_method_info = default(read_method_info), read_vendor_data_table = default(read_vendor_data_table), - discard_duplicates = TRUE, + read_raw_data = default(read_raw_data), read_file_info = default(read_file_info), + read_method_info = default(read_method_info), read_vendor_data_table = default(read_vendor_data_table), + discard_duplicates = TRUE, parallel = FALSE, parallel_plan = future::multisession, parallel_cores = future::availableCores(), cache = default(cache), read_cache = default(cache), reread_outdated_cache = FALSE, quiet = default(quiet), cache_files_with_errors = TRUE) { - + # cache files with errors deprecation warning if (!missing(cache_files_with_errors)) { warning( @@ -208,7 +208,7 @@ iso_read_continuous_flow <- function( immediate. = TRUE, call. 
= FALSE ) } - + # process data iso_read_files( unlist_paths(list(...)), @@ -234,19 +234,19 @@ iso_read_continuous_flow <- function( } #' Load scan data -#' +#' #' @inheritParams iso_read_dual_inlet #' @family isoread functions for different types of IRMS data #' @export iso_read_scan <- function( - ..., + ..., root = ".", read_raw_data = default(read_raw_data), read_file_info = default(read_file_info), read_method_info = default(read_method_info), - discard_duplicates = TRUE, + discard_duplicates = TRUE, parallel = FALSE, parallel_plan = future::multisession, parallel_cores = future::availableCores(), cache = default(cache), read_cache = default(cache), reread_outdated_cache = FALSE, quiet = default(quiet), cache_files_with_errors = TRUE) { - + # cache files with errors deprecation warning if (!missing(cache_files_with_errors)) { warning( @@ -254,7 +254,7 @@ iso_read_scan <- function( immediate. = TRUE, call. = FALSE ) } - + # process data iso_read_files( unlist_paths(list(...)), @@ -280,11 +280,11 @@ iso_read_scan <- function( #' Core function to read isotope data files -#' -#' This function takes care of extracting basic information about iso_files, dealing with problems and making sure only valid fire formats are processed. -#' This function is not typicaly called directly but indirectly by calling \link{iso_read_dual_inlet}, \link{iso_read_continuous_flow} and \link{iso_read_scan}. +#' +#' This function takes care of extracting basic information about iso_files, dealing with problems and making sure only valid file formats are processed. +#' This function is not typically called directly but indirectly by calling \link{iso_read_dual_inlet}, \link{iso_read_continuous_flow} and \link{iso_read_scan}. #' It is made available outside the package because it can be very useful for testing new file readers. -#' +#' #' @param paths one or multiple file/folder paths. All files must have a supported file extension. 
All folders are expanded and searched for files with supported file extensions (which are then included in the read). Paths can be absolute paths or relative to the provided file \code{root} (which is the current working directory by default). For absolute paths, a common root directory will be guessed using \link{iso_find_absolute_path_roots}. The root portion of paths will never be displayed in info messages. #' @inheritParams iso_expand_paths #' @param supported_extensions data frame with supported extensions and corresponding reader functions (columns 'extension', 'func', 'cacheable') @@ -299,9 +299,9 @@ iso_read_scan <- function( #' @param read_cache whether to reload from cache if a cached version exists. Note that it will only read from cache if the raw data file has not been modified since. Files that have been modified on disc (e.g. edited in the vendor software) will always be read anew. To automatically reread cached files that were cached by an outdated version of the isoreader package, set the \code{reread_outdated_cache} flag. #' @param reread_outdated_cache whether to re-read outdated cache files whenever they are encountered. #' @param read_options vector of read options to be stored in the data structure (e.g. \code{c(read_vendor_data_table = FALSE)}). The \code{read_} prefix is optional. 
-#' @param reader_options list of paramters to be passed on to the reader +#' @param reader_options list of parameters to be passed on to the reader #' @return single iso_file object (if single file) or list of iso_files (iso_file_list) -iso_read_files <- function(paths, root, supported_extensions, data_structure, +iso_read_files <- function(paths, root, supported_extensions, data_structure, read_options = c(), reader_options = list(), discard_duplicates = TRUE, cache_files_with_errors = TRUE, parallel = FALSE, parallel_plan = future::multisession, parallel_cores = future::availableCores(), cache = default(cache), read_cache = default(cache), reread_outdated_cache = FALSE, @@ -309,11 +309,11 @@ iso_read_files <- function(paths, root, supported_extensions, data_structure, # start timer start_time <- Sys.time() - + # set quiet for the current and sub-calls and reset back to previous setting on exit on_exit_quiet <- update_quiet(quiet) on.exit(on_exit_quiet(), add = TRUE) - + # parallel processing if (parallel) { available_cores <- future::availableCores() @@ -326,49 +326,49 @@ iso_read_files <- function(paths, root, supported_extensions, data_structure, cores <- min(parallel_cores, available_cores) oplan <- plan(parallel_plan) on.exit(plan(oplan), add = TRUE) - } - + } + # supplied data checks col_check(c("extension", "func", "cacheable"), supported_extensions) if(!is(data_structure, "iso_file")) stop("data structure must include class 'iso_file'", call. = FALSE) col_check(c("file_info"), data_structure) - + # read options update in data structure data_structure <- update_read_options(data_structure, read_options) - + # expand & safety check paths (will warn if non-supported file types are included or same filename occurs multiple times) if (missing(paths) || is.null(paths) || all(is.na(paths))) { stop("file path(s) required, none provided", call. 
= FALSE) } filepaths <- iso_expand_paths(paths, extensions = supported_extensions$extension, root = root) - + # check if there are any - if (nrow(filepaths) == 0) + if (nrow(filepaths) == 0) return(iso_as_file_list(list())) - + # find roots for absolute paths filepaths <- iso_find_absolute_path_roots(filepaths$path, filepaths$root) - + # initialize progress bar pb <- progress::progress_bar$new( format = sprintf("Progress: [:bar] :current/%d (:percent) :elapsed", nrow(filepaths)), clear = FALSE, show_after = 0, total = nrow(filepaths)) set_temp("progress_bar", pb) pb$tick(0) - + # overview if (!default("quiet")) { glue::glue( "preparing to read {nrow(filepaths)} data files", if (cache) { " (all will be cached)" } else {""}, - if (parallel) { ", setting up {min(cores, nrow(filepaths))} parallel processes..." } - else {"..."}) %>% + if (parallel) { ", setting up {min(cores, nrow(filepaths))} parallel processes..." } + else {"..."}) %>% log_message() } # generate read files overview - files <- - filepaths %>% + files <- + filepaths %>% mutate( file_n = 1:n(), files_n = n(), @@ -376,9 +376,9 @@ iso_read_files <- function(paths, root, supported_extensions, data_structure, old_cachepath = generate_old_cache_filepaths(file.path(.data$root, .data$path), data_structure$read_options), process = if(!parallel) NA_integer_ else ((.data$file_n - 1) %% cores) + 1L, reader_options = list(!!reader_options) - ) %>% + ) %>% # merge in supported extensions with reader and cacheable info - match_to_supported_file_types(supported_extensions) %>% + match_to_supported_file_types(supported_extensions) %>% # make cache read/write decisions mutate( read_from_cache = read_cache & .data$cacheable & file.exists(.data$cachepath), @@ -386,24 +386,24 @@ iso_read_files <- function(paths, root, supported_extensions, data_structure, reread_outdated_cache = !!reread_outdated_cache, write_to_cache = cache & .data$cacheable ) - + # safety check on reader functions req_readers <- unique(files$func) 
in_workspace <- map_lgl(req_readers, exists, mode = "function") in_isoreader_ns <- map_lgl(req_readers, exists, mode = "function", where = asNamespace("isoreader")) if ( any(missing <- !in_workspace & !in_isoreader_ns) ) { - stop("required reader function(s) does not seem to exist: ", + stop("required reader function(s) does not seem to exist: ", str_c(req_readers[missing], collapse = ", "), call. = FALSE) } - + # set up log files for parallel processing if (parallel) setup_parallel_logs() - + # setup up processes set_temp("parallel_process", NA_integer_) # mark the main process - processes <- - files %>% - nest(data = c(-.data$process)) %>% + processes <- + files %>% + nest(data = c(-.data$process)) %>% mutate( result = purrr::map2( .data$process, @@ -422,20 +422,20 @@ iso_read_files <- function(paths, root, supported_extensions, data_structure, # parallel monitor_parallel_logs(processes) cleanup_parallel_logs() - iso_files <- processes$result %>% lapply(future::value) %>% + iso_files <- processes$result %>% lapply(future::value) %>% unlist(recursive = FALSE) } - + # terminate progress bar while (!pb$finished) pb$tick() - + # final user update if (!default("quiet")) { end_time <- Sys.time() sprintf( "finished reading %s files in %.2f %s", - nrow(files), as.numeric(end_time - start_time), - attr(end_time - start_time, "units")) %>% + nrow(files), as.numeric(end_time - start_time), + attr(end_time - start_time, "units")) %>% log_message() } @@ -443,42 +443,42 @@ iso_read_files <- function(paths, root, supported_extensions, data_structure, all_probs <- iso_get_problems(iso_files) ## outdated files if (any(stringr::str_detect( - all_probs$details, + all_probs$details, fixed("outdated version of the isoreader package")))) { glue::glue( "some files were read from outdated cache or storage (.rds) files. They were checked for compatibility and should work without issues. 
However, to avoid this warning and improve read spead, please call iso_reread_outdated_files() on your collection of iso files to refresh outdated cache files." - ) %>% + ) %>% warning(immediate. = TRUE, call. = FALSE) } ## unix file creation date if (any(stringr::str_detect( - all_probs$details, + all_probs$details, fixed("file creation date cannot be accessed on this Linux system")))) { glue::glue( "file creation date could not be accessed for all files because this information is not available on some Linux systems, reporting last modified time for file_datetime instead. To turn these warnings off, call iso_turn_datetime_warnings_off() and reread these files with iso_reread_all_files()." - ) %>% + ) %>% warning(immediate. = TRUE, call. = FALSE) } - + # turn into iso_file list - iso_files <- iso_as_file_list(iso_files, discard_duplicates = discard_duplicates) + iso_files <- iso_as_file_list(iso_files, discard_duplicates = discard_duplicates) # bring files into the correct order after potential parallel processing jumble - indices <- - tibble(path = purrr::map_chr(iso_files, ~.x$file_info$file_path) %>% unname(), idx = 1:length(.data$path)) %>% - dplyr::left_join(files, by = "path") %>% - dplyr::arrange(.data$file_n) %>% - dplyr::pull(.data$idx) %>% + indices <- + tibble(path = purrr::map_chr(iso_files, ~.x$file_info$file_path) %>% unname(), idx = 1:length(.data$path)) %>% + dplyr::left_join(files, by = "path") %>% + dplyr::arrange(.data$file_n) %>% + dplyr::pull(.data$idx) %>% unique() iso_files <- iso_files[indices] - + # report problems if (!default("quiet") && iso_has_problems(iso_files)) { sprintf("encountered %.0f problems in total", n_problems(iso_files)) %>% log_message() print(all_probs) cat("\n") } - + # return single or file or list if (length(iso_files) == 1) return (iso_files[[1]]) return(iso_files) @@ -487,32 +487,32 @@ iso_read_files <- function(paths, root, supported_extensions, data_structure, # wrapper function for creating a read procss # @param 
process if NA --> set up process in the current session, if integer --> set up parallel process create_read_process <- function(process, data_structure, files) { - + # specify relevant files columns to match read_iso_file parameters - files <- files %>% + files <- files %>% select( - .data$root, .data$path, .data$file_n, .data$files_n, + .data$root, .data$path, .data$file_n, .data$files_n, .data$read_from_cache, .data$read_from_old_cache, .data$reread_outdated_cache, - .data$write_to_cache, .data$cachepath, .data$old_cachepath, - .data$post_read_check, ext = .data$extension, + .data$write_to_cache, .data$cachepath, .data$old_cachepath, + .data$post_read_check, ext = .data$extension, reader_fun = .data$func, reader_options = .data$reader_options, reader_fun_env = .data$env ) - + # parallel if (!is.na(process)) { # session options all_opts <- get_all_options() # find required global functions and packages from the used readers - func_globals <- filter(files, .data$reader_fun_env == "R_GlobalEnv")$reader_fun %>% + func_globals <- filter(files, .data$reader_fun_env == "R_GlobalEnv")$reader_fun %>% unique() %>% { rlang::set_names(purrr::map(., ~rlang::eval_tidy(rlang::sym(.x))), .) } packages <- c("isoreader", "purrr", filter(files, .data$reader_fun_env != "R_GlobalEnv")$reader_fun_env) %>% unique() log_file <- get_temp("parallel_log_file") progress_file <- get_temp("parallel_progress_file") - # parallel via futures - result <- + # parallel via futures + result <- future::future( globals = c(func_globals, list( - process = process, data_structure = data_structure, files = files, + process = process, data_structure = data_structure, files = files, log_file = log_file, progress_file = progress_file, all_opts = all_opts)), packages = packages, expr = { @@ -535,12 +535,12 @@ create_read_process <- function(process, data_structure, files) { } #' Read individual iso file -#' +#' #' Low level read function for an individual iso file. 
Usually not called directly but available for methods development. #' @inheritParams iso_read_files #' @param ds the basic data structure for the type of iso_file #' @param path file path -#' @param file_n numer of processsed file for info messages +#' @param file_n number of processed file for info messages #' @param files_n total number of files for info messages #' @param read_from_cache whether to read from cache #' @param read_from_old_cache whether to read from old cache files (to be deprecated in isoreader 2.0) @@ -552,62 +552,62 @@ create_read_process <- function(process, data_structure, files) { #' @param ext file extension #' @param reader_fun file reader function #' @param reader_fun_env where to find the reader function -#' +#' #' @export read_iso_file <- function( - ds, root, path, file_n, files_n, + ds, root, path, file_n, files_n, read_from_cache, read_from_old_cache, reread_outdated_cache, - write_to_cache, cachepath, old_cachepath, + write_to_cache, cachepath, old_cachepath, post_read_check, ext, reader_fun, reader_options, reader_fun_env) { - + # prepare iso_file object ds <- set_ds_file_path(ds, root, path) iso_file <- ds # default - + # cache has_cache <- read_from_cache || read_from_old_cache - + # progress update if (!default("quiet")) { - if (has_cache) { + if (has_cache) { msg <- glue("reading file '{path}' from cache...") } else { msg <- glue("reading file '{path}' with '{ext}' reader...") } log_message(msg) } - + # evaluate read file event quosure if it exists read_file_event <- getOption("isoreader.read_file_event") if (!is.null(read_file_event) && is_quosure(read_file_event) && !quo_is_null(read_file_event)) { eval_tidy(get_expr(read_file_event)) } - + # whether to reread raw file reread_file <- FALSE - outdated_message <- function(iso_file) { + outdated_message <- function(iso_file) { sprintf( - "cache file created by an outdated version of the isoreader package (%s)", + "cache file created by an outdated version of the isoreader 
package (%s)", as.character(get_iso_object_versions(iso_file)[[1]]) ) } - + # read cache if (has_cache) { - iso_file <- + iso_file <- if (read_from_cache) load_cached_iso_file(cachepath) else if (read_from_old_cache) load_cached_iso_file(old_cachepath) - + # check if reader options match, if not: re-read # FIXME: implement this # pseudocode # if (!has_same_reader_options(iso_file, reader_options)) { # reread_file <- TRUE # } - + # check cached file version if (iso_is_object(iso_file) && is_iso_object_outdated(iso_file)) { - + # re-read file if (reread_outdated_cache) { # reread file @@ -621,20 +621,20 @@ read_iso_file <- function( "created by isoreader version < {as.character(get_last_structure_update_version())}") ) # warning and compatibility checks - iso_file <- - iso_file %>% + iso_file <- + iso_file %>% # warning register_warning( details = outdated_message(iso_file), func = "read_iso_file", warn = FALSE - ) %>% + ) %>% # compatibility ensure_iso_file_backwards_compatibility() } } } - + # read isofile if (!has_cache || reread_file) { # read from original file @@ -646,26 +646,26 @@ read_iso_file <- function( if (post_read_check) { iso_file <- run_post_read_check(iso_file) } - + # check version if (iso_is_object(iso_file) && is_iso_object_outdated(iso_file)) { - + outdated_files <- get_iso_object_outdated(iso_file) - + # post info message log_message( glue("running compatibility checks for outdated files ", "({sum(outdated_files)}/{length(outdated_files)}) ", "created by isoreader version < {as.character(get_last_structure_update_version())}") ) - + # run compatibility checks iso_file <- ensure_iso_file_backwards_compatibility(iso_file) - + # attach warning if (iso_is_file(iso_file)) { # single file - iso_file <- + iso_file <- register_warning( iso_file, details = outdated_message(iso_file), @@ -681,30 +681,30 @@ read_iso_file <- function( ) }) } - + } - + # cleanup any binary and source content depending on debug setting if (!default(debug)) { iso_file$binary 
<- NULL # @FIXME: binary should be renamed to source throughout iso_file$source <- NULL iso_file$temp <- NULL } - + # store in cached file - if (write_to_cache) + if (write_to_cache) cache_iso_file(iso_file, cachepath) } - + # evaluate finish file event quosure if it exists finish_file_event <- getOption("isoreader.finish_file_event") if (!is.null(finish_file_event) && is_quosure(finish_file_event) && !quo_is_null(finish_file_event)) { eval_tidy(get_expr(finish_file_event)) } - + # marke progress for progress bar log_progress() - + return(iso_file) } @@ -712,7 +712,7 @@ read_iso_file <- function( run_post_read_check <- function(iso_files) { # file info column check iso_files <- ensure_file_info_list_columns(iso_files) - + # what else should go in here for data integrity checks? return(iso_files) } @@ -723,7 +723,7 @@ ensure_file_info_list_columns <- function(iso_files) { # standard fields standard_fields <- names(make_iso_file_data_structure()$file_info) - + # check list vs. single file if (iso_is_file_list(iso_files)) { iso_files <- map(iso_files, ~{ @@ -733,7 +733,7 @@ ensure_file_info_list_columns <- function(iso_files) { } else { iso_files$file_info <- ensure_data_frame_list_columns(iso_files$file_info, exclude = standard_fields) } - + return(iso_files) } @@ -745,7 +745,7 @@ ensure_iso_file_backwards_compatibility <- function(iso_files) { iso_files <- ensure_file_info_list_columns(iso_files) # convert data frame units attribute to implicit double with units - # check for file size paramter + # check for file size parameter # check for proper file datetime column type ensure_compatibility <- function(iso_file) { if (!"file_root" %in% names(iso_file$file_info)) iso_file$file_info$file_root <- "." @@ -754,14 +754,14 @@ ensure_iso_file_backwards_compatibility <- function(iso_files) { iso_file$vendor_data_table <- convert_df_units_attr_to_implicit_units(iso_file$vendor_data_table) return(iso_file) } - + # check list vs. 
single file if (iso_is_file_list(iso_files)) { iso_files <- map(iso_files, ensure_compatibility) %>% iso_as_file_list() } else { iso_files <- ensure_compatibility(iso_files) } - + return(iso_files) } @@ -775,9 +775,9 @@ check_file_datetime <- function(iso_file) { # file re-reading ========= #' Re-read iso_files -#' +#' #' Actual multi-purpose file-reread function (not exported) that powers \link{iso_reread_files}. -#' +#' #' @inheritParams iso_reread_files #' @param reread_only_changed_files whether to re-read only files that have since be changed on disc (i.e. have no valid cache file), default FALSE i.e. re-read ALL files #' @param reread_only_outdated_files whether to re-read only files that were read by an outdated version of isoreader (default FALSE, i.e. re-read ALL files) @@ -785,19 +785,19 @@ check_file_datetime <- function(iso_file) { #' @param reread_files_with_errors whether to re-read files that had read in with errors the last time (default TRUE) #' @param reread_files_with_warnings whether to re-read files that had read in with warnings the last time (default TRUE) reread_iso_files <- function( - iso_files, ..., stop_if_missing = FALSE, - reread_only_changed_files = FALSE, + iso_files, ..., stop_if_missing = FALSE, + reread_only_changed_files = FALSE, reread_only_outdated_files = FALSE, reread_files_without_problems = TRUE, reread_files_with_errors = TRUE, reread_files_with_warnings = TRUE, quiet = default(quiet)) { - + # checks if(missing(iso_files) || !iso_is_object(iso_files)) stop("can only re-read iso_files, not objects of type ", class(iso_files)[1], call. 
= FALSE) single_file <- iso_is_file(iso_files) # to make sure return is the same as supplied iso_files <- iso_as_file_list(iso_files) - + # find file ids for reread all_files <- names(iso_files) old_files <- all_files[get_iso_object_outdated(iso_files)] @@ -811,21 +811,21 @@ reread_iso_files <- function( if (reread_files_with_warnings) reread_file_ids <- c(reread_file_ids, warning_files) if (reread_only_outdated_files) reread_file_ids <- intersect(reread_file_ids, old_files) reread_file_ids <- unique(reread_file_ids) - + # reread paths - file_paths <- + file_paths <- tibble( file_id = reread_file_ids, file_root = iso_files[reread_file_ids] %>% map_chr(get_ds_file_root) %>% as.character(), file_path = iso_files[reread_file_ids] %>% map_chr(get_ds_file_path, include_root = FALSE) %>% as.character(), file_exists = file.path(.data$file_root, .data$file_path) %>% map_lgl(file.exists) ) - + # safety check for non existent data files if (!all(file_paths$file_exists)) { - msg <- + msg <- # 'unique' paths to account for IARC type multi-file re-reads - file_paths %>% select(-.data$file_id) %>% filter(!.data$file_exists) %>% unique() %>% + file_paths %>% select(-.data$file_id) %>% filter(!.data$file_exists) %>% unique() %>% { sprintf( "%d file(s) do not exist at their referenced location and can not be re-read. 
Consider setting a new root directory with iso_set_file_root() first:\n - %s\n", @@ -837,26 +837,26 @@ reread_iso_files <- function( log_warning(msg) iso_files[filter(file_paths, !.data$file_exists)$file_id] <- map( - iso_files[filter(file_paths, !.data$file_exists)$file_id], - register_warning, - func = "reread_iso_files", + iso_files[filter(file_paths, !.data$file_exists)$file_id], + register_warning, + func = "reread_iso_files", details = "file does not exist at its referenced location and can not be re-read", warn = FALSE ) } } - + # finalize file paths - file_paths <- - file_paths %>% + file_paths <- + file_paths %>% # don't re-read non-existent - filter(.data$file_exists) %>% + filter(.data$file_exists) %>% # check if has cache mutate( cachepath = generate_cache_filepaths(file.path(.data$file_root, .data$file_path)), has_cache = file.exists(.data$cachepath) ) - + # check if only rereading changed files if (reread_only_changed_files) { file_paths <- filter(file_paths, !.data$has_cache) @@ -879,21 +879,21 @@ reread_iso_files <- function( reread_sum <- " with errors" } else if (!reread_files_without_problems && !reread_files_with_errors && reread_files_with_warnings) { reread_sum <- " with warnings" - } - sprintf("found %d %s%sdata file(s)%s, re-reading %d/%d%s", + } + sprintf("found %d %s%sdata file(s)%s, re-reading %d/%d%s", nrow(file_paths), changed, outdated, reread_sum, nrow(file_paths), length(all_files), - if(nrow(file_paths) > 0) { ":" } else {"."}) %>% + if(nrow(file_paths) > 0) { ":" } else {"."}) %>% log_message() } - + # reread files if (nrow(file_paths) > 0) { # 'unique' paths to account for IARC type multi-file re-reads reread_file_paths <- file_paths %>% select(-.data$file_id) %>% unique() args <- c(list( - paths = reread_file_paths$file_path, root = reread_file_paths$file_root, + paths = reread_file_paths$file_path, root = reread_file_paths$file_root, read_cache = reread_only_outdated_files, - reread_outdated_cache = reread_only_outdated_files), 
+ reread_outdated_cache = reread_only_outdated_files), list(...)) if (iso_is_continuous_flow(iso_files)) { # read continuous flow @@ -907,8 +907,8 @@ reread_iso_files <- function( } else { stop("re-reading iso_files objects of type ", class(iso_files[[1]])[1], " is not yet supported", call. = FALSE) } - - # replace the ones that were re-read, remove missing rereads, and add new files in case there were any + + # replace the ones that were re-read, remove missing rereads, and add new files in case there were any # (both missing and new can happen e.g. from updated iarc archives) actual_reread_ids <- intersect(names(new_iso_files), file_paths$file_id) missing_reread_ids <- setdiff(file_paths$file_id, actual_reread_ids) @@ -917,18 +917,18 @@ reread_iso_files <- function( if (length(missing_reread_ids) > 0) iso_files[missing_reread_ids] <- NULL if (length(new_ids) > 0) iso_files <- c(iso_files, new_iso_files[new_ids]) } - - # return single (if passed in as single) + + # return single (if passed in as single) if (single_file && length(iso_files) == 1) return (iso_files[[1]]) return(iso_files) } #' Re-read iso_files -#' -#' Sometimes it is useful to reload isotope files from their original data files (e.g. after modifying raw data files in vendor software, or after upgrading to a newer version of the isoreader package that provides new functionality). The functions described below are intended to make this very easy. However, re-reading files from disc is only possible if file paths still point to the original raw data files. If they have moved, please use \code{\link{iso_set_file_root}} first to change the root directory of your \code{iso_files}. -#' +#' +#' Sometimes it is useful to reload isotope files from their original data files (e.g. after modifying raw data files in vendor software, or after upgrading to a newer version of the isoreader package that provides new functionality). The functions described below are intended to make this very easy. 
However, re-reading files from disc is only possible if file paths still point to the original raw data files. If they have moved, please use \code{\link{iso_set_file_root}} first to change the root directory of your \code{iso_files}. +#' #' To re-read files that have been modified on disc, please use \code{iso_reread_changed_files()}. To re-read files because of an isoreader version upgrade, please use \code{iso_reread_outdated_files()}. To try re-reading files that previously had warnings and/or errors, please use \code{iso_reread_problem_files()}. -#' +#' #' @inheritParams iso_read_files #' @param iso_files collection of iso_files #' @param ... additional read parameters that should be used for re-reading the iso_files, see \code{\link{iso_read_dual_inlet}}, \code{\link{iso_read_continuous_flow}} and \code{\link{iso_read_scan}} for details (except \code{read_cache} which is always set to \code{FALSE} to force re-reads). @@ -941,8 +941,8 @@ iso_reread_files <- function(iso_files, ...) { iso_reread_changed_files(iso_files, ...) } -#' @details \code{iso_reread_all_files} re-reads all files in the collection. -#' +#' @details \code{iso_reread_all_files} re-reads all files in the collection. +#' #' @rdname iso_reread_files #'@examples #'\dontrun{ @@ -957,11 +957,11 @@ iso_reread_files <- function(iso_files, ...) { #'} #' @export iso_reread_all_files <- function( - iso_files, ..., stop_if_missing = FALSE, + iso_files, ..., stop_if_missing = FALSE, quiet = default(quiet)) { - + reread_iso_files( - iso_files, ..., stop_if_missing = stop_if_missing, + iso_files, ..., stop_if_missing = stop_if_missing, reread_only_changed_files = FALSE, reread_only_outdated_files = FALSE, reread_files_without_problems = TRUE, @@ -969,19 +969,19 @@ iso_reread_all_files <- function( reread_files_with_warnings = TRUE, quiet = quiet ) - + } -#' @details \code{iso_reread_changed_files} re-reads all files that have been modified (e.g. 
in the vendor software) since they were last read by isoreader. -#' +#' @details \code{iso_reread_changed_files} re-reads all files that have been modified (e.g. in the vendor software) since they were last read by isoreader. +#' #' @rdname iso_reread_files #' @export iso_reread_changed_files <- function( - iso_files, ..., stop_if_missing = FALSE, + iso_files, ..., stop_if_missing = FALSE, quiet = default(quiet)) { - + reread_iso_files( - iso_files, ..., stop_if_missing = stop_if_missing, + iso_files, ..., stop_if_missing = stop_if_missing, reread_only_changed_files = TRUE, reread_only_outdated_files = FALSE, reread_files_without_problems = TRUE, @@ -989,19 +989,19 @@ iso_reread_changed_files <- function( reread_files_with_warnings = TRUE, quiet = quiet ) - + } -#' @details \code{iso_reread_outdated_files} re-reads all files that were read with an outdated version of isoreader. -#' +#' @details \code{iso_reread_outdated_files} re-reads all files that were read with an outdated version of isoreader. +#' #' @rdname iso_reread_files #' @export iso_reread_outdated_files <- function( - iso_files, ..., stop_if_missing = FALSE, + iso_files, ..., stop_if_missing = FALSE, quiet = default(quiet)) { - + reread_iso_files( - iso_files, ..., stop_if_missing = stop_if_missing, + iso_files, ..., stop_if_missing = stop_if_missing, reread_only_changed_files = FALSE, reread_only_outdated_files = TRUE, reread_files_without_problems = TRUE, @@ -1011,17 +1011,17 @@ iso_reread_outdated_files <- function( ) } -#' @details \code{iso_reread_problem_files} re-reads all files that have had errors the last time they were read by isoreader (set \code{reread_files_with_warnings = TRUE} to also re-read those that have warninigs). -#' +#' @details \code{iso_reread_problem_files} re-reads all files that have had errors the last time they were read by isoreader (set \code{reread_files_with_warnings = TRUE} to also re-read those that have warnings). 
+#' #' @rdname iso_reread_files #' @export iso_reread_problem_files <- function( - iso_files, ..., stop_if_missing = FALSE, + iso_files, ..., stop_if_missing = FALSE, reread_files_with_errors = TRUE, reread_files_with_warnings = FALSE, quiet = default(quiet)) { - + reread_iso_files( - iso_files, ..., stop_if_missing = stop_if_missing, + iso_files, ..., stop_if_missing = stop_if_missing, reread_only_changed_files = FALSE, reread_only_outdated_files = FALSE, reread_files_without_problems = FALSE, @@ -1051,32 +1051,32 @@ iso_reread_archive <- function(...) { # generates the cash file paths for iso_files inclulding file name, the file size and last modified # does NOT include: file path, isoreader version, file contents, read_options generate_cache_filepaths <- function(filepaths) { - + # generate cache filepaths file.info(filepaths) %>% tibble::rownames_to_column(var = "filepath") %>% dplyr::mutate( cache_file = sprintf("%s_%s_%.0f.rds", basename(.data$filepath), format(.data$mtime, "%Y%m%d%H%M%S"), .data$size), cache_filepath = file.path(default("cache_dir"), .data$cache_file) - ) %>% + ) %>% dplyr::pull(.data$cache_filepath) } # generates old cache file path generate_old_cache_filepaths <- function(filepaths, read_options = list()) { - + calculate_unf_hash <- function(filepath, size, modified) { obj <- c(list(filepath, size, modified), read_options) unf(obj)$hash %>% str_c(collapse = "") } - + # old cached files versioning iso_v <- packageVersion("isoreader") %>% { if (.$major < 1) paste0(.$major, ".", .$minor) else paste0(.$major, ".0") } - + file_info <- file.info(filepaths) %>% dplyr::as_tibble() %>% rownames_to_column() %>% @@ -1086,7 +1086,7 @@ generate_old_cache_filepaths <- function(filepaths, read_options = list()) { cache_file = sprintf("iso_file_v%s_%s_%s.rds", !!iso_v, basename(.data$filepath), .data$hash), cache_filepath = file.path(default("cache_dir"), .data$cache_file) ) - + return(file_info$cache_filepath) } @@ -1100,28 +1100,28 @@ cache_iso_file <- 
function(iso_file, cachepath) { load_cached_iso_file <- function(filepath) { # safety check (should never be a problem) if (!file.exists(filepath)) stop("cached file does not exist: ", filepath, call. = FALSE) - + # load iso_file <- readr::read_rds(filepath) - + # make sure object in file was loaded properly if (!(iso_is_object(iso_file))) { sprintf("cached file '%s' does not contain iso_file(s)", basename(filepath)) %>% stop(call. = FALSE) } - + # return return(iso_file) } #' Cleanup cached files #' -#' Removes all cached files. +#' Removes all cached files. #' @param all deprecated #' @export iso_cleanup_reader_cache <- function(all = FALSE) { - + if (!missing(all)) warning("the 'all' parameter is deprecated because this function now always deletes all cached files", call. = FALSE, immediate. = TRUE) - + files <- list.files(default("cache_dir"), pattern = "^iso_?file_.*\\.rds$", full.names = TRUE) file.remove(files) if (!default("quiet")) message("Info: removed all (", length(files), ") cached isoreader files.") diff --git a/R/isoread_flow_iarc.R b/R/isoread_flow_iarc.R index 0240db69..30f46b91 100644 --- a/R/isoread_flow_iarc.R +++ b/R/isoread_flow_iarc.R @@ -1,4 +1,4 @@ -# read ionos .iarc archieves for their continuous flow data +# read ionos .iarc archives for their continuous flow data # @param ds the iso_file data structure to fill # @param custom reader options - none needed iso_read_flow_iarc <- function(ds, options = list()) { @@ -7,11 +7,11 @@ iso_read_flow_iarc <- function(ds, options = list()) { if(!iso_is_file(ds) || !is(ds, "continuous_flow")) stop("data structure must be a 'continuous_flow' iso_file", call. 
= FALSE) - # unzipping iarc archieve ==== + # unzipping iarc archive ==== folder_name <- ds$file_info$file_path %>% basename() %>% { str_replace(., fixed(get_file_ext(.)), "") } folder_path <- file.path(tempdir(), folder_name) if (!file.exists(folder_path)) { - if (!default("quiet")) log_message("unpacking isoprime archieve file...", prefix = " ") + if (!default("quiet")) log_message("unpacking isoprime archive file...", prefix = " ") unzip(get_ds_file_path(ds), exdir = folder_path) } diff --git a/R/settings.R b/R/settings.R index 77f78a14..733efc03 100644 --- a/R/settings.R +++ b/R/settings.R @@ -132,7 +132,7 @@ iso_turn_reader_caching_off <- function(data = NULL) { #' @param read_raw_data if provided, set as the default for `read_raw_data` parameters #' @param read_file_info if provided, set as the default for `read_file_info` parameters #' @param read_method_info if provided, set as the default for `read_method_info` parameters -#' @param read_vendor_data_table if provided, set as the default for `read_vendor_data_tabl` parameters +#' @param read_vendor_data_table if provided, set as the default for `read_vendor_data_table` parameters #' @export #' @family settings functions iso_set_default_read_parameters <- function(data = NULL, read_raw_data, read_file_info, read_method_info, read_vendor_data_table, quiet = default(quiet)) { diff --git a/R/units.R b/R/units.R index f5f59e31..fb0c2884 100644 --- a/R/units.R +++ b/R/units.R @@ -1,17 +1,17 @@ # Class Definitions ====== #' Generate values with units -#' +#' #' These functions generate values with units that work well within data frames and tibbles and implement safety checks on operations that combine values with different units. To retrieve the value without units, use \code{\link{iso_strip_units}} (works for single variables and data frames/tibbles). To retrieve the unit use \code{\link{iso_get_units}}. 
Note that to correctly combine data frames / tibbles that have values with units in them, use \link[vctrs:vec_bind]{vec_rbind} instead of \link{rbind} or \link[dplyr:bind]{bind_rows}. \link[vctrs:vec_bind]{vec_rbind} will combine columns that have values with units if they have the same unit and otherwise convert back to plain values without units with a warning. The other functions will either fail or reduce the unit values to plain values with a cryptic warning message about not preserving attributes. -#' +#' #' @details \code{iso_with_units} is the primary function to generate values with units. At present, only numeric values are supported so this function is just a shorter alias for the number-specific \code{iso_double_with_units}. It is not clear yet whether any non-numeric values with units make sense to be supported at a later point or whether integer and decimal numbers should be treated differently when they have units. -#' +#' #' @param x the values (single value or vector) #' @param units the units for the value, by default "undefined units" but this parameter should always be supplied when working with real data that has units #' @family functions for values with units #' @export iso_with_units <- function(x, units = "undefined units") { - if (is.numeric(x)) + if (is.numeric(x)) iso_double_with_units(x, units) else stop("cannot add units to a value of type '", class(x)[1], "', try parse_number() to turn your value into a number first", call. = FALSE) @@ -38,7 +38,7 @@ new_iso_double_with_units <- function(x = double(), units = "undefined units") { methods::setOldClass(c("iso_double_with_units", "vctrs_vctr")) #' Check if a value has units -#' +#' #' Check if a variable is a double with units. That is if it has been generated by \code{\link{iso_double_with_units}}. 
#' @param x vector to check for whether it is a double with units #' @family functions for values with units @@ -48,11 +48,11 @@ iso_is_double_with_units <- function(x) { } #' Retrieve number units -#' +#' #' This function returns the units of a numerical value generated by \code{\link{iso_double_with_units}}. It returns \code{NA}) for unitless variables. Returns a column-named vector of units if \code{x} is a data frame / tibble. Returns the direct units of \code{x} in all other cases. #' @param x variable to get the units for (vector or data frame) #' @family functions for values with units -#' @export +#' @export iso_get_units <- function(x) { if (is.data.frame(x)) { units <- purrr::map_chr(x, iso_get_units) @@ -65,7 +65,7 @@ iso_get_units <- function(x) { #' Strip units from variables -#' +#' #' This function converts numbers with units back into unitless numbers both for single variables and data frames / tibbles. For single variables, this is equivalent to the \code{as.numeric} function. #' @param x variable to strip units from (vector or data frame) #' @family functions for values with units @@ -80,23 +80,23 @@ iso_strip_units <- function(x) { } #' Make units explicit -#' +#' #' This function is intended for data frames / tibbles only and makes the units of columns that have numbers with units explicit in the column name. It also strips the units attribute from those columns using \code{\link{iso_strip_units}}. The reverse function is \code{\link{iso_make_units_implicit}}. 
-#' +#' #' @param df the data frame in which to make the units explicit #' @param prefix the prefix for the units #' @param suffix the suffix for the units #' @family functions for values with units -#' @examples +#' @examples #' # a data frame with implicit units #' df <- tibble(peak = 1:5, height = iso_double_with_units(1:5, "V")) -#' df -#' +#' df +#' #' # show with explicit units -#' iso_make_units_explicit(df) -#' +#' iso_make_units_explicit(df) +#' #' # show with explicit units (custom prefix & suffix) -#' iso_make_units_explicit(df, prefix = ".", suffix = "") +#' iso_make_units_explicit(df, prefix = ".", suffix = "") #' @export iso_make_units_explicit <- function(df, prefix = " [", suffix = "]") { if(!is.data.frame(df)) stop("can only make units explicit in data frames", call. = FALSE) @@ -109,49 +109,49 @@ iso_make_units_explicit <- function(df, prefix = " [", suffix = "]") { } #' Make units implicit -#' +#' #' This function is intended for data frames /tibbles only and tries to figure out which numeric columns have units in the column names and makes those units implicit using \code{\link{iso_double_with_units}}. The reverse function is \code{\link{iso_make_units_explicit}}. 
-#' @param df the data frame in which to make the units implicit/eplicit +#' @param df the data frame in which to make the units implicit/explicit #' @inheritParams iso_make_units_explicit -#' @examples +#' @examples #' # generate implicit units #' df <- tibble(peak = 1:5, `height [V]` = 1:5) -#' iso_make_units_implicit(df) -#' +#' iso_make_units_implicit(df) +#' #' # convert back and forth -#' iso_make_units_implicit(df) %>% iso_make_units_explicit() -#' +#' iso_make_units_implicit(df) %>% iso_make_units_explicit() +#' #' # implicit units from custom prefix & suffix #' df <- tibble(peak = 1:5, height.V = 1:5) -#' iso_make_units_implicit(df, prefix = ".", suffix = "") +#' iso_make_units_implicit(df, prefix = ".", suffix = "") #' @family functions for values with units #' @export iso_make_units_implicit <- function(df, prefix = " [", suffix = "]") { if(!is.data.frame(df)) stop("can only make units implicit in data frames", call. = FALSE) if(nchar(prefix) == 0) stop("prefix must be at least 1 character", call. 
= FALSE) col_names <- names(df) - # find pattern keeping in mind that prefix and suffix could be a random set of + # find pattern keeping in mind that prefix and suffix could be a random set of # characters so we don't just want to stick them into a regexp and use fixed instead if (nchar(suffix) > 0) ends_with_suffix <- stringr::str_ends(col_names, fixed(suffix)) - else + else ends_with_suffix <- rep(TRUE, length(col_names)) col_names <- stringr::str_sub(col_names, 1L, -1L - ends_with_suffix * nchar(suffix)) prefix <- stringr::str_locate_all(col_names, fixed(prefix)) prefix_start <- prefix %>% purrr::map(~.x[,1]) %>% purrr::map_int(~if(length(.x) == 0) { NA_integer_ } else { max(.x) }) prefix_end <- prefix %>% purrr::map(~.x[,2]) %>% purrr::map_int(~if(length(.x) == 0) { NA_integer_ } else { max(.x) }) has_units <- ends_with_suffix & !is.na(prefix_end) - + # update units units <- stringr::str_sub(col_names[has_units], prefix_end[has_units] + 1L) df[has_units] <- map2(df[has_units], units, ~iso_double_with_units(.x, units = .y)) - + # update column names col_names <- stringr::str_sub(col_names, 1L, prefix_start - 1L) new_col_names <- names(df) new_col_names[has_units] <- col_names[has_units] names(df) <- new_col_names - + return(df) } @@ -161,7 +161,7 @@ check_units_identical <- function(x, y, warn_if_not = FALSE) { if (!check && warn_if_not) { glue::glue( "don't know how to reconcile different units '{iso_get_units(x)}' and ", - "'{iso_get_units(y)}', converting to double without units to continue") %>% + "'{iso_get_units(y)}', converting to double without units to continue") %>% warning(call. = FALSE, immediate. = TRUE) } return(check) @@ -236,7 +236,7 @@ vec_cast.iso_double_with_units.iso_double_with_units <- function(x, to, ...) 
{ } else { # convert to a double without units return(vctrs::vec_data(x)) - } + } } # combining a double with units with a double without units yields a double without units @@ -375,7 +375,7 @@ vec_arith.numeric.iso_double_with_units <- function(op, x, y, ...) { #' @method vec_arith.iso_double_with_units MISSING #' @export vec_arith.iso_double_with_units.MISSING <- function(op, x, y, ...) { - switch(op, + switch(op, `-` = x * -1, `+` = x, vctrs::stop_incompatible_op(op, x, y) @@ -387,9 +387,9 @@ vec_arith.iso_double_with_units.MISSING <- function(op, x, y, ...) { # convert data frame globas units attr to implicit units using iso_double_with_unit # if there is no data frame units attribute, returns df convert_df_units_attr_to_implicit_units <- function(df) { - + if (!is.data.frame(df) || length(df) == 0) return(df) - + units <- attr(df, "units") if (is.null(units) || !is.data.frame(units) || !all(c("column", "units") %in% names(units))) { @@ -397,39 +397,39 @@ convert_df_units_attr_to_implicit_units <- function(df) { attr(df, "units") <- NULL return(df) } - + # process units - units <- units %>% + units <- units %>% # find out which columns are numeric dplyr::left_join( purrr::map_lgl(df, is.numeric) %>% tibble::enframe("column", "numeric"), by = "column" - ) %>% + ) %>% filter(nchar(units) > 0) - + # info check if (nrow(problematic <- filter(units, !numeric)) > 0) { glue::glue("encountered non-numeric data table columns with units: ", "{paste(problematic$units, collapse = ', ')}. Only numeric column ", - "units can be preserved.") %>% + "units can be preserved.") %>% warning(immediate. = TRUE, call. 
= FALSE) } - + # convert columns into double_with_units - units <- dplyr::filter(units, numeric) %>% + units <- dplyr::filter(units, numeric) %>% dplyr::mutate(units = stringr::str_remove(units, "^\\[") %>% stringr::str_remove("\\]$")) - + # construct the conversion quos - unit_quos <- - with(units, - purrr::map2(column, units, - ~quo(iso_double_with_units(!!sym(.x), units = !!.y))) %>% + unit_quos <- + with(units, + purrr::map2(column, units, + ~quo(iso_double_with_units(!!sym(.x), units = !!.y))) %>% rlang::set_names(column)) - + # convert the units df <- dplyr::mutate(df, !!!unit_quos) attr(df, "units") <- NULL - + return(df) } @@ -438,7 +438,7 @@ convert_df_units_attr_to_implicit_units <- function(df) { #' Format values #' #' Convenience function to easily format and concatenate text and numeric values. Can be used with any test and number data. Automatically detects \code{\link{iso_with_units}} values and incorporates the units into the formatting. -#' +#' #' @param ... variable names with data. Must have the same dimensions if multiple are supplied. Can be named to rename variable name output. Will include units in output for all \link{iso_with_units}. #' @param signif number of significant digits for numbered data #' @param format_names how to format the variable names, set to \code{NULL} to remove names @@ -456,15 +456,15 @@ iso_format <- function(..., signif = 3, format_names = "%s: ", format_units="%s" vars <- rlang::enquos(...) has_name <- nchar(names(vars)) > 0 names(vars)[!has_name] <- map_chr(vars[!has_name], rlang::as_label) - + # evaluate variables vars <- purrr::map(vars, rlang::eval_tidy) - + # check length vars_size <- purrr::map_int(vars, length) if (!all(vars_size == vars_size[1])) stop("iso_format encountered variables with unequal lengths", call. 
= FALSE) - + # format data values <- purrr::map2(vars, names(vars), ~{ value <- @@ -476,12 +476,10 @@ iso_format <- function(..., signif = 3, format_names = "%s: ", format_units="%s" if (!is.null(format_names)) value <- paste0(sprintf(format_names, .y), value) value }) - + # full text return( do.call(paste, args = c(values, list(sep = sep))) %>% stringr::str_replace_all(fixed("permil"), "\u2030") ) } - - diff --git a/R/utils.R b/R/utils.R index b66c6ff7..ddf49962 100644 --- a/R/utils.R +++ b/R/utils.R @@ -382,7 +382,7 @@ unlist_paths <- function(path_list) { #' Expand file paths #' -#' Helper function to expand the provided paths to find data files in folders and subfolders that match any of the specified extensions. Filepaths will be kept as is, only folders will be expanded. Note that this function is rarely called directly. It is used automatically by \code{\link{iso_read_dual_inlet}} and \code{\link{iso_read_continuous_flow}} to identify fiels of interest based on the file paths provided. +#' Helper function to expand the provided paths to find data files in folders and subfolders that match any of the specified extensions. Filepaths will be kept as is, only folders will be expanded. Note that this function is rarely called directly. It is used automatically by \code{\link{iso_read_dual_inlet}} and \code{\link{iso_read_continuous_flow}} to identify files of interest based on the file paths provided. #' #' @param path vector of file/folder paths, mixed relative and absolute paths are allowed. #' @param extensions which extensions to look for? (with or without leading .) - this is typically one or more of the extensions listed by \code{\link{iso_get_supported_file_types}} @@ -465,7 +465,7 @@ iso_root_paths <- function(path, root = ".", check_existence = TRUE) { #' Shorten relative paths #' -#' Convenience function to shorten relative paths based on overlap with the provided root(s). Also simplifies current directory repeats (e.g. "././." 
becomes ".") for better legiblity. Does not check whether the original or resulting paths point to valid files or folders. Relative paths that do not start with the supplied \code{root} default back to the current working directory (\code{.}). Absolute paths are allowed but are returned as is without attempts at shortening. See \code{iso_find_absolute_path_roots} for rooting absolute paths. +#' Convenience function to shorten relative paths based on overlap with the provided root(s). Also simplifies current directory repeats (e.g. "././." becomes ".") for better legibility. Does not check whether the original or resulting paths point to valid files or folders. Relative paths that do not start with the supplied \code{root} default back to the current working directory (\code{.}). Absolute paths are allowed but are returned as is without attempts at shortening. See \code{iso_find_absolute_path_roots} for rooting absolute paths. #' #' @inheritParams iso_expand_paths #' @return a data frame with the root directories and paths relative to the root - order of input paths is preserved @@ -535,7 +535,7 @@ iso_shorten_relative_paths <- function(path, root = ".") { #' Find roots for absolute paths #' -#' Helper function to find the roots of absolute paths. Tries to put absolute paths into the context of the relative root. For those that this is not possible (because they are not in fact a sub-path of the relative roots), identifies the greatest common denominator for absolute paths as their root. Does not change relative paths but does check wheter they do exist if \code{check_existence = TRUE} (the default). To modify relative paths, use \link{iso_shorten_relative_paths} prior to calling this function. +#' Helper function to find the roots of absolute paths. Tries to put absolute paths into the context of the relative root. 
For those that this is not possible (because they are not in fact a sub-path of the relative roots), identifies the greatest common denominator for absolute paths as their root. Does not change relative paths but does check whether they do exist if \code{check_existence = TRUE} (the default). To modify relative paths, use \link{iso_shorten_relative_paths} prior to calling this function. #' @inheritParams iso_expand_paths #' @param check_existence whether to check for the existence of the paths #' @return a data frame with the root directories and paths relative to the root - order of input paths is preserved diff --git a/R/zzz.R b/R/zzz.R index fe8fff46..269d3125 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -16,7 +16,7 @@ initialize_options <- function() { isoreader.file_readers = NULL ) options(default_options) - + # set temporary options used during file processing temp_options <- list( isoreader_temp.parallel_process = NA_integer_, @@ -25,7 +25,7 @@ initialize_options <- function() { isoreader_temp.progress_bar = NULL ) options(temp_options) - + # register file readers iso_register_dual_inlet_file_reader(".did", "iso_read_did", "Dual Inlet file format (newer)", "Isodat", env = "isoreader") iso_register_dual_inlet_file_reader(".caf", "iso_read_caf", "Dual Inlet file format (older)", "Isodat", env = "isoreader") @@ -34,7 +34,7 @@ initialize_options <- function() { iso_register_dual_inlet_file_reader(".di.rds", "iso_read_rds", "R Data Storage", "isoreader", cacheable = FALSE, post_read_check = FALSE, env = "isoreader") iso_register_continuous_flow_file_reader(".cf", "iso_read_cf", "Continuous Flow file format (older)", "Isodat", env = "isoreader") iso_register_continuous_flow_file_reader(".dxf", "iso_read_dxf", "Continuous Flow file format (newer)", "Isodat", env = "isoreader") - iso_register_continuous_flow_file_reader(".iarc", "iso_read_flow_iarc", "Continous Flow data archieve", "ionOS", env = "isoreader") + iso_register_continuous_flow_file_reader(".iarc", 
"iso_read_flow_iarc", "Continuous Flow data archive", "ionOS", env = "isoreader") iso_register_continuous_flow_file_reader(".cf.rda", "iso_read_rda", "R Data Archive (deprecated)", "isoreader", cacheable = FALSE, env = "isoreader") iso_register_continuous_flow_file_reader(".cf.rds", "iso_read_rds", "R Data Storage", "isoreader", cacheable = FALSE, post_read_check = FALSE, env = "isoreader") iso_register_scan_file_reader(".scn", "iso_read_scn", "Scan file format", "Isodat", env = "isoreader") diff --git a/README.Rmd b/README.Rmd index 9ff81847..cc3439b6 100644 --- a/README.Rmd +++ b/README.Rmd @@ -14,20 +14,20 @@ knitr::opts_chunk$set( version <- as.character(packageVersion("isoreader")) ``` -# isoreader +# isoreader [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/isoreader)](https://cran.r-project.org/package=isoreader) [![Git_Hub_Version](https://img.shields.io/badge/GitHub-`r version`-orange.svg?style=flat-square)](https://github.com/isoverse/isoreader/commits) -[![Documentation](https://img.shields.io/badge/docs-online-green.svg)](http://isoreader.isoverse.org/) +[![Documentation](https://img.shields.io/badge/docs-online-green.svg)](https://isoreader.isoverse.org/) [![R build status](https://github.com/isoverse/isoreader/workflows/R-CMD-check/badge.svg)](https://github.com/isoverse/isoreader/actions?workflow=R-CMD-check) [![Binder](https://img.shields.io/badge/launch-RStudio-blue.svg)](https://mybinder.org/v2/gh/isoverse/isoreader/binder?urlpath=rstudio) [![Binder](https://img.shields.io/badge/launch-Jupyter-orange.svg)](https://mybinder.org/v2/gh/isoverse/isoreader/binder?urlpath=lab) ## About -This package is intended as a unified one-stop command line interface to all common IRMS (isotope ratio mass spectrometry) file formats used in stable isotope geochemistry. 
It is an extension and highly stream-lined re-implemention of the proof-of-concept [isoread](https://github.com/sebkopf/isoread) package and is designed to fit into a larger framework of IRMS data tools that includes the web-based graphical user interface package [isoviewer](https://github.com/isoverse/isoviewer) and the data processing and visualization pipeline [isoprocessor](https://github.com/isoverse/isoprocessor). +This package is intended as a unified one-stop command line interface to all common IRMS (isotope ratio mass spectrometry) file formats used in stable isotope geochemistry. It is an extension and highly stream-lined re-implementation of the proof-of-concept [isoread](https://github.com/sebkopf/isoread) package and is designed to fit into a larger framework of IRMS data tools that includes the web-based graphical user interface package [isoviewer](https://github.com/isoverse/isoviewer) and the data processing and visualization pipeline [isoprocessor](https://github.com/isoverse/isoprocessor). -[isoreader](http://isoreader.isoverse.org/) enables the reading and processing of stable isotope data directly from the data files and thus provides a tool for platform-independent (Windows, Mac, Linux), efficient and reproducible data reduction. Although implemented in R, it can be used in both RMarkdown as well as Jupyter data processing notebooks and also provides functionality for easy export to Python using the shared R/Python feather file format. At present, it can read most Thermo dual inlet (.did, .caf) and continuous flow (.dxf, .cf) data files as well as Elementar continuous flow data archives (.iarc) with additional extensions for other file formats in the works. 
Due to the dynamic implementation and design based on the popular [tidyverse](https://www.tidyverse.org/) style of R programming, isoreader is easily extendable, takes care of error catching to avoid pipeline breaks due to problems encountered in source data files (modeled after [readr](https://readr.tidyverse.org/)) and works great with [tidyverse](https://www.tidyverse.org/) packages such as [tidyr](https://tidyr.tidyverse.org/), [dplyr](https://dplyr.tidyverse.org/) and [ggplot](https://ggplot2.tidyverse.org/). +[isoreader](https://isoreader.isoverse.org/) enables the reading and processing of stable isotope data directly from the data files and thus provides a tool for platform-independent (Windows, Mac, Linux), efficient and reproducible data reduction. Although implemented in R, it can be used in both RMarkdown as well as Jupyter data processing notebooks and also provides functionality for easy export to Python using the shared R/Python feather file format. At present, it can read most Thermo dual inlet (.did, .caf) and continuous flow (.dxf, .cf) data files as well as Elementar continuous flow data archives (.iarc) with additional extensions for other file formats in the works. Due to the dynamic implementation and design based on the popular [tidyverse](https://www.tidyverse.org/) style of R programming, isoreader is easily extendable, takes care of error catching to avoid pipeline breaks due to problems encountered in source data files (modeled after [readr](https://readr.tidyverse.org/)) and works great with [tidyverse](https://www.tidyverse.org/) packages such as [tidyr](https://tidyr.tidyverse.org/), [dplyr](https://dplyr.tidyverse.org/) and [ggplot](https://ggplot2.tidyverse.org/). ## Installation @@ -35,7 +35,7 @@ You can install isoreader from github with the devtools package (version > 1.13. 
```{r gh-installation, eval = FALSE} # installs the development tools package if not yet installed -if(!requireNamespace("devtools", quietly = TRUE)) install.packages("devtools") +if(!requireNamespace("devtools", quietly = TRUE)) install.packages("devtools") # install.packages("devtools") # only needed once devtools::install_github("isoverse/isoreader") @@ -49,27 +49,27 @@ Currently supported file types: ```{r, echo=FALSE, warning=FALSE, message=FALSE} library(isoreader) -iso_get_supported_file_types() %>% - dplyr::select(extension, software, description, type) %>% +iso_get_supported_file_types() %>% + dplyr::select(extension, software, description, type) %>% knitr::kable() ``` - - for a full reference of all available functions, see the **[Function Reference](http://isoreader.isoverse.org/reference/)** - - for an example of how to work with continuos flow data files, see the vignette on **[Continuous Flow](http://isoreader.isoverse.org/articles/continuous_flow.html)** - - for an example of how to work with dual inlet data files, see the vignette on **[Dual Inlet](http://isoreader.isoverse.org/articles/dual_inlet.html)** + - for a full reference of all available functions, see the **[Function Reference](https://isoreader.isoverse.org/reference/)** + - for an example of how to work with continuous flow data files, see the vignette on **[Continuous Flow](https://isoreader.isoverse.org/articles/continuous_flow.html)** + - for an example of how to work with dual inlet data files, see the vignette on **[Dual Inlet](https://isoreader.isoverse.org/articles/dual_inlet.html)** ## Troubleshooting -If you run into a file format that is not currently supported or any issues with supported formats, please file a request/bug report in the [issue tracker](https://github.com/isoverse/isoreader/issues). Likewise if you run into any unexpected behaviour or uncaught errors. 
Most isoreader functionality is continuously tested on Unix and Windows systems using [Travis](https://travis-ci.org/) and [AppVeyor](https://ci.appveyor.com/), respectively. This makes it possible to ensure proper functionality and catch issues quickly, however, sometimes something slips through or is not yet automatically tested. We try to make sure to fix such errors as soon as possible but ask for patience due to the small develoment team. If you have the skills and are willing to fix problems yourself, that's great, please take a look at the development section below. +If you run into a file format that is not currently supported or any issues with supported formats, please file a request/bug report in the [issue tracker](https://github.com/isoverse/isoreader/issues). Likewise if you run into any unexpected behavior or uncaught errors. Most isoreader functionality is continuously tested on Unix and Windows systems using [Travis](https://travis-ci.org/) and [AppVeyor](https://ci.appveyor.com/), respectively. This makes it possible to ensure proper functionality and catch issues quickly, however, sometimes something slips through or is not yet automatically tested. We try to make sure to fix such errors as soon as possible but ask for patience due to the small development team. If you have the skills and are willing to fix problems yourself, that's great, please take a look at the development section below. ## Development -If you are interested in helping with development, that's fantastic! Please fork the repository and branch off from the [dev branch](https://github.com/isoverse/isoreader/tree/dev) since it contains the most up-to-date development version of [isoreader](http://isoreader.isoverse.org/). Make sure to write [```testthat``` tests](http://r-pkgs.had.co.nz/tests.html) for your work (stored in the tests/testthat directory). All tests can be run automatically and continuously during development to make it easier to spot any code problems on the go. 
The easiest way to run them is by running ```make auto_test``` in the [isoreader](http://isoreader.isoverse.org/) directory from command line (it will test everything automatically in a completely separate R session). +If you are interested in helping with development, that's fantastic! Please fork the repository and branch off from the [dev branch](https://github.com/isoverse/isoreader/tree/dev) since it contains the most up-to-date development version of [isoreader](https://isoreader.isoverse.org/). Make sure to write [```testthat``` tests](http://r-pkgs.had.co.nz/tests.html) for your work (stored in the tests/testthat directory). All tests can be run automatically and continuously during development to make it easier to spot any code problems on the go. The easiest way to run them is by running ```make auto_test``` in the [isoreader](https://isoreader.isoverse.org/) directory from command line (it will test everything automatically in a completely separate R session). ## Open Source -[isoreader](http://isoreader.isoverse.org/) is and will always be fully open-source (i.e. free as in 'freedom' and free as in 'free beer') and is provided as is. The source code is released under GPL-2. +[isoreader](https://isoreader.isoverse.org/) is and will always be fully open-source (i.e. free as in *freedom* and free as in *free beer*) and is provided as is. The source code is released under GPL-2. -## isoverse +## isoverse This package is part of the isoverse suite of data tools for stable isotopes. If you like the functionality that isoverse packages provide to the geochemical community, please help us spread the word and include an isoverse or individual package logo on one of your posters or slides. All logos are posted in high resolution in [this repository](https://github.com/isoverse/logos). 
diff --git a/README.md b/README.md index ccb435d6..9f40e22e 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,11 @@ -# isoreader +# isoreader [![CRAN\_Status\_Badge](http://www.r-pkg.org/badges/version/isoreader)](https://cran.r-project.org/package=isoreader) -[![Git\_Hub\_Version](https://img.shields.io/badge/GitHub-1.2.2-orange.svg?style=flat-square)](https://github.com/isoverse/isoreader/commits) -[![Documentation](https://img.shields.io/badge/docs-online-green.svg)](http://isoreader.isoverse.org/) +[![Git\_Hub\_Version](https://img.shields.io/badge/GitHub-1.2.3-orange.svg?style=flat-square)](https://github.com/isoverse/isoreader/commits) +[![Documentation](https://img.shields.io/badge/docs-online-green.svg)](https://isoreader.isoverse.org/) [![R build status](https://github.com/isoverse/isoreader/workflows/R-CMD-check/badge.svg)](https://github.com/isoverse/isoreader/actions?workflow=R-CMD-check) [![Binder](https://img.shields.io/badge/launch-RStudio-blue.svg)](https://mybinder.org/v2/gh/isoverse/isoreader/binder?urlpath=rstudio) @@ -16,7 +16,7 @@ status](https://github.com/isoverse/isoreader/workflows/R-CMD-check/badge.svg)]( This package is intended as a unified one-stop command line interface to all common IRMS (isotope ratio mass spectrometry) file formats used in stable isotope geochemistry. It is an extension and highly stream-lined -re-implemention of the proof-of-concept +re-implementation of the proof-of-concept [isoread](https://github.com/sebkopf/isoread) package and is designed to fit into a larger framework of IRMS data tools that includes the web-based graphical user interface package @@ -24,7 +24,7 @@ web-based graphical user interface package processing and visualization pipeline [isoprocessor](https://github.com/isoverse/isoprocessor). 
-[isoreader](http://isoreader.isoverse.org/) enables the reading and +[isoreader](https://isoreader.isoverse.org/) enables the reading and processing of stable isotope data directly from the data files and thus provides a tool for platform-independent (Windows, Mac, Linux), efficient and reproducible data reduction. Although implemented in R, it @@ -51,7 +51,7 @@ You can install isoreader from github with the devtools package (version ``` r # installs the development tools package if not yet installed -if(!requireNamespace("devtools", quietly = TRUE)) install.packages("devtools") +if(!requireNamespace("devtools", quietly = TRUE)) install.packages("devtools") # install.packages("devtools") # only needed once devtools::install_github("isoverse/isoreader") @@ -76,34 +76,34 @@ Currently supported file types: | .di.rds | isoreader | R Data Storage | dual inlet | | .cf | Isodat | Continuous Flow file format (older) | continuous flow | | .dxf | Isodat | Continuous Flow file format (newer) | continuous flow | -| .iarc | ionOS | Continous Flow data archieve | continuous flow | +| .iarc | ionOS | Continuous Flow data archive | continuous flow | | .cf.rda | isoreader | R Data Archive (deprecated) | continuous flow | | .cf.rds | isoreader | R Data Storage | continuous flow | | .scn | Isodat | Scan file format | scan | | .scan.rds | isoreader | R Data Storage | scan | - for a full reference of all available functions, see the **[Function - Reference](http://isoreader.isoverse.org/reference/)** - - for an example of how to work with continuos flow data files, see + Reference](https://isoreader.isoverse.org/reference/)** + - for an example of how to work with continuous flow data files, see the vignette on **[Continuous - Flow](http://isoreader.isoverse.org/articles/continuous_flow.html)** + Flow](https://isoreader.isoverse.org/articles/continuous_flow.html)** - for an example of how to work with dual inlet data files, see the vignette on **[Dual - 
Inlet](http://isoreader.isoverse.org/articles/dual_inlet.html)** + Inlet](https://isoreader.isoverse.org/articles/dual_inlet.html)** ## Troubleshooting If you run into a file format that is not currently supported or any issues with supported formats, please file a request/bug report in the [issue tracker](https://github.com/isoverse/isoreader/issues). Likewise -if you run into any unexpected behaviour or uncaught errors. Most +if you run into any unexpected behavior or uncaught errors. Most isoreader functionality is continuously tested on Unix and Windows systems using [Travis](https://travis-ci.org/) and [AppVeyor](https://ci.appveyor.com/), respectively. This makes it possible to ensure proper functionality and catch issues quickly, however, sometimes something slips through or is not yet automatically tested. We try to make sure to fix such errors as soon as possible but -ask for patience due to the small develoment team. If you have the +ask for patience due to the small development team. If you have the skills and are willing to fix problems yourself, that’s great, please take a look at the development section below. @@ -113,23 +113,23 @@ If you are interested in helping with development, that’s fantastic\! Please fork the repository and branch off from the [dev branch](https://github.com/isoverse/isoreader/tree/dev) since it contains the most up-to-date development version of -[isoreader](http://isoreader.isoverse.org/). Make sure to write +[isoreader](https://isoreader.isoverse.org/). Make sure to write [`testthat` tests](http://r-pkgs.had.co.nz/tests.html) for your work (stored in the tests/testthat directory). All tests can be run automatically and continuously during development to make it easier to spot any code problems on the go. 
The easiest way to run them is by running `make auto_test` in the -[isoreader](http://isoreader.isoverse.org/) directory from command line +[isoreader](https://isoreader.isoverse.org/) directory from command line (it will test everything automatically in a completely separate R session). ## Open Source -[isoreader](http://isoreader.isoverse.org/) is and will always be fully -open-source (i.e. free as in ‘freedom’ and free as in ‘free beer’) and +[isoreader](https://isoreader.isoverse.org/) is and will always be fully +open-source (i.e. free as in *freedom* and free as in *free beer*) and is provided as is. The source code is released under GPL-2. -## isoverse +## isoverse This package is part of the isoverse suite of data tools for stable isotopes. If you like the functionality that isoverse packages provide diff --git a/_pkgdown.yml b/_pkgdown.yml index d6395fb3..5cf49fe9 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,5 +1,5 @@ title: Isoreader -url: http://isoreader.isoverse.org/ +url: https://isoreader.isoverse.org/ template: params: bootswatch: simplex @@ -115,6 +115,7 @@ reference: desc: > contents: + - iso_problem_functions - iso_has_problems - iso_get_problems_summary - iso_get_problems @@ -140,13 +141,23 @@ reference: - iso_is_continuous_flow - iso_is_dual_inlet - iso_debug_mode + - read_iso_file + - reread_iso_files + - set_temp - map_binary_structure - print.binary_structure_map - print.iso_file_list - print.iso_file + - vec_arith.iso_double_with_units + - vec_cast.iso_double_with_units + - vec_ptype2.iso_double_with_units - title: Moved to isoprocessor or deprecated contents: + - iso_get_data + - iso_get_resistors_info + - iso_get_standards_info + - iso_omit_files_with_problems - iso_calculate_ratios - iso_convert_signals - iso_convert_time diff --git a/cran-comments.md b/cran-comments.md new file mode 100644 index 00000000..e89e7fd1 --- /dev/null +++ b/cran-comments.md @@ -0,0 +1,26 @@ +This is a new package. 
+ +## Test environments + +* Local OS X install, R 4.0.2 +* Mac OS X 10.15.6 (on GitHub), R 4.0.2 (release), R 4.1.0 (devel) +* Ubuntu 16.04 (on GitHub), R 4.0.2 (release) +* Windows Server 2019 (on GitHub), R 4.0.2 (release) +* Win-builder (release and devel) + +## R CMD check results + +There were no ERRORs or WARNINGs. + +There was 1 NOTE: + +> checking installed package size ... NOTE + installed size is 5.1Mb + sub-directories of 1Mb or more: + extdata 3.4Mb + +This package provides an interface to various file formats commonly used in the scientific field of isotope geochemistry. The `extdata` folder holds 12 example files for 8 different file formats adding up to 3.4Mb. These files are used in the vignettes and function examples and are included to make it easier for users of this package to explore its functionality. + +## Downstream dependencies + +There are currently no downstream dependencies for this package. diff --git a/man/extract_data.Rd b/man/extract_data.Rd index 804a5376..7fb540b8 100644 --- a/man/extract_data.Rd +++ b/man/extract_data.Rd @@ -4,24 +4,24 @@ \alias{extract_data} \title{Overview of text data extraction functions} \description{ -The following functions are intened to make it easy to extract relevant information from textual data. -These functions are primarily intended for use in \code{\link{iso_mutate_file_info}} and inside the filtering conditions passed to \code{\link{iso_filter_files}}. However, they can of course also be used stand-alone and in regular \code{\link[dplyr]{mutate}} or \code{\link[dplyr]{filter}} calls on the data frames returned by the data retrievel functions (\code{\link{iso_get_raw_data}}, \code{\link{iso_get_file_info}}, \code{\link{iso_get_vendor_data_table}}, etc.). Not that all the \code{parse_} functions are used in \code{\link{iso_parse_file_info}} for easy type conversions. +The following functions are intended to make it easy to extract relevant information from textual data. 
+These functions are primarily intended for use in \code{\link{iso_mutate_file_info}} and inside the filtering conditions passed to \code{\link{iso_filter_files}}. However, they can of course also be used stand-alone and in regular \code{\link[dplyr]{mutate}} or \code{\link[dplyr]{filter}} calls on the data frames returned by the data retrieval functions (\code{\link{iso_get_raw_data}}, \code{\link{iso_get_file_info}}, \code{\link{iso_get_vendor_data_table}}, etc.). Not that all the \code{parse_} functions are used in \code{\link{iso_parse_file_info}} for easy type conversions. } \details{ For simultaneous extraction of pure text data into multiple columns, please see the \code{\link[tidyr]{extract}} function from the \link{tidyr} package. \itemize{ -\item \code{\link{extract_substring}} is a generic convience function to extract parts of textual data (based on regular expression matches). +\item \code{\link{extract_substring}} is a generic convenience function to extract parts of textual data (based on regular expression matches). Can be used in combination with the parsing functions to turn extracted substrings into numerical or logical data. -\item \code{\link{extract_word}} is a more specific convenience function to extract the 1st/2nd/3rd word from textual data. +\item \code{\link{extract_word}} is a more specific convenience function to extract the 1st/2nd/3rd word from textual data. -\item \code{\link[readr:parse_atomic]{parse_number}} is a convenience function to extract a number even if it is surrouded by text (re-exported from the \link{readr} package). +\item \code{\link[readr:parse_atomic]{parse_number}} is a convenience function to extract a number even if it is surrounded by text (re-exported from the \link{readr} package). 
-\item \code{\link[readr:parse_atomic]{parse_double}} parses text that holds double (decimal) numerical values without any extraneous text around - +\item \code{\link[readr:parse_atomic]{parse_double}} parses text that holds double (decimal) numerical values without any extraneous text around - use \code{\link[readr:parse_atomic]{parse_number}} instead if this is not the case (re-exported from the \link{readr} package) -\item \code{\link[readr:parse_atomic]{parse_integer}} parses text that holds integer (whole number) numerical values without any extraneous text around - +\item \code{\link[readr:parse_atomic]{parse_integer}} parses text that holds integer (whole number) numerical values without any extraneous text around - use \code{\link[readr:parse_atomic]{parse_number}} instead if this is not the case (re-exported from the \link{readr} package) \item \code{\link[readr:parse_atomic]{parse_logical}} parses text that holds logical (boolean, i.e. TRUE/FALSE) values (re-exported from the \link{readr} package) diff --git a/man/extract_substring.Rd b/man/extract_substring.Rd index b9135095..ad204aab 100644 --- a/man/extract_substring.Rd +++ b/man/extract_substring.Rd @@ -19,7 +19,7 @@ extract_substring( \item{capture_n}{within each string, which match of the \code{pattern} should be extracted? e.g. if the pattern searches for words, should the first, second or third word be captured?} -\item{capture_bracket}{for the captured match, which capture group should be extracted? i.e. which parentheses-enclosed segment of the \code{pattern}? +\item{capture_bracket}{for the captured match, which capture group should be extracted? i.e. which parentheses-enclosed segment of the \code{pattern}? by default captures the whole pattern (\code{capture_bracket = 0}).} \item{missing}{what to replace missing values with? 
Note that values can be missing because there are not enough captured matches or because the actual capture_bracket is empty.} @@ -28,7 +28,7 @@ by default captures the whole pattern (\code{capture_bracket = 0}).} character vector of same length as \code{string} with the extracted substrings } \description{ -This is a convenience function to capture substrings from textual data. +This is a convenience function to capture substrings from textual data. Uses \code{\link[stringr:str_match]{str_match_all}} internally but instead of returning everything, always returns only one single part of the match, depending on parameters \code{capture_n} and \code{capture_group}. } \seealso{ diff --git a/man/extract_word.Rd b/man/extract_word.Rd index 6ebf931a..9b6f539b 100644 --- a/man/extract_word.Rd +++ b/man/extract_word.Rd @@ -33,11 +33,11 @@ extract_word( \item{missing}{what to replace missing values with? Note that values can be missing because there are not enough captured matches or because the actual capture_bracket is empty.} } \description{ -This extracts words from text, by default looks for continuous sequences of numbers and/or letters. +This extracts words from text, by default looks for continuous sequences of numbers and/or letters. Can adjust whether characters such as "_", "-", " ", and "." should be counted as part of a word or separate them and whether numbers should be included. 
} \examples{ -x_text <- extract_word(c("sample number16.2", "sample number7b"), +x_text <- extract_word(c("sample number16.2", "sample number7b"), capture_n = 2, include_colon = TRUE) # "number16.2" "number7b" x_num <- parse_number(x_text) diff --git a/man/file_readers.Rd b/man/file_readers.Rd index 0c693e58..21cdeb3c 100644 --- a/man/file_readers.Rd +++ b/man/file_readers.Rd @@ -46,7 +46,7 @@ iso_register_scan_file_reader( \item{description}{what is this file type about?} -\item{software}{what is the software program that creates this filetype?} +\item{software}{what is the software program that creates this file type?} \item{cacheable}{whether this file type is cacheable. If \code{TRUE} (the default), user requests to cache the file will be honored. If \code{FALSE}, this file type will never be cached no matter what the user requests.} diff --git a/man/iso_add_file_info.Rd b/man/iso_add_file_info.Rd index 6e0b1e27..7e0af117 100644 --- a/man/iso_add_file_info.Rd +++ b/man/iso_add_file_info.Rd @@ -17,7 +17,7 @@ iso_add_file_info(...) \item{new_file_info}{data frame with new file information to add to the isofiles} -\item{...}{each parameter specifies a set of \code{join_by} column(s) to add the \code{new_file_info} to the existing file information. The provided paramters are applied sequentially. At least one must be specified.} +\item{...}{each parameter specifies a set of \code{join_by} column(s) to add the \code{new_file_info} to the existing file information. The provided parameters are applied sequentially. At least one must be specified.} \item{quiet}{whether to display (quiet=FALSE) or silence (quiet = TRUE) information messages. Set parameter to overwrite global defaults for this function or set global defaults with calls to \link[=iso_info_messages]{iso_turn_info_message_on} and \link[=iso_info_messages]{iso_turn_info_message_off}} @@ -30,7 +30,7 @@ the original iso files or data frame with the new file info added in. 
This function makes it easy to add additional file info (\code{\link{iso_get_file_info}}) to isofile objects and data frames by a single \code{\link[dplyr:mutate-joins]{left_join}} or multiple sequential \code{\link[dplyr:mutate-joins]{left_join}} operations. The function provides a detailed summary of the information that was added unless \code{quiet = TRUE}. Note that one-to-many joins are not permitted (and will fail with an informative error) since this would lead to likely unintended data duplication in the isofiles. However, one-to-one and many-to-one joins are fully supported and should cover all needed use cases for this function. Also note that for each join, only the \code{new_file_info} rows that have defined non-NA, non-empty ("") values in all \code{join_by} columns will be considered for the join and that only \code{new_file_info} columns that do NOT already exist in ANY file information will be added. For changing the values of existing file information, please use \code{\link{iso_mutate_file_info}} instead. } \details{ -Single \code{\link[dplyr:mutate-joins]{left_join}}: this is the most common use of this function and basically a simple left join operation (with some additional safety checks). Specify a single \code{join_by} in the \code{...}, such as e.g. \code{c("file_id")} to add additional file information joining by the \code{file_id} column. +Single \code{\link[dplyr:mutate-joins]{left_join}}: this is the most common use of this function and basically a simple left join operation (with some additional safety checks). Specify a single \code{join_by} in the \code{...}, such as e.g. \code{c("file_id")} to add additional file information joining by the \code{file_id} column. Multiple sequential \code{\link[dplyr:mutate-joins]{left_join}}: this use case is for applying a set of increasingly more specific \code{join_by} rules. For example, \code{... 
= c("Identifier 1", "Identifier 2"), c("file_id")} would serve to first add one set of new file information for all isofiles based on their \code{Identifier 1} and \code{Identifier 2} columns and then overwrite the new information with more specific details for a subset of isofiles based on their \code{file_id} column, all based on a single overview \code{new_file_info} data frame. Basically, each set of \code{join_by} conditions specified in \code{...} must describe a valid \code{\link[dplyr:mutate-joins]{left_join}} \code{join_by} parameter to merge the \code{new_file_info} with the existing file info. Each set of \code{new_file_info} data can overwrite the previous \code{join_by} matches such that the last set of \code{join_by} column(s) provided in \code{...} will overwrite all previous matches for which it applies, even if they have already been a match for a previous column. } diff --git a/man/iso_expand_paths.Rd b/man/iso_expand_paths.Rd index 7ac57308..43148a33 100644 --- a/man/iso_expand_paths.Rd +++ b/man/iso_expand_paths.Rd @@ -17,7 +17,7 @@ iso_expand_paths(path, extensions = c(), root = ".") data frame with columns \code{root} (\code{root} as provided) and \code{path} of all the found files. } \description{ -Helper function to expand the provided paths to find data files in folders and subfolders that match any of the specified extensions. Filepaths will be kept as is, only folders will be expanded. Note that this function is rarely called directly. It is used automatically by \code{\link{iso_read_dual_inlet}} and \code{\link{iso_read_continuous_flow}} to identify fiels of interest based on the file paths provided. +Helper function to expand the provided paths to find data files in folders and subfolders that match any of the specified extensions. Filepaths will be kept as is, only folders will be expanded. Note that this function is rarely called directly. 
It is used automatically by \code{\link{iso_read_dual_inlet}} and \code{\link{iso_read_continuous_flow}} to identify files of interest based on the file paths provided. } \seealso{ Other file system functions: diff --git a/man/iso_export_to_excel.Rd b/man/iso_export_to_excel.Rd index 8bdfe7bc..d87a9000 100644 --- a/man/iso_export_to_excel.Rd +++ b/man/iso_export_to_excel.Rd @@ -40,7 +40,7 @@ iso_export_to_excel( \item{include_method_info}{deprecated in favor of the more specific include_standards and include_resistors} -\item{with_ratios}{deprecated, please use the \code{select} paramter to explicitly include or exclude ratio columns} +\item{with_ratios}{deprecated, please use the \code{select} parameter to explicitly include or exclude ratio columns} \item{quiet}{whether to display (quiet=FALSE) or silence (quiet = TRUE) information messages. Set parameter to overwrite global defaults for this function or set global defaults with calls to \link[=iso_info_messages]{iso_turn_info_message_on} and \link[=iso_info_messages]{iso_turn_info_message_off}} } @@ -48,7 +48,7 @@ iso_export_to_excel( returns the iso_files object invisibly for use in pipelines } \description{ -This function exports the passed in iso_files to Excel. The different kinds of data (raw data, file info, methods info, etc.) are exported to separate tabs within the excel file. Use the various \code{include_...} parameters to specifiy what information to include. Note that in rare instances where vectorized data columns exist in the file information (e.g. measurement_info), they are concatenated with ', ' in the excel export. +This function exports the passed in iso_files to Excel. The different kinds of data (raw data, file info, methods info, etc.) are exported to separate tabs within the excel file. Use the various \code{include_...} parameters to specify what information to include. Note that in rare instances where vectorized data columns exist in the file information (e.g. 
measurement_info), they are concatenated with ', ' in the excel export. } \seealso{ Other export functions: diff --git a/man/iso_find_absolute_path_roots.Rd b/man/iso_find_absolute_path_roots.Rd index dab2c558..d9aabef3 100644 --- a/man/iso_find_absolute_path_roots.Rd +++ b/man/iso_find_absolute_path_roots.Rd @@ -17,7 +17,7 @@ iso_find_absolute_path_roots(path, root = ".", check_existence = TRUE) a data frame with the root directories and paths relative to the root - order of input paths is preserved } \description{ -Helper function to find the roots of absolute paths. Tries to put absolute paths into the context of the relative root. For those that this is not possible (because they are not in fact a sub-path of the relative roots), identifies the greatest common denominator for absolute paths as their root. Does not change relative paths but does check wheter they do exist if \code{check_existence = TRUE} (the default). To modify relative paths, use \link{iso_shorten_relative_paths} prior to calling this function. +Helper function to find the roots of absolute paths. Tries to put absolute paths into the context of the relative root. For those that this is not possible (because they are not in fact a sub-path of the relative roots), identifies the greatest common denominator for absolute paths as their root. Does not change relative paths but does check whether they do exist if \code{check_existence = TRUE} (the default). To modify relative paths, use \link{iso_shorten_relative_paths} prior to calling this function. } \seealso{ Other file system functions: diff --git a/man/iso_get_all_data.Rd b/man/iso_get_all_data.Rd index 721367f7..cf081f7b 100644 --- a/man/iso_get_all_data.Rd +++ b/man/iso_get_all_data.Rd @@ -40,7 +40,7 @@ iso_get_all_data( \item{with_units}{this parameter has been DEPRECATED with the introduction of unit-data types (see \code{\link{iso_double_with_units}}) and will be removed in future versions of isoreader. 
Please use \code{with_explicit_units} instead if you really want columns to have units explicitly in the column name. Alternatively, consider working with the new implicit unit system and convert vendor data tables as needed with \code{\link{iso_make_units_explicit}} and \code{\link{iso_make_units_implicit}}.} -\item{with_ratios}{deprecated, please use the \code{select} paramter to explicitly include or exclude ratio columns} +\item{with_ratios}{deprecated, please use the \code{select} parameter to explicitly include or exclude ratio columns} \item{quiet}{whether to display (quiet=FALSE) or silence (quiet = TRUE) information messages. Set parameter to overwrite global defaults for this function or set global defaults with calls to \link[=iso_info_messages]{iso_turn_info_message_on} and \link[=iso_info_messages]{iso_turn_info_message_off}} } diff --git a/man/iso_get_file_info.Rd b/man/iso_get_file_info.Rd index f55fe193..dc0b1a25 100644 --- a/man/iso_get_file_info.Rd +++ b/man/iso_get_file_info.Rd @@ -27,7 +27,7 @@ iso_get_file_info( Combine file information from multiple iso_files. By default all information is included but specific columns can be targeted using the \code{select} parameter to select and/or rename columns. File information beyond \code{file_id}, \code{file_root}, \code{file_path}, \code{file_datetime} and \code{file_size} (in bytes) is only available if the \code{iso_files} were read with parameter \code{read_file_info=TRUE}. } \note{ -this function used to allow selecting/renaming different file_info_columns in different files to the same column. This was a significant speed impediment and only covered very rare use cases. It is still available in the related function \code{\link{iso_select_file_info}} with a special flag but is no longer the default and not incouraged for use in the frequently called \code{iso_get_file_info}. +this function used to allow selecting/renaming different file_info_columns in different files to the same column. 
This was a significant speed impediment and only covered very rare use cases. It is still available in the related function \code{\link{iso_select_file_info}} with a special flag but is no longer the default and not encouraged for use in the frequently called \code{iso_get_file_info}. } \seealso{ Other data retrieval functions: diff --git a/man/iso_get_resistors.Rd b/man/iso_get_resistors.Rd index a4186540..0dd00a0a 100644 --- a/man/iso_get_resistors.Rd +++ b/man/iso_get_resistors.Rd @@ -21,7 +21,7 @@ iso_get_resistors( \item{quiet}{whether to display (quiet=FALSE) or silence (quiet = TRUE) information messages. Set parameter to overwrite global defaults for this function or set global defaults with calls to \link[=iso_info_messages]{iso_turn_info_message_on} and \link[=iso_info_messages]{iso_turn_info_message_off}} } \description{ -Aggregates the resistor information recovered from the provided iso_files. This information is only available if the iso_files were read with parameter \code{read_method_info=TRUE} and only linked to specific masses if the iso_files were additionally read with parametr \code{read_raw_data=TRUE}. +Aggregates the resistor information recovered from the provided iso_files. This information is only available if the iso_files were read with parameter \code{read_method_info=TRUE} and only linked to specific masses if the iso_files were additionally read with parameter \code{read_raw_data=TRUE}. } \seealso{ Other data retrieval functions: diff --git a/man/iso_get_standards.Rd b/man/iso_get_standards.Rd index e2d8f208..591243ed 100644 --- a/man/iso_get_standards.Rd +++ b/man/iso_get_standards.Rd @@ -19,12 +19,12 @@ iso_get_standards( \item{include_file_info}{which file information to include (see \code{\link{iso_get_file_info}}). 
Use \code{c(...)} to select multiple, supports all \link[dplyr]{select} syntax including renaming columns.} -\item{with_ratios}{deprecated, please use the \code{select} paramter to explicitly include or exclude ratio columns} +\item{with_ratios}{deprecated, please use the \code{select} parameter to explicitly include or exclude ratio columns} \item{quiet}{whether to display (quiet=FALSE) or silence (quiet = TRUE) information messages. Set parameter to overwrite global defaults for this function or set global defaults with calls to \link[=iso_info_messages]{iso_turn_info_message_on} and \link[=iso_info_messages]{iso_turn_info_message_off}} } \description{ -Aggregates the isotopic standard information recovered from the provided iso_files. Can aggregate just the standards' delta values or combine the delta values with the recovered ratios (if any). Use paramter \code{select} to exclude/include the ratios. All standards info is only available if the iso_files were read with parameter \code{read_method_info=TRUE}. +Aggregates the isotopic standard information recovered from the provided iso_files. Can aggregate just the standards' delta values or combine the delta values with the recovered ratios (if any). Use parameter \code{select} to exclude/include the ratios. All standards info is only available if the iso_files were read with parameter \code{read_method_info=TRUE}. 
} \seealso{ Other data retrieval functions: diff --git a/man/iso_make_units_explicit.Rd b/man/iso_make_units_explicit.Rd index 102564d1..224b480c 100644 --- a/man/iso_make_units_explicit.Rd +++ b/man/iso_make_units_explicit.Rd @@ -19,13 +19,13 @@ This function is intended for data frames / tibbles only and makes the units of \examples{ # a data frame with implicit units df <- tibble(peak = 1:5, height = iso_double_with_units(1:5, "V")) -df +df # show with explicit units -iso_make_units_explicit(df) +iso_make_units_explicit(df) # show with explicit units (custom prefix & suffix) -iso_make_units_explicit(df, prefix = ".", suffix = "") +iso_make_units_explicit(df, prefix = ".", suffix = "") } \seealso{ Other functions for values with units: diff --git a/man/iso_make_units_implicit.Rd b/man/iso_make_units_implicit.Rd index 2f29c912..d4ef41b1 100644 --- a/man/iso_make_units_implicit.Rd +++ b/man/iso_make_units_implicit.Rd @@ -7,7 +7,7 @@ iso_make_units_implicit(df, prefix = " [", suffix = "]") } \arguments{ -\item{df}{the data frame in which to make the units implicit/eplicit} +\item{df}{the data frame in which to make the units implicit/explicit} \item{prefix}{the prefix for the units} @@ -19,14 +19,14 @@ This function is intended for data frames /tibbles only and tries to figure out \examples{ # generate implicit units df <- tibble(peak = 1:5, `height [V]` = 1:5) -iso_make_units_implicit(df) +iso_make_units_implicit(df) # convert back and forth -iso_make_units_implicit(df) \%>\% iso_make_units_explicit() +iso_make_units_implicit(df) \%>\% iso_make_units_explicit() # implicit units from custom prefix & suffix df <- tibble(peak = 1:5, height.V = 1:5) -iso_make_units_implicit(df, prefix = ".", suffix = "") +iso_make_units_implicit(df, prefix = ".", suffix = "") } \seealso{ Other functions for values with units: diff --git a/man/iso_read_files.Rd b/man/iso_read_files.Rd index 5ca2f54c..6d16a62f 100644 --- a/man/iso_read_files.Rd +++ b/man/iso_read_files.Rd @@ -33,7 +33,7 
@@ iso_read_files( \item{read_options}{vector of read options to be stored in the data structure (e.g. \code{c(read_vendor_data_table = FALSE)}). The \code{read_} prefix is optional.} -\item{reader_options}{list of paramters to be passed on to the reader} +\item{reader_options}{list of parameters to be passed on to the reader} \item{discard_duplicates}{whether to automatically discard files with duplicate file IDs (i.e. duplicate file names). If \code{TRUE} (the default), only the first files are kept and any files with the same file ID are discarded. If \code{FALSE}, all duplicate files are kept but their file IDs are appended with suffix \code{#1}, \code{#2}, etc.} @@ -57,7 +57,7 @@ iso_read_files( single iso_file object (if single file) or list of iso_files (iso_file_list) } \description{ -This function takes care of extracting basic information about iso_files, dealing with problems and making sure only valid fire formats are processed. -This function is not typicaly called directly but indirectly by calling \link{iso_read_dual_inlet}, \link{iso_read_continuous_flow} and \link{iso_read_scan}. +This function takes care of extracting basic information about iso_files, dealing with problems and making sure only valid file formats are processed. +This function is not typically called directly but indirectly by calling \link{iso_read_dual_inlet}, \link{iso_read_continuous_flow} and \link{iso_read_scan}. It is made available outside the package because it can be very useful for testing new file readers. } diff --git a/man/iso_reread_files.Rd b/man/iso_reread_files.Rd index eb003bc3..9005f8c7 100644 --- a/man/iso_reread_files.Rd +++ b/man/iso_reread_files.Rd @@ -71,7 +71,7 @@ To re-read files that have been modified on disc, please use \code{iso_reread_ch \code{iso_reread_outdated_files} re-reads all files that were read with an outdated version of isoreader. 
-\code{iso_reread_problem_files} re-reads all files that have had errors the last time they were read by isoreader (set \code{reread_files_with_warnings = TRUE} to also re-read those that have warninigs). +\code{iso_reread_problem_files} re-reads all files that have had errors the last time they were read by isoreader (set \code{reread_files_with_warnings = TRUE} to also re-read those that have warnings). \code{iso_reread_storage} is deprecated. diff --git a/man/iso_select_file_info.Rd b/man/iso_select_file_info.Rd index 69a62996..70d9e395 100644 --- a/man/iso_select_file_info.Rd +++ b/man/iso_select_file_info.Rd @@ -14,7 +14,7 @@ iso_select_file_info( \arguments{ \item{iso_files}{collection of iso_file objects} -\item{...}{dplyr-style \link[dplyr]{select} conditions applied based on each file's file_info (see \code{\link{iso_get_file_info}}). Note that the \code{file_id} column will always be kept, no matter the selection criteria, and cannot be renamed to protect from unexpected behaviour.} +\item{...}{dplyr-style \link[dplyr]{select} conditions applied based on each file's file_info (see \code{\link{iso_get_file_info}}). Note that the \code{file_id} column will always be kept, no matter the selection criteria, and cannot be renamed to protect from unexpected behavior.} \item{file_specific}{whether to run the select criteria (\code{...}) specifically within each individual file rather than on all files jointly. 
This is a lot slower but makes it possible to select different columns in different iso_files depending on what exists in each file and is mostly of use when working with data from multiple instruments.} diff --git a/man/iso_set_default_read_parameters.Rd b/man/iso_set_default_read_parameters.Rd index 3953354e..e006762e 100644 --- a/man/iso_set_default_read_parameters.Rd +++ b/man/iso_set_default_read_parameters.Rd @@ -22,7 +22,7 @@ iso_set_default_read_parameters( \item{read_method_info}{if provided, set as the default for `read_method_info` parameters} -\item{read_vendor_data_table}{if provided, set as the default for `read_vendor_data_tabl` parameters} +\item{read_vendor_data_table}{if provided, set as the default for `read_vendor_data_table` parameters} \item{quiet}{whether to display (quiet=FALSE) or silence (quiet = TRUE) information messages. Set parameter to overwrite global defaults for this function or set global defaults with calls to \link[=iso_info_messages]{iso_turn_info_message_on} and \link[=iso_info_messages]{iso_turn_info_message_off}} } diff --git a/man/iso_shorten_relative_paths.Rd b/man/iso_shorten_relative_paths.Rd index 736422b9..d757517c 100644 --- a/man/iso_shorten_relative_paths.Rd +++ b/man/iso_shorten_relative_paths.Rd @@ -15,7 +15,7 @@ iso_shorten_relative_paths(path, root = ".") a data frame with the root directories and paths relative to the root - order of input paths is preserved } \description{ -Convenience function to shorten relative paths based on overlap with the provided root(s). Also simplifies current directory repeats (e.g. "././." becomes ".") for better legiblity. Does not check whether the original or resulting paths point to valid files or folders. Relative paths that do not start with the supplied \code{root} default back to the current working directory (\code{.}). Absolute paths are allowed but are returned as is without attempts at shortening. See \code{iso_find_absolute_path_roots} for rooting absolute paths. 
+Convenience function to shorten relative paths based on overlap with the provided root(s). Also simplifies current directory repeats (e.g. "././." becomes ".") for better legibility. Does not check whether the original or resulting paths point to valid files or folders. Relative paths that do not start with the supplied \code{root} default back to the current working directory (\code{.}). Absolute paths are allowed but are returned as is without attempts at shortening. See \code{iso_find_absolute_path_roots} for rooting absolute paths. } \examples{ iso_shorten_relative_paths(file.path("A", "B", "C"), "A") # root = "A", path = B/C diff --git a/man/isoreader-package.Rd b/man/isoreader-package.Rd index 579d9583..51d8c37a 100644 --- a/man/isoreader-package.Rd +++ b/man/isoreader-package.Rd @@ -4,9 +4,9 @@ \name{isoreader-package} \alias{isoreader} \alias{isoreader-package} -\title{isoreader: Read IRMS Data Files} +\title{isoreader: Read Stable Isotope Data Files} \description{ -R interface to IRMS (isotope ratio mass spectrometry) file formats typically used in stable isotope geochemistry. +R interface to isotope ratio mass spectrometry file formats used in stable isotope geochemistry. 
} \seealso{ Useful links: diff --git a/man/read_iso_file.Rd b/man/read_iso_file.Rd index d122f8bb..206cde3c 100644 --- a/man/read_iso_file.Rd +++ b/man/read_iso_file.Rd @@ -30,7 +30,7 @@ read_iso_file( \item{path}{file path} -\item{file_n}{numer of processsed file for info messages} +\item{file_n}{number of processed file for info messages} \item{files_n}{total number of files for info messages} @@ -52,7 +52,7 @@ read_iso_file( \item{reader_fun}{file reader function} -\item{reader_options}{list of paramters to be passed on to the reader} +\item{reader_options}{list of parameters to be passed on to the reader} \item{reader_fun_env}{where to find the reader function} } diff --git a/vignettes/continuous_flow.Rmd b/vignettes/continuous_flow.Rmd index cb208484..b0404866 100644 --- a/vignettes/continuous_flow.Rmd +++ b/vignettes/continuous_flow.Rmd @@ -1,7 +1,7 @@ --- title: "Continuous Flow Examples" date: "`r Sys.Date()`" -output: +output: rmarkdown::html_vignette: html_document: code_folding: show @@ -27,7 +27,7 @@ knitr::opts_chunk$set( # Introduction -Isoreader supports several continuous flow IRMS data formats. This vignette shows some of the functionality for continuous flow files. For additional information on operations more generally (caching, combining read files, data export, etc.), please consult the [operations vignette](http://isoreader.isoverse.org/articles/operations.html). For details on downstream data processing and visualization, see the [isoprocessor package](https://isoprocessor.isoverse.org). +Isoreader supports several continuous flow IRMS data formats. This vignette shows some of the functionality for continuous flow files. For additional information on operations more generally (caching, combining read files, data export, etc.), please consult the [operations vignette](https://isoreader.isoverse.org/articles/operations.html). 
For details on downstream data processing and visualization, see the [isoprocessor package](https://isoprocessor.isoverse.org). ```{r, message=FALSE} # load isoreader package @@ -45,7 +45,7 @@ iso_get_reader_examples() %>% rmarkdown::paged_table() ```{r} # read a few of the continuous flow examples -cf_files <- +cf_files <- iso_read_continuous_flow( iso_get_reader_example("continuous_flow_example.cf"), iso_get_reader_example("continuous_flow_example.iarc"), @@ -59,7 +59,7 @@ cf_files <- The `cf_files` variable now contains a set of isoreader objects, one for each file. Take a look at what information was retrieved from the files using the `iso_get_data_summary()` function. ```{r} -cf_files %>% iso_get_data_summary() %>% rmarkdown::paged_table() +cf_files %>% iso_get_data_summary() %>% rmarkdown::paged_table() ``` ## Problems @@ -79,11 +79,11 @@ Detailed file information can be aggregated for all isofiles using the `iso_get_ # all file information cf_files %>% iso_get_file_info(select = c(-file_root)) %>% rmarkdown::paged_table() # select file information -cf_files %>% +cf_files %>% iso_get_file_info( select = c( # rename sample id columns from the different file types to a new ID column - ID = `Identifier 1`, ID = `Name`, + ID = `Identifier 1`, ID = `Name`, # select columns without renaming Analysis, `Peak Center`, `H3 Factor`, # select the time stamp and rename it to `Date & Time` @@ -91,7 +91,7 @@ cf_files %>% ), # explicitly allow for file specific rename (for the new ID column) file_specific = TRUE - ) %>% rmarkdown::paged_table() + ) %>% rmarkdown::paged_table() ``` ## Select/Rename @@ -100,10 +100,10 @@ Rather than retrieving specific file info columns using the above example of `is ```{r} # select + rename specific file info columns -cf_files2 <- cf_files %>% +cf_files2 <- cf_files %>% iso_select_file_info( - ID = `Identifier 1`, ID = `Name`, Analysis, `Peak Center`, `H3 Factor`, - `Date & Time` = file_datetime, + ID = `Identifier 1`, ID = `Name`, Analysis, 
`Peak Center`, `H3 Factor`, + `Date & Time` = file_datetime, # recode to the same name in different files `Sample Weight` = `Identifier 2`, `Sample Weight` = `EA Sample Weight`, file_specific = TRUE @@ -115,28 +115,28 @@ cf_files2 %>% iso_get_file_info() %>% rmarkdown::paged_table() ## Filter -Any collection of isofiles can also be filtered based on the available file information using the function `iso_filter_files`. This function can operate on any column available in the file information and supports full [dplyr](https://dplyr.tidyverse.org/reference/filter.html) syntax. +Any collection of isofiles can also be filtered based on the available file information using the function `iso_filter_files`. This function can operate on any column available in the file information and supports full [dplyr](https://dplyr.tidyverse.org/reference/filter.html) syntax. ```{r} # find files that have 'linearity' in the new ID field -cf_files2 %>% iso_filter_files(grepl("linearity", ID)) %>% - iso_get_file_info() %>% +cf_files2 %>% iso_filter_files(grepl("linearity", ID)) %>% + iso_get_file_info() %>% rmarkdown::paged_table() # find files that were run since 2015 -cf_files2 %>% - iso_filter_files(`Date & Time` > "2015-01-01") %>% - iso_get_file_info() %>% +cf_files2 %>% + iso_filter_files(`Date & Time` > "2015-01-01") %>% + iso_get_file_info() %>% rmarkdown::paged_table() ``` ## Mutate -The file information in any collection of isofiles can also be mutated using the function `iso_mutate_file_info`. This function can introduce new columns and operate on any existing columns available in the file information (even if it does not exist in all files) and supports full [dplyr](https://dplyr.tidyverse.org/reference/mutate.html) syntax. It can also be used in conjuction with `iso_with_unit` to generate values with implicit units. +The file information in any collection of isofiles can also be mutated using the function `iso_mutate_file_info`. 
This function can introduce new columns and operate on any existing columns available in the file information (even if it does not exist in all files) and supports full [dplyr](https://dplyr.tidyverse.org/reference/mutate.html) syntax. It can also be used in conjunction with `iso_with_units` to generate values with implicit units. ```{r} -cf_files3 <- - cf_files2 %>% +cf_files3 <- + cf_files2 %>% iso_mutate_file_info( # update existing column ID = paste("ID:", ID), @@ -144,11 +144,11 @@ cf_files3 <- `Run since 2015?` = `Date & Time` > "2015-01-01", # parse weight as a number and turn into a column with units `Sample Weight` = `Sample Weight` %>% parse_number() %>% iso_with_units("mg") - ) + ) -cf_files3 %>% - iso_get_file_info() %>% - iso_make_units_explicit() %>% +cf_files3 %>% + iso_get_file_info() %>% + iso_make_units_explicit() %>% rmarkdown::paged_table() ``` @@ -158,7 +158,7 @@ Additionally, a wide range of new file information can be added in the form of a ```{r} # this kind of information data frame is frequently read in from a csv or xlsx file -new_info <- +new_info <- dplyr::bind_rows( # new information based on new vs. old samples dplyr::tribble( @@ -175,20 +175,20 @@ new_info <- new_info %>% rmarkdown::paged_table() # adding it to the isofiles -cf_files3 %>% - iso_add_file_info(new_info, by1 = "Run since 2015?", by2 = "file_id") %>% - iso_get_file_info(select = !!names(new_info)) %>% +cf_files3 %>% + iso_add_file_info(new_info, by1 = "Run since 2015?", by2 = "file_id") %>% + iso_get_file_info(select = !!names(new_info)) %>% rmarkdown::paged_table() ``` ## Parse -Most file information is initially read as text to avoid cumbersome specifications during the read process and compatibility issues between different IRMS file formats. However, many file info columns are not easily processed as text. 
The isoreader package therefore provides several parsing and data extraction functions to facilitate processing the text-based data (some via functionality implemented by the [readr](http://readr.tidyverse.org) package). See code block below for examples. For a complete overview, see the `?extract_data` and `?iso_parse_file_info` documentation. +Most file information is initially read as text to avoid cumbersome specifications during the read process and compatibility issues between different IRMS file formats. However, many file info columns are not easily processed as text. The isoreader package therefore provides several parsing and data extraction functions to facilitate processing the text-based data (some via functionality implemented by the [readr](https://readr.tidyverse.org) package). See code block below for examples. For a complete overview, see the `?extract_data` and `?iso_parse_file_info` documentation. ```{r} # use parsing and extraction in iso_mutate_file_info -cf_files2 %>% +cf_files2 %>% iso_mutate_file_info( # change type of Peak Center to logical `Peak Center` = parse_logical(`Peak Center`), @@ -198,24 +198,24 @@ cf_files2 %>% file_id_2nd = extract_word(file_id, 2), # retrieve file extension from the file_id using regular expression name = extract_substring(ID, "(\\w+)-?(.*)?", capture_bracket = 1) - ) %>% - iso_get_file_info(select = c(matches("file_id"), ID, name, `Peak Center`)) %>% + ) %>% + iso_get_file_info(select = c(matches("file_id"), ID, name, `Peak Center`)) %>% rmarkdown::paged_table() # use parsing in iso_filter_file_info -cf_files2 %>% - iso_filter_files(parse_number(`H3 Factor`) > 2) %>% - iso_get_file_info() %>% +cf_files2 %>% + iso_filter_files(parse_number(`H3 Factor`) > 2) %>% + iso_get_file_info() %>% rmarkdown::paged_table() # use iso_parse_file_info for simplified parsing of column data types -cf_files2 %>% +cf_files2 %>% iso_parse_file_info( - integer = Analysis, + integer = Analysis, number = `H3 Factor`, logical = `Peak 
Center` - ) %>% - iso_get_file_info() %>% + ) %>% + iso_get_file_info() %>% rmarkdown::paged_table() ``` @@ -247,55 +247,55 @@ The raw data read from the IRMS files can be retrieved similarly using the `iso_ # get raw data with default selections (all raw data, no additional file info) cf_files %>% iso_get_raw_data() %>% head(n=10) %>% rmarkdown::paged_table() # get specific raw data and add some file information -cf_files %>% +cf_files %>% iso_get_raw_data( # select just time and the m/z 2 and 3 ions select = c(time.s, v2.mV, v3.mV), # include the Analysis number fron the file info and rename it to 'run' include_file_info = c(run = Analysis) - ) %>% + ) %>% # look at first few records only head(n=10) %>% rmarkdown::paged_table() ``` # Data Processing -The isoreader package is intended to make raw stable isotope data easily accessible. However, as with most analytical data, there is significant downstream processing required to turn these raw intensity chromatograms into peak-specific, properly referenced isotopic measurements. This and similar functionality as well as data visualization is part of the [isoprocessor package](https://isoprocessor.isoverse.org) which takes isotopic data through the various corrections in a transparent, efficient and reproducible manner. +The isoreader package is intended to make raw stable isotope data easily accessible. However, as with most analytical data, there is significant downstream processing required to turn these raw intensity chromatograms into peak-specific, properly referenced isotopic measurements. This and similar functionality as well as data visualization is part of the [isoprocessor package](https://isoprocessor.isoverse.org) which takes isotopic data through the various corrections in a transparent, efficient and reproducible manner. 
-That said, most vendor software also performs some of these calculations and it can be useful to be able to compare new data reduction procecures against those implemented in the vendor software. For this purpose, isoreader retrieves vendor computed data tables whenver possible, as illustrated below. +That said, most vendor software also performs some of these calculations and it can be useful to be able to compare new data reduction procedures against those implemented in the vendor software. For this purpose, isoreader retrieves vendor computed data tables whenever possible, as illustrated below. ## Vendor Data Table -As with most data retrieval funtions, the `iso_get_vendor_data_table()` function also allows specific column selection (by default, all columns are selected) and easy addition of file information via the `include_file_info` parameter (by default, none is included). +As with most data retrieval functions, the `iso_get_vendor_data_table()` function also allows specific column selection (by default, all columns are selected) and easy addition of file information via the `include_file_info` parameter (by default, none is included). ```{r} # entire vendor data table cf_files %>% iso_get_vendor_data_table() %>% rmarkdown::paged_table() # get specific parts and add some file information -cf_files %>% +cf_files %>% iso_get_vendor_data_table( # select peak number, ret. time, overall intensity and all H delta columns select = c(Nr., Rt, area = `rIntensity All`, matches("^d \\d+H")), # include the Analysis number fron the file info and rename it to 'run' include_file_info = c(run = Analysis) - ) %>% - rmarkdown::paged_table() + ) %>% + rmarkdown::paged_table() # the data table also provides units if included in the original data file # which can be made explicit using the function iso_make_units_explicit() -cf_files %>% +cf_files %>% iso_get_vendor_data_table( # select peak number, ret. 
time, overall intensity and all H delta columns select = c(Nr., Rt, area = `rIntensity All`, matches("^d \\d+H")), # include the Analysis number fron the file info and rename it to 'run' include_file_info = c(run = Analysis) - ) %>% + ) %>% # make column units explicit - iso_make_units_explicit() %>% - rmarkdown::paged_table() + iso_make_units_explicit() %>% + rmarkdown::paged_table() ``` -# For expert users: retrieving all data +# For expert users: retrieving all data For users familiar with the nested data frames from the [tidyverse](https://www.tidyverse.org/) (particularly [tidyr](https://tidyr.tidyverse.org/)'s `nest` and `unnest`), there is an easy way to retrieve all data from the iso file objects in a single nested data frame: @@ -306,7 +306,7 @@ all_data <- cf_files %>% iso_get_all_data() # Saving collections -Saving entire collections of isofiles for retrieval at a later point is easily done using the `iso_save` function which stores collections or individual isoreader file objects in the efficient R data storage format `.rds` (if not specified, the extension `.cf.rds` will be automatically appended). These saved collections can be convientiently read back using the same `iso_read_continuous_flow` command used for raw data files. +Saving entire collections of isofiles for retrieval at a later point is easily done using the `iso_save` function which stores collections or individual isoreader file objects in the efficient R data storage format `.rds` (if not specified, the extension `.cf.rds` will be automatically appended). These saved collections can be conveniently read back using the same `iso_read_continuous_flow` command used for raw data files. 
```{r} # export to R data archive @@ -337,5 +337,3 @@ cf_files %>% iso_export_to_feather("cf_files_export") # exported feather files list.files(pattern = ".cf.feather") ``` - - diff --git a/vignettes/development.Rmd b/vignettes/development.Rmd index 11eab9d8..1e09883b 100644 --- a/vignettes/development.Rmd +++ b/vignettes/development.Rmd @@ -1,7 +1,7 @@ --- title: "Development features of isoreader" date: "`r Sys.Date()`" -output: +output: rmarkdown::html_vignette: html_document: code_folding: show @@ -14,8 +14,8 @@ editor_options: chunk_output_type: console vignette: > %\VignetteIndexEntry{Development features of isoreader} - %\VignetteEncoding{UTF-8} %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} @@ -30,7 +30,7 @@ This vignette introduces some of the development features of the isoreader packa # Adding new file format readers -Testing out new file format readers is easiest by registering a new reader function for a specific file extension using `iso_register_dual_inlet_file_reader` and `iso_register_continuous_flow_file_reader`, respectively. Both require an extension (e.g. `".ext"`), name of the new reader function (`"new_reader"`), and optionally a description. Both functions automatically return a data frame with a list of all registered reader. Overwriting of existing readers with a different function requires an explicit `overwrite = TRUE` flag. All reader functions must accept an isoreader data stucture object (`ds`) as the first argument, a list of reader specific options as the second argument (`options`), and should return the structure with data filled in for downstream isoreader operations to work smoothly. The following minimal example illustrates how to do this with the `new_reader` function simply printing out the layout of the provided data structure skeleton `ds`. 
+Testing out new file format readers is easiest by registering a new reader function for a specific file extension using `iso_register_dual_inlet_file_reader` and `iso_register_continuous_flow_file_reader`, respectively. Both require an extension (e.g. `".ext"`), name of the new reader function (`"new_reader"`), and optionally a description. Both functions automatically return a data frame with a list of all registered readers. Overwriting of existing readers with a different function requires an explicit `overwrite = TRUE` flag. All reader functions must accept an isoreader data structure object (`ds`) as the first argument, a list of reader specific options as the second argument (`options`), and should return the structure with data filled in for downstream isoreader operations to work smoothly. The following minimal example illustrates how to do this with the `new_reader` function simply printing out the layout of the provided data structure skeleton `ds`. ```{r} new_reader <- function(ds, options = list()) { @@ -51,42 +51,42 @@ iso_read_dual_inlet("example.new.did", read_cache = FALSE) file.remove("example.new.did") ``` -Note that for parallel processing to work during the read process (`parallel = TRUE`), isoreader needs to know where to find the new reader function. It will figure this out automatically as long as the function name is unique but if this fails (or to be on the safe side), please specify e.g. `env = "R_GlobalEnv"` or `env = "newpackage"` during the reader registration. Also note that isoreader will not automatically know where to find all functions called from within the new reader function if they are not part of base R and it is recommended to make all outside calls explicit (e.g. `dplyr::filter(...)`) to pre-empt this potential problem.
For info messages and warnings to work with the progress bar and in parallel reads, make sure to use `isoreader:::log_message(...)` and `isoreader:::log_warning(...)` instead of base R's `message(...)` and `warning(...)`. +Note that for parallel processing to work during the read process (`parallel = TRUE`), isoreader needs to know where to find the new reader function. It will figure this out automatically as long as the function name is unique but if this fails (or to be on the safe side), please specify e.g. `env = "R_GlobalEnv"` or `env = "newpackage"` during the reader registration. Also note that isoreader will not automatically know where to find all functions called from within the new reader function if they are not part of base R and it is recommended to make all outside calls explicit (e.g. `dplyr::filter(...)`) to preempt this potential problem. For info messages and warnings to work with the progress bar and in parallel reads, make sure to use `isoreader:::log_message(...)` and `isoreader:::log_warning(...)` instead of base R's `message(...)` and `warning(...)`. If you have designed and tested a new reader, please consider contributing it to the `isoreader` github repository via pull request. # Processing hooks -Isoreader defines two processing hooks at the beginning and end of reading an individual file. This is useful for integration into pipelines that require additional output (such as GUIs) but is also sometimes useful for debuggin purposes. The expressions are evaluated in the context of the `isoreader:::read_iso_file` function and have access to all parameters passed to this function, such as e.g. `file_n` and `path`. Same as for new readers: for info messages and warnings to work with the progress bar and in parallel reads, make sure to use `isoreader:::log_message(...)` and `isoreader:::log_warning(...)` instead of base R's `message(...)` and `warning(...)`. 
The main difference between the two is that `log_message()` will honor the `quiet = TRUE` flag passed to the main `iso_read...()` call whereas `log_warning()` will always show its message no matter the `quiet` setting. +Isoreader defines two processing hooks at the beginning and end of reading an individual file. This is useful for integration into pipelines that require additional output (such as GUIs) but is also sometimes useful for debugging purposes. The expressions are evaluated in the context of the `isoreader:::read_iso_file` function and have access to all parameters passed to this function, such as e.g. `file_n` and `path`. Same as for new readers: for info messages and warnings to work with the progress bar and in parallel reads, make sure to use `isoreader:::log_message(...)` and `isoreader:::log_warning(...)` instead of base R's `message(...)` and `warning(...)`. The main difference between the two is that `log_message()` will honor the `quiet = TRUE` flag passed to the main `iso_read...()` call whereas `log_warning()` will always show its message no matter the `quiet` setting. ```{r} -isoreader:::set_read_file_event_expr({ - isoreader:::log_message(sprintf("starting file #%.d, named '%s'", file_n, basename(path))) +isoreader:::set_read_file_event_expr({ + isoreader:::log_message(sprintf("starting file #%.d, named '%s'", file_n, basename(path))) }) -isoreader:::set_finish_file_event_expr({ - isoreader:::log_message(sprintf("finished file #%.d", file_n)) +isoreader:::set_finish_file_event_expr({ + isoreader:::log_message(sprintf("finished file #%.d", file_n)) }) c( iso_get_reader_example("dual_inlet_example.did"), iso_get_reader_example("dual_inlet_example.caf") ) %>% iso_read_dual_inlet(read_cache = FALSE) - + isoreader:::initialize_options() # reset all isoreader options ``` # Debugging isoreader -The best way to start debugging an isoreader call is to switch the package into debug mode. This is done using the internal `iso_turn_debug_on()` function. 
This enables debug messags, turns caching off by default so files are always read anew, and makes the package keep more information in the isofile objects. It continues to catch errors inside file readers (keeping track of them in the [problems](operations.html#dealing-with-file-read-problems)) unless you set `iso_turn_debug_on(catch_errors = FALSE)`, in which case no errors are caught and stop the processing so you get the full traceback and debugging options of your IDE. +The best way to start debugging an isoreader call is to switch the package into debug mode. This is done using the internal `iso_turn_debug_on()` function. This enables debug messages, turns caching off by default so files are always read anew, and makes the package keep more information in the isofile objects. It continues to catch errors inside file readers (keeping track of them in the [problems](operations.html#dealing-with-file-read-problems)) unless you set `iso_turn_debug_on(catch_errors = FALSE)`, in which case no errors are caught and stop the processing so you get the full traceback and debugging options of your IDE. ## Debugging binary file reads (Isodat) -Errors during the binary file reads usually indicate the approximate position in the file where the error was encountered. The easiest way to get started on figuring out what the file looks like at that position is to use a binary file editor and jump to the position. For a sense of the interpreted structure around that position, one can use the internal function `map_binary_structure` which tries to apply all frequently occuring binary patterns recognized by isoreader. The binary representation of the source file is only available if in debug mode but if debug mode is ON, it can be accessed as follows: +Errors during the binary file reads usually indicate the approximate position in the file where the error was encountered. 
The easiest way to get started on figuring out what the file looks like at that position is to use a binary file editor and jump to the position. For a sense of the interpreted structure around that position, one can use the internal function `map_binary_structure` which tries to apply all frequently occurring binary patterns recognized by isoreader. The binary representation of the source file is only available if in debug mode but if debug mode is ON, it can be accessed as follows: ```{r} # turn on debug mode -isoreader:::iso_turn_debug_on() +isoreader:::iso_turn_debug_on() # read example file ex <- iso_get_reader_example("dual_inlet_example.did") %>% iso_read_dual_inlet(quiet = TRUE) @@ -103,22 +103,21 @@ This structure representation shows recognized control elements in `<...>` and d For an overview of all the control elements that are currently consider, use the internal `get_ctrl_blocks_config_df()` function. ```{r} -isoreader:::get_ctrl_blocks_config_df() %>% +isoreader:::get_ctrl_blocks_config_df() %>% rmarkdown::paged_table() ``` -Additional information can be gleaned from the so-called control blocks, which are larger structural elements of Isodat binary files and are kept in a data frame wihin the binary object (again only available in debug mode). +Additional information can be gleaned from the so-called control blocks, which are larger structural elements of Isodat binary files and are kept in a data frame within the binary object (again only available in debug mode). ```{r} -bin$C_blocks %>% +bin$C_blocks %>% rmarkdown::paged_table() ``` Same as for specific byte positions, one can use the control blocks to navigate the file and `map_binary_structure`. 
```{r} -bin %>% - isoreader:::move_to_C_block("CMethod") %>% +bin %>% + isoreader:::move_to_C_block("CMethod") %>% isoreader:::map_binary_structure(length = 200) ``` - diff --git a/vignettes/dual_inlet.Rmd b/vignettes/dual_inlet.Rmd index 21ae3aae..cb94218f 100644 --- a/vignettes/dual_inlet.Rmd +++ b/vignettes/dual_inlet.Rmd @@ -1,7 +1,7 @@ --- title: "Dual Inlet Examples" date: "`r Sys.Date()`" -output: +output: rmarkdown::html_vignette: html_document: code_folding: show @@ -28,7 +28,7 @@ knitr::opts_chunk$set( # Introduction -Isoreader supports several dual inlet IRMS data formats. This vignette shows some of the functionality for dual inlet data files. For additional information on operations more generally (caching, combining read files, data export, etc.), please consult the [operations vignette](http://isoreader.isoverse.org/articles/operations.html). For details on downstream data processing and visualization, see the [isoprocessor package](https://isoprocessor.isoverse.org). +Isoreader supports several dual inlet IRMS data formats. This vignette shows some of the functionality for dual inlet data files. For additional information on operations more generally (caching, combining read files, data export, etc.), please consult the [operations vignette](https://isoreader.isoverse.org/articles/operations.html). For details on downstream data processing and visualization, see the [isoprocessor package](https://isoprocessor.isoverse.org). 
```{r, message=FALSE} @@ -48,7 +48,7 @@ iso_get_reader_examples() %>% rmarkdown::paged_table() ```{r} # read dual inlet examples -di_files <- +di_files <- iso_read_dual_inlet( iso_get_reader_example("dual_inlet_example.did"), iso_get_reader_example("dual_inlet_example2.did"), @@ -83,11 +83,11 @@ Detailed file information can be aggregated for all isofiles using the `iso_get_ # all file information di_files %>% iso_get_file_info(select = c(-file_root)) %>% rmarkdown::paged_table() # select file information -di_files %>% +di_files %>% iso_get_file_info( select = c( # rename sample id columns from the different file types to a new ID column - ID = `Identifier 1`, ID = `Sample Name`, + ID = `Identifier 1`, ID = `Sample Name`, # select columns without renaming Analysis, Method, `Peak Center`, # select the time stamp and rename it to `Date & Time` @@ -96,7 +96,7 @@ di_files %>% `Sample Weight`, `Sample Weight` = `Weight [mg]` ), # explicitly allow for file specific rename (for the new ID column) - file_specific = TRUE + file_specific = TRUE ) %>% rmarkdown::paged_table() ``` @@ -106,12 +106,12 @@ Rather than retrieving specific file info columns using the above example of `is ```{r} # select + rename specific file info columns -di_files2 <- di_files %>% +di_files2 <- di_files %>% iso_select_file_info( - ID = `Identifier 1`, ID = `Sample Name`, Analysis, Method, + ID = `Identifier 1`, ID = `Sample Name`, Analysis, Method, `Peak Center`, `Date & Time` = file_datetime, `Sample Weight`, `Sample Weight` = `Weight [mg]`, - file_specific = TRUE + file_specific = TRUE ) # fetch all file info @@ -120,27 +120,27 @@ di_files2 %>% iso_get_file_info() %>% rmarkdown::paged_table() ## Filter -Any collection of isofiles can also be filtered based on the available file information using the function `iso_filter_files`. This function can operate on any column available in the file information and supports full [dplyr](https://dplyr.tidyverse.org/reference/filter.html) syntax. 
+Any collection of isofiles can also be filtered based on the available file information using the function `iso_filter_files`. This function can operate on any column available in the file information and supports full [dplyr](https://dplyr.tidyverse.org/reference/filter.html) syntax. ```{r} # find files that have 'CIT' in the new ID field -di_files2 %>% iso_filter_files(grepl("CIT", ID)) %>% - iso_get_file_info() %>% +di_files2 %>% iso_filter_files(grepl("CIT", ID)) %>% + iso_get_file_info() %>% rmarkdown::paged_table() # find files that were run in 2017 -di_files2 %>% - iso_filter_files(`Date & Time` > "2017-01-01" & `Date & Time` < "2018-01-01") %>% - iso_get_file_info() %>% +di_files2 %>% + iso_filter_files(`Date & Time` > "2017-01-01" & `Date & Time` < "2018-01-01") %>% + iso_get_file_info() %>% rmarkdown::paged_table() ``` ## Mutate -The file information in any collection of isofiles can also be mutated using the function `iso_mutate_file_info`. This function can introduce new columns and operate on/overwrite any existing columns available in the file information (even if it does not exist in all files) and supports full [dplyr](https://dplyr.tidyverse.org/reference/mutate.html) syntax. It can also be used in conjuction with `iso_with_unit` to generate values with implicit units. +The file information in any collection of isofiles can also be mutated using the function `iso_mutate_file_info`. This function can introduce new columns and operate on/overwrite any existing columns available in the file information (even if it does not exist in all files) and supports full [dplyr](https://dplyr.tidyverse.org/reference/mutate.html) syntax. It can also be used in conjunction with `iso_with_units` to generate values with implicit units.
```{r} -di_files3 <- di_files2 %>% +di_files3 <- di_files2 %>% iso_mutate_file_info( # update existing column ID = paste("ID:", ID), @@ -148,11 +148,11 @@ di_files3 <- di_files2 %>% `Run in 2017?` = `Date & Time` > "2017-01-01" & `Date & Time` < "2018-01-01", # parse weight as a number and turn into a column with units `Sample Weight` = `Sample Weight` %>% parse_number() %>% iso_with_units("mg") - ) + ) -di_files3 %>% - iso_get_file_info() %>% - iso_make_units_explicit() %>% +di_files3 %>% + iso_get_file_info() %>% + iso_make_units_explicit() %>% rmarkdown::paged_table() ``` @@ -162,7 +162,7 @@ Additionally, a wide range of new file information can be added in the form of a ```{r} # this kind of information data frame is frequently read in from a csv or xlsx file -new_info <- +new_info <- dplyr::bind_rows( # new information based on new vs. old samples dplyr::tribble( @@ -179,20 +179,20 @@ new_info <- new_info %>% rmarkdown::paged_table() # adding it to the isofiles -di_files3 %>% - iso_add_file_info(new_info, by1 = "Run in 2017?", by2 = "Analysis") %>% - iso_get_file_info(select = !!names(new_info)) %>% +di_files3 %>% + iso_add_file_info(new_info, by1 = "Run in 2017?", by2 = "Analysis") %>% + iso_get_file_info(select = !!names(new_info)) %>% rmarkdown::paged_table() ``` ## Parse -Most file information is initially read as text to avoid cumbersome specifications during the read process and compatibility issues between different IRMS file formats. However, many file info columns are not easily processed as text. The isoreader package therefore provides several parsing and data extraction functions to facilitate processing the text-based data (some via functionality implemented by the [readr](http://readr.tidyverse.org) package). See code block below for examples. For a complete overview, see the `?extract_data` and `?iso_parse_file_info` documentation. 
+Most file information is initially read as text to avoid cumbersome specifications during the read process and compatibility issues between different IRMS file formats. However, many file info columns are not easily processed as text. The isoreader package therefore provides several parsing and data extraction functions to facilitate processing the text-based data (some via functionality implemented by the [readr](https://readr.tidyverse.org) package). See code block below for examples. For a complete overview, see the `?extract_data` and `?iso_parse_file_info` documentation. ```{r} # use parsing and extraction in iso_mutate_file_info -di_files2 %>% +di_files2 %>% iso_mutate_file_info( # change type of Peak Center to logical `Peak Center` = parse_logical(`Peak Center`), @@ -202,24 +202,24 @@ di_files2 %>% Method_2nd = extract_word(Method, 2), # retrieve file extension from the file_id using regular expression extension = extract_substring(file_id, "\\.(\\w+)$", capture_bracket = 1) - ) %>% - iso_get_file_info(select = c(extension, `Peak Center`, matches("Method"))) %>% + ) %>% + iso_get_file_info(select = c(extension, `Peak Center`, matches("Method"))) %>% rmarkdown::paged_table() # use parsing in iso_filter_file_info -di_files2 %>% - iso_filter_files(parse_integer(Analysis) > 1500) %>% - iso_get_file_info() %>% +di_files2 %>% + iso_filter_files(parse_integer(Analysis) > 1500) %>% + iso_get_file_info() %>% rmarkdown::paged_table() # use iso_parse_file_info for simplified parsing of column data types -di_files2 %>% +di_files2 %>% iso_parse_file_info( - integer = Analysis, + integer = Analysis, number = `Sample Weight`, logical = `Peak Center` - ) %>% - iso_get_file_info() %>% + ) %>% + iso_get_file_info() %>% rmarkdown::paged_table() ``` @@ -250,13 +250,13 @@ The raw data read from the IRMS files can be retrieved similarly using the `iso_ # get raw data with default selections (all raw data, no additional file info) di_files %>% iso_get_raw_data() %>% head(n=10) 
%>% rmarkdown::paged_table() # get specific raw data and add some file information -di_files %>% +di_files %>% iso_get_raw_data( # select just time and the two ions select = c(type, cycle, v28.mV, v29.mV), # include the Analysis number fron the file info and rename it to 'run' include_file_info = c(run = Analysis) - ) %>% + ) %>% # look at first few records only head(n=10) %>% rmarkdown::paged_table() ``` @@ -264,19 +264,19 @@ di_files %>% # Data Processing -The isoreader package is intended to make raw stable isotope data easily accessible. However, as with most analytical data, there is significant downstream processing required to turn these raw signal intensities into properly referenced isotopic measurement. This and similar functionality as well as data visualization is part of the [isoprocessor package](https://isoprocessor.isoverse.org) which takes isotopic data through the various corrections in a transparent, efficient and reproducible manner. +The isoreader package is intended to make raw stable isotope data easily accessible. However, as with most analytical data, there is significant downstream processing required to turn these raw signal intensities into properly referenced isotopic measurements. This and similar functionality as well as data visualization is part of the [isoprocessor package](https://isoprocessor.isoverse.org) which takes isotopic data through the various corrections in a transparent, efficient and reproducible manner. -That said, most vendor software also performs some of these calculations and it can be useful to be able to compare new data reduction procecures against those implemented in the vendor software. For this purpose, isoreader retrieves vendor computed data tables whenver possible, as illustrated below. +That said, most vendor software also performs some of these calculations and it can be useful to be able to compare new data reduction procedures against those implemented in the vendor software.
For this purpose, isoreader retrieves vendor computed data tables whenever possible, as illustrated below. ## Vendor Data Table -As with most data retrieval funtions, the `iso_get_vendor_data_table()` function also allows specific column selection (by default, all columns are selected) and easy addition of file information via the `include_file_info` parameter (by default, none is included). +As with most data retrieval functions, the `iso_get_vendor_data_table()` function also allows specific column selection (by default, all columns are selected) and easy addition of file information via the `include_file_info` parameter (by default, none is included). ```{r} # entire vendor data table di_files %>% iso_get_vendor_data_table() %>% rmarkdown::paged_table() # get specific parts and add some file information -di_files %>% +di_files %>% iso_get_vendor_data_table( # select cycle and all carbon columns select = c(cycle, matches("C")), @@ -285,7 +285,7 @@ di_files %>% ) %>% rmarkdown::paged_table() ``` -# For expert users: retrieving all data +# For expert users: retrieving all data For users familiar with the nested data frames from the [tidyverse](https://www.tidyverse.org/) (particularly [tidyr](https://tidyr.tidyverse.org/)'s `nest` and `unnest`), there is an easy way to retrieve all data from the iso file objects in a single nested data frame: @@ -296,7 +296,7 @@ all_data <- di_files %>% iso_get_all_data() # Saving collections -Saving entire collections of isofiles for retrieval at a later point is easily done using the `iso_save` function which stores collections or individual isoreader file objects in the efficient R data storage format `.rds` (if not specified, the extension `.di.rds` will be automatically appended). These saved collections can be convientiently read back using the same `iso_read_dual_inlet` command used for raw data files. 
+Saving entire collections of isofiles for retrieval at a later point is easily done using the `iso_save` function which stores collections or individual isoreader file objects in the efficient R data storage format `.rds` (if not specified, the extension `.di.rds` will be automatically appended). These saved collections can be conveniently read back using the same `iso_read_dual_inlet` command used for raw data files. ```{r} # export to R data archive @@ -326,6 +326,3 @@ di_files %>% iso_export_to_feather("di_files_export") # exported feather files list.files(pattern = ".di.feather") ``` - - - diff --git a/vignettes/operations.Rmd b/vignettes/operations.Rmd index a2db4b2d..a2bedce3 100644 --- a/vignettes/operations.Rmd +++ b/vignettes/operations.Rmd @@ -1,7 +1,7 @@ --- title: "Operations" date: "`r Sys.Date()`" -output: +output: rmarkdown::html_vignette: html_document: code_folding: show @@ -39,8 +39,8 @@ library(isoreader) ```{r} # list all suported file types -iso_get_supported_file_types() %>% - dplyr::select(extension, software, description, type) %>% +iso_get_supported_file_types() %>% + dplyr::select(extension, software, description, type) %>% knitr::kable() ``` @@ -50,25 +50,25 @@ By default, isoreader is quite verbose to let the user know what is happening. 
H ```{r} # read a file in the default verbose mode -iso_get_reader_example("dual_inlet_example.did") %>% - iso_read_dual_inlet() %>% - iso_select_file_info(file_datetime, `Identifier 1`) %>% - iso_get_file_info() %>% +iso_get_reader_example("dual_inlet_example.did") %>% + iso_read_dual_inlet() %>% + iso_select_file_info(file_datetime, `Identifier 1`) %>% + iso_get_file_info() %>% knitr::kable() # read the same file but make the read process quiet -iso_get_reader_example("dual_inlet_example.did") %>% - iso_read_dual_inlet(quiet = TRUE) %>% - iso_select_file_info(file_datetime, `Identifier 1`) %>% - iso_get_file_info() %>% +iso_get_reader_example("dual_inlet_example.did") %>% + iso_read_dual_inlet(quiet = TRUE) %>% + iso_select_file_info(file_datetime, `Identifier 1`) %>% + iso_get_file_info() %>% knitr::kable() # read the same file but turn all isoreader messages off iso_turn_info_messages_off() -iso_get_reader_example("dual_inlet_example.did") %>% - iso_read_dual_inlet(quiet = TRUE) %>% - iso_select_file_info(file_datetime, `Identifier 1`) %>% - iso_get_file_info() %>% +iso_get_reader_example("dual_inlet_example.did") %>% + iso_read_dual_inlet(quiet = TRUE) %>% + iso_select_file_info(file_datetime, `Identifier 1`) %>% + iso_get_file_info() %>% knitr::kable() # turn message back on @@ -84,18 +84,18 @@ By default, isoreader caches files as R objects to make access faster in the fut iso_cleanup_reader_cache() # read a new file (notice the time elapsed) -cf_file <- iso_get_reader_example("continuous_flow_example.dxf") %>% +cf_file <- iso_get_reader_example("continuous_flow_example.dxf") %>% iso_read_continuous_flow() # re-read the same file much faster (it will be read from cache) -cf_file <- iso_get_reader_example("continuous_flow_example.dxf") %>% +cf_file <- iso_get_reader_example("continuous_flow_example.dxf") %>% iso_read_continuous_flow() # turn reader caching off iso_turn_reader_caching_off() # re-read the same file (it will NOT be read from cache) -cf_file <- 
iso_get_reader_example("continuous_flow_example.dxf") %>% +cf_file <- iso_get_reader_example("continuous_flow_example.dxf") %>% iso_read_continuous_flow() # turn reader caching back on @@ -104,13 +104,13 @@ iso_turn_reader_caching_on() # Parallel processing -Isoreader supports parallel processing of data files based on the number of processors available in a computer simply by setting the `parallel = TRUE` flag in any file read operation. This makes it possible to read large quantities of data files much more quickly on a multi-core system (i.e. most modern laptops). +Isoreader supports parallel processing of data files based on the number of processors available in a computer simply by setting the `parallel = TRUE` flag in any file read operation. This makes it possible to read large quantities of data files much more quickly on a multi-core system (i.e. most modern laptops). -However, whether parallel processing yields signifcant improvemens in read speeds depends on the number of available processors, file types and operating system. In theory, parallel processing always reduces computation time but in practice this is offset by various factors including the size of the data that needs to be sent back and forth between the processors, file system read/write speed, and the spin-up time for new processes. Generally speaking, parallel processing can provide significant improvements in speed with larger number of files (~10+) and more complex read operations (e.g. continuous flow > dual inlet > scan file). Reading from cache is so efficient that there are rarely gains from parallel processing and it is usually faster NOT to read in parallel once a set of files is already cached. +However, whether parallel processing yields significant improvements in read speeds depends on the number of available processors, file types and operating system. 
In theory, parallel processing always reduces computation time but in practice this is offset by various factors including the size of the data that needs to be sent back and forth between the processors, file system read/write speed, and the spin-up time for new processes. Generally speaking, parallel processing can provide significant improvements in speed with a larger number of files (~10+) and more complex read operations (e.g. continuous flow > dual inlet > scan file). Reading from cache is so efficient that there are rarely gains from parallel processing and it is usually faster NOT to read in parallel once a set of files is already cached. ```{r} # read 3 files in parallel (note that this is usually not a large enough file number to be worth it) -di_files <- +di_files <- iso_read_dual_inlet( iso_get_reader_example("dual_inlet_example.did"), iso_get_reader_example("dual_inlet_example2.did"), @@ -122,7 +122,7 @@ di_files <- # Combining / subsetting isofiles -All isoreader objects are lists that can be combined or subset to work with only specific files or create a larger collection. +All isoreader objects are lists that can be combined or subset to work with only specific files or create a larger collection. ```{r} # all 3 di_files read above @@ -158,7 +158,7 @@ Isoreader is designed to catch problems during file reading without crashing the ```{r} # read two files, one of which is erroneous -iso_files <- +iso_files <- iso_read_continuous_flow( iso_get_reader_example("continuous_flow_example.dxf"), system.file("errdata", "cf_without_data.dxf", package = "isoreader") @@ -176,9 +176,9 @@ iso_files <- iso_files %>% iso_filter_files_with_problems() # Re-reading files -If a file has changed (e.g. is edited through the vendor software) and the changes should be loaded in isoreader, it is easy to re-read and update just those files within a file collection by using the `iso_reread_changed_files()` function.
If some of the files are no longer accessible at their original location, it will throw a warning. If the location for all files has changed, it can be easily adjusted by modifying the `file_root` file info parameter using `iso_set_file_root()`. +If a file has changed (e.g. is edited through the vendor software) and the changes should be loaded in isoreader, it is easy to re-read and update just those files within a file collection by using the `iso_reread_changed_files()` function. If some of the files are no longer accessible at their original location, it will throw a warning. If the location for all files has changed, it can be easily adjusted by modifying the `file_root` file info parameter using `iso_set_file_root()`. -Similar functions can be used to re-read outdated files from an older isoreader version (`iso_reread_outdated_files()`), attempt to re-read problematic files that had read errors/warnings (`iso_reread_problem_files()`), or simply re-read all files in a colleciton (`iso_reread_all_files()`). +Similar functions can be used to re-read outdated files from an older isoreader version (`iso_reread_outdated_files()`), attempt to re-read problematic files that had read errors/warnings (`iso_reread_problem_files()`), or simply re-read all files in a collection (`iso_reread_all_files()`). ```{r} # re-read the 3 dual inlet files from their original location if any have changed @@ -187,36 +187,36 @@ di_files %>% # update the file_root for the files before re-read (in this case to a location # that does not hold these files and hence will lead to a warning) -di_files %>% +di_files %>% iso_set_file_root(root = ".") %>% iso_reread_all_files() ``` # Units -Isoreader provides a built in data type with units (`iso_with_units`) that can be used to easily keep track of units inside data frame. These units can be made explicit (=included in the column header), stripped altogher, or turned back to be implicit. 
+Isoreader provides a built-in data type with units (`iso_with_units`) that can be used to easily keep track of units inside a data frame. These units can be made explicit (=included in the column header), stripped altogether, or turned back to be implicit. ```{r} # strip all units -cf_file %>% - iso_get_vendor_data_table(select = c(`Ampl 28`, `rIntensity 28`, `d 15N/14N`)) %>% +cf_file %>% + iso_get_vendor_data_table(select = c(`Ampl 28`, `rIntensity 28`, `d 15N/14N`)) %>% iso_strip_units() %>% head(3) # make units explicit -cf_file %>% - iso_get_vendor_data_table(select = c(`Ampl 28`, `rIntensity 28`, `d 15N/14N`)) %>% +cf_file %>% + iso_get_vendor_data_table(select = c(`Ampl 28`, `rIntensity 28`, `d 15N/14N`)) %>% iso_make_units_explicit() %>% head(3) # introduce new unit columns e.g. in the file info -cf_file %>% - iso_mutate_file_info(weight = iso_with_units(0.42, "mg")) %>% +cf_file %>% + iso_mutate_file_info(weight = iso_with_units(0.42, "mg")) %>% iso_get_vendor_data_table(select = c(`Ampl 28`, `rIntensity 28`, `d 15N/14N`), include_file_info = weight) %>% iso_make_units_explicit() %>% head(3) # or turn a column e.g.
with custom format units in the header into implicit units -cf_file %>% - iso_mutate_file_info(weight.mg = 0.42) %>% +cf_file %>% + iso_mutate_file_info(weight.mg = 0.42) %>% iso_get_vendor_data_table(select = c(`Ampl 28`, `rIntensity 28`, `d 15N/14N`), include_file_info = weight.mg) %>% iso_make_units_implicit(prefix = ".", suffix = "") %>% head(3) @@ -229,36 +229,34 @@ Formatting data into text is easily achieved with the built in R function `sprin ```{r} # concatenation example with single values iso_format( - pi = 3.14159, - x = iso_with_units(42, "mg"), - ID = "ABC", - signif = 4, + pi = 3.14159, + x = iso_with_units(42, "mg"), + ID = "ABC", + signif = 4, sep = " | " ) # example inside a data frame -cf_file %>% - iso_get_vendor_data_table(select = c(`Nr.`, `Ampl 28`, `d 15N/14N`)) %>% - dplyr::select(-file_id) %>% +cf_file %>% + iso_get_vendor_data_table(select = c(`Nr.`, `Ampl 28`, `d 15N/14N`)) %>% + dplyr::select(-file_id) %>% head(3) %>% # introduce new label columns using iso_format dplyr::mutate( # default concatenation of values label_default = iso_format( - `Nr.`, `Ampl 28`, `d 15N/14N`, + `Nr.`, `Ampl 28`, `d 15N/14N`, sep = ", " ), # concatenate with custom names for each value label_named = iso_format( - `#` = `Nr.`, A = `Ampl 28`, d15 = `d 15N/14N`, + `#` = `Nr.`, A = `Ampl 28`, d15 = `d 15N/14N`, sep = ", " ), # concatenate just the values and increase significant digits label_value = iso_format( - `Nr.`, `Ampl 28`, `d 15N/14N`, + `Nr.`, `Ampl 28`, `d 15N/14N`, sep = ", ", format_names = NULL, signif = 6 ) ) ``` - - diff --git a/vignettes/scan.Rmd b/vignettes/scan.Rmd index a15f33d4..9c2dbe4d 100644 --- a/vignettes/scan.Rmd +++ b/vignettes/scan.Rmd @@ -1,7 +1,7 @@ --- title: "Scan Examples" date: "`r Sys.Date()`" -output: +output: rmarkdown::html_vignette: html_document: code_folding: show @@ -28,7 +28,7 @@ knitr::opts_chunk$set( # Introduction -Isoreader supports several dual inlet IRMS data formats. 
This vignette shows some of the functionality for scan data files. For additional information on operations more generally (caching, combining read files, data export, etc.), please consult the [operations vignette](http://isoreader.isoverse.org/articles/operations.html). For details on downstream data processing and visualization, see the [isoprocessor package](https://isoprocessor.isoverse.org). +Isoreader supports several scan IRMS data formats. This vignette shows some of the functionality for scan data files. For additional information on operations more generally (caching, combining read files, data export, etc.), please consult the [operations vignette](https://isoreader.isoverse.org/articles/operations.html). For details on downstream data processing and visualization, see the [isoprocessor package](https://isoprocessor.isoverse.org). Note: this vignette is still a work in progress. @@ -50,7 +50,7 @@ iso_get_reader_examples() %>% rmarkdown::paged_table() ```{r} # read scan examples -scan_files <- +scan_files <- iso_read_scan( iso_get_reader_example("peak_shape_scan_example.scn"), iso_get_reader_example("background_scan_example.scn"), @@ -78,7 +78,7 @@ scan_files %>% iso_get_problems() %>% rmarkdown::paged_table() # File Information -Detailed file information can be aggregated for all isofiles using the `iso_get_file_info()` function which supports the full [select syntax](https://dplyr.tidyverse.org/reference/select.html) of the [dplyr](https://dplyr.tidyverse.org/) package to specify which columns are of interest (by default, all file information is retrieved). +Detailed file information can be aggregated for all isofiles using the `iso_get_file_info()` function which supports the full [select syntax](https://dplyr.tidyverse.org/reference/select.html) of the [dplyr](https://dplyr.tidyverse.org/) package to specify which columns are of interest (by default, all file information is retrieved).
```{r} # all file information @@ -91,8 +91,8 @@ File information can also be modified across an entire collection of isofiles us ```{r} # select + rename specific file info columns -scan_files2 <- scan_files %>% - iso_select_file_info(-file_root) %>% +scan_files2 <- scan_files %>% + iso_select_file_info(-file_root) %>% iso_rename_file_info(`Date & Time` = file_datetime) # fetch all file info @@ -102,29 +102,29 @@ scan_files2 %>% iso_get_file_info() %>% rmarkdown::paged_table() ## Filter -Any collection of isofiles can also be filtered based on the available file information using the function `iso_filter_files`. This function can operate on any column available in the file information and supports full [dplyr](https://dplyr.tidyverse.org/reference/filter.html) syntax. +Any collection of isofiles can also be filtered based on the available file information using the function `iso_filter_files`. This function can operate on any column available in the file information and supports full [dplyr](https://dplyr.tidyverse.org/reference/filter.html) syntax. ```{r} # find files that have 'CIT' in the new ID field -scan_files2 %>% - iso_filter_files(type == "High Voltage") %>% - iso_get_file_info() %>% +scan_files2 %>% + iso_filter_files(type == "High Voltage") %>% + iso_get_file_info() %>% rmarkdown::paged_table() ``` ## Mutate -The file information in any collection of isofiles can also be mutated using the function `iso_mutate_file_info`. This function can introduce new columns and operate on any existing columns available in the file information (even if it does not exist in all files) and supports full [dplyr](https://dplyr.tidyverse.org/reference/mutate.html) syntax. +The file information in any collection of isofiles can also be mutated using the function `iso_mutate_file_info`. 
This function can introduce new columns and operate on any existing columns available in the file information (even if it does not exist in all files) and supports full [dplyr](https://dplyr.tidyverse.org/reference/mutate.html) syntax. ```{r} -scan_files3 <- scan_files2 %>% +scan_files3 <- scan_files2 %>% iso_mutate_file_info( # introduce new column `Run in 2019?` = `Date & Time` > "2019-01-01" & `Date & Time` < "2020-01-01" - ) + ) -scan_files3 %>% - iso_get_file_info() %>% +scan_files3 %>% + iso_get_file_info() %>% rmarkdown::paged_table() ``` @@ -144,18 +144,18 @@ The raw data read from the scan files can be retrieved similarly using the `iso_ # get raw data with default selections (all raw data, no additional file info) scan_files %>% iso_get_raw_data() %>% head(n=10) %>% rmarkdown::paged_table() # get specific raw data and add some file information -scan_files %>% +scan_files %>% iso_get_raw_data( # select just time and the two ions select = c(x, x_units, v44.mV, v45.mV), # include the scan type and rename the column include_file_info = c(`Scan Type` = type) - ) %>% + ) %>% # look at first few records only head(n=10) %>% rmarkdown::paged_table() ``` -# For expert users: retrieving all data +# For expert users: retrieving all data For users familiar with the nested data frames from the [tidyverse](https://www.tidyverse.org/) (particularly [tidyr](https://tidyr.tidyverse.org/)'s `nest` and `unnest`), there is an easy way to retrieve all data from the iso file objects in a single nested data frame: @@ -167,14 +167,14 @@ all_data <- scan_files %>% iso_get_all_data() # Saving collections -Saving entire collections of isofiles for retrieval at a later point is easily done using the `iso_save` function which stores collections or individual isoreader file objects in the efficient R data storage format `.rds` (if not specified, the extension `.scan.rds` will be automatically appended). 
These saved collections can be convientiently read back using the same `iso_read_scan` command used for raw data files. +Saving entire collections of isofiles for retrieval at a later point is easily done using the `iso_save` function which stores collections or individual isoreader file objects in the efficient R data storage format `.rds` (if not specified, the extension `.scan.rds` will be automatically appended). These saved collections can be conveniently read back using the same `iso_read_scan` command used for raw data files. ```{r} # export to R data archive scan_files %>% iso_save("scan_files_export.scan.rds") # read back the exported R data storage -iso_read_scan("scan_files_export.scan.rds") +iso_read_scan("scan_files_export.scan.rds") ``` # Data Export @@ -196,5 +196,3 @@ scan_files %>% iso_export_to_feather("scan_files_export") # exported feather files list.files(pattern = ".scan.feather") ``` - -