From 39e5fa24519a0989755a2031daa857f487fa1201 Mon Sep 17 00:00:00 2001 From: Elizabeth Wenk Date: Fri, 3 May 2024 15:20:47 +1000 Subject: [PATCH 01/11] update roxygen documention, website --- R/align_taxa.R | 76 +++++-- R/create_species_state_origin_matrix.R | 7 +- R/create_taxonomic_update_lookup.R | 78 +++++++- R/load_taxonomic_resources.R | 20 +- R/match_taxa.R | 21 +- R/native_anywhere_in_australia.R | 17 +- R/standardise_names.R | 37 ++-- R/state_diversity_counts.R | 13 +- R/strip_names.R | 33 +-- R/update_taxonomy.R | 116 +++++++---- R/word.R | 1 + _pkgdown.yml | 16 +- man/APCalign.Rd | 3 +- man/align_taxa.Rd | 213 +++++++++++++++----- man/create_species_state_origin_matrix.Rd | 12 +- man/create_taxonomic_update_lookup.Rd | 176 ++++++++++++---- man/load_taxonomic_resources.Rd | 33 ++- man/native_anywhere_in_australia.Rd | 31 ++- man/standardise_names.Rd | 20 +- man/standardise_taxon_rank.Rd | 13 +- man/state_diversity_counts.Rd | 23 ++- man/strip_names.Rd | 22 +- man/strip_names_extra.Rd | 17 +- man/update_taxonomy.Rd | 119 ++++++++--- man/word.Rd | 29 --- vignettes/articles/function_notes.Rmd | 232 ---------------------- 26 files changed, 820 insertions(+), 558 deletions(-) delete mode 100644 man/word.Rd delete mode 100644 vignettes/articles/function_notes.Rmd diff --git a/R/align_taxa.R b/R/align_taxa.R index b35654bc..5d36616e 100644 --- a/R/align_taxa.R +++ b/R/align_taxa.R @@ -1,19 +1,52 @@ +#' @title Align Australian plant scientific names to the APC or APNI +#' +#' @description #' For a list of Australian plant names, find taxonomic or scientific name -#' alignments to the APC or APNI through standardizing formatting -#' and fixing spelling errors +#' alignments to the APC or APNI through standardizing formatting and fixing +#' spelling errors. +#' +#' Usage case: Users will run this function if they wish to see the details +#' of the matching algorithms, the many output columns that the matching +#' function compares to as it seeks the best alignment. 
They may also select +#' this function if they want to adjust the "fuzziness" level for fuzzy +#' matches, options not allowed in create_taxonomic_update_lookup. This +#' function is the first half of create_taxonomic_update_lookup. #' -#' This function finds taxonomic alignments in APC or -#' scientific name alignments in APNI. -#' It uses the internal function `match_taxa` to attempt to match input strings -#' to taxon names in the APC/APNI. -#' It sequentially searches for matches against more than 20 different string +#' @details +#' - This function finds taxonomic alignments in APC or scientific name +#' alignments in APNI. +#' - It uses the internal function `match_taxa` to attempt to match input +#' strings to taxon names in the APC/APNI. +#' - It sequentially searches for matches against more than 20 different string #' patterns, prioritising exact matches (to accepted names as well as -#' synonyms, orthographic variants) over fuzzy matches. -#' It prioritises matches to taxa in the APC over names in the APNI. -#' It identifies string patterns in input names that suggest a name can only be -#' aligned to a genus (hybrids that are not in the APC/ANI; graded species; +#' synonyms, orthographic variants) over fuzzy matches. +#' - It prioritises matches to taxa in the APC over names in the APNI. +#' - It identifies string patterns in input names that suggest a name can only +#' be aligned to a genus (hybrids that are not in the APC/APNI; graded species; #' taxa not identified to species), and indicates these names only have a #' genus-rank match.
+#' +#' Notes: +#' +#' - If you will be running the function APCalign::create_taxonomic_update_lookup +#' many times, it is best to load the taxonomic resources separately using +#' resources <- load_taxonomic_resources(), then add the argument +#' resources = resources +#' - The name Banksia cerrata does not align as the fuzzy matching algorithm +#' does not allow the first letter of the genus and species epithet to change. +#' - With this function you have the option of changing the fuzzy matching +#' parameters. The defaults, with fuzzy matches only allowing changes of 3 +#' (or fewer) characters AND 20% (or less) of characters have been carefully +#' calibrated to catch just about all typos, but very, very rarely mis-align +#' a name. If you wish to introduce less conservative fuzzy matching it is +#' recommended you manually check the aligned names. +#' - It is recommended that you begin with imprecise_fuzzy_matches = FALSE (the +#' default), as quite a few of the less precise fuzzy matches are likely to be +#' erroneous. This argument should be turned on only if you plan to check all +#' alignments manually. +#' - The argument identifier allows you to add a fixed text string to all genus- +#' and family- level names, such as identifier = "Royal NP" would return "Acacia +#' sp. \[Royal NP]". #' #' @param original_name A list of names to query for taxonomic alignments. #' @param output (optional) The name of the file to save the results to.
@@ -121,8 +154,25 @@ #' @export #' #' @examples -#' \donttest{align_taxa(c("Poa annua", "Abies alba"))} -#' +#' \donttest{ +#' resources <- load_taxonomic_resources() +#' +#' # example 1 +#' align_taxa(c("Poa annua", "Abies alba"), resources = resources) +#' +#' # example 2 +#' input <- c("Banksia serrata", "Banksia serrate", "Banksia cerrata", +#' "Banksia serrrrata", "Dryandra sp.", "Banksia big red flowers") +#' +#' aligned_taxa <- +#' APCalign::align_taxa( +#' original_name = input, +#' identifier = "APCalign test", +#' full = TRUE, +#' resources = resources +#' ) +#' +#' } #' #' #' @seealso diff --git a/R/create_species_state_origin_matrix.R b/R/create_species_state_origin_matrix.R index b6f409fc..c3e20bac 100644 --- a/R/create_species_state_origin_matrix.R +++ b/R/create_species_state_origin_matrix.R @@ -1,5 +1,8 @@ -#' Use the taxon distribution data from the APC to determine state level -#' native and introduced origin status +#' @title State level native and introduced origin status +#' +#' @description +#' This function uses the taxon distribution data from the APC to determine +#' state level native and introduced origin status. #' #' This function processes the geographic data available in the APC and #' returns state level native, introduced and more complicated origins status for all taxa. diff --git a/R/create_taxonomic_update_lookup.R b/R/create_taxonomic_update_lookup.R index 046666ef..caa71f72 100644 --- a/R/create_taxonomic_update_lookup.R +++ b/R/create_taxonomic_update_lookup.R @@ -1,12 +1,40 @@ -#' Create a lookup table with the best-possible scientific name match for a -#' list of Australian plant names +#' @title Create a table with the best-possible scientific name match for +#' Australian plant names #' -#' This function takes a list of Australian plant names that need to be -#' reconciled with current taxonomy and -#' generates a lookup table of the best-possible scientific name match for -#' each input name. 
-#' It uses first the function `align_taxa`, then the function `update_taxonomy` -#' to achieve the output. +#' @description +#' This function takes a list of Australian plant names that need to be +#' reconciled with current taxonomy and generates a lookup table of the +#' best-possible scientific name match for each input name. +#' +#' Usage case: This is APCalign's core function, merging together the alignment +#' and updating of taxonomy. +#' +#' @details +#' - It uses first the function `align_taxa`, then the function `update_taxonomy` +#' to achieve the output. The aligned name is a plant name that has been aligned +#' to a taxon name in the APC or APNI by the align_taxa function. +#' +#' Notes: +#' +#' - If you will be running the function APCalign::create_taxonomic_update_lookup +#' many times, it is best to load the taxonomic resources separately using +#' `resources <- load_taxonomic_resources()`, then add the argument +#' resources = resources +#' - The name Banksia cerrata does not align as the fuzzy matching algorithm +#' does not allow the first letter of the genus and species epithet to change. +#' - The argument taxonomic_splits allows you to choose the outcome for updating +#' the names of taxa with ambiguous taxonomic histories; this applies to +#' scientific names that were once attached to a more broadly circumscribed +#' taxon concept, that was then split into several more narrowly circumscribed +#' taxon concepts, one of which retains the original name. There are three +#' options: most_likely_species returns the name that is retained, with +#' alternative names documented in square brackets; return_all adds additional +#' rows to the output, one for each possible taxon concept; +#' collapse_to_higher_taxon returns the genus with possible names in square +#' brackets. +#' - The argument identifier allows you to add a fixed text string to all genus- +#' and family- level names, such as identifier = "Royal NP" would return +#' `Acacia sp.
\[Royal NP]`. #' #' @family taxonomic alignment functions #' @@ -93,13 +121,41 @@ #' #' @seealso \code{\link{load_taxonomic_resources}} #' @examples -#' \donttest{resources <- load_taxonomic_resources() +#' \donttest{ +#' resources <- load_taxonomic_resources() +#' +#' # example 1 #' create_taxonomic_update_lookup(c("Eucalyptus regnans", #' "Acacia melanoxylon", #' "Banksia integrifolia", #' "Not a species"), -#' resources=resources) -#'} +#' resources = resources) +#' +#' # example 2 +#' input <- c("Banksia serrata", "Banksia serrate", "Banksia cerrata", +#' "Banksea serrata", "Banksia serrrrata", "Dryandra") +#' +#' create_taxonomic_update_lookup( +#' taxa = input, +#' identifier = "APCalign test", +#' full = TRUE, +#' resources = resources +#' ) +#' +#' # example 3 +#' taxon_list <- +#' readr::read_csv(system.file("extdata", "test_taxa.csv", package = "APCalign"), +#' show_col_types = FALSE +#' ) +#' +#' create_taxonomic_update_lookup( +#' taxa = taxon_list$original_name, +#' identifier = taxon_list$notes, +#' full = TRUE, +#' resources = resources +#' ) +#' } +#' create_taxonomic_update_lookup <- function(taxa, stable_or_current_data = "stable", version = default_version(), diff --git a/R/load_taxonomic_resources.R b/R/load_taxonomic_resources.R index a3fc9250..91b2acde 100644 --- a/R/load_taxonomic_resources.R +++ b/R/load_taxonomic_resources.R @@ -1,11 +1,15 @@ -#' Load taxonomic resources from either stable or current versions of APC and APNI -#' +#' @title Load taxonomic reference lists, APC & APNI +#' +#' @description #' This function loads two taxonomic datasets for Australia's vascular plants, -#' the APC and APNI, into the global environment. -#' It accesses taxonomic data from a dataset using the provided version number +#' the APC and APNI, into the global environment. It creates several data frames +#' by filtering and selecting data from the loaded lists.
+#' +#' @details +#' - It accesses taxonomic data from a dataset using the provided version number #' or the default version. -#' The function creates several data frames by filtering and selecting data -#' from the loaded lists. +#' - The output is several dataframes that include subsets of the APC/APNI based +#' on taxon rank and taxonomic status. #' #' @param stable_or_current_data Type of dataset to access. #' The default is "stable", which loads the dataset from a github archived file. @@ -21,7 +25,9 @@ #' @export #' #' @examples -#' \donttest{load_taxonomic_resources(stable_or_current_data="stable", version="0.0.2.9000")} +#' \donttest{ +#' load_taxonomic_resources(stable_or_current_data="stable", +#' version="0.0.2.9000")} #' load_taxonomic_resources <- diff --git a/R/match_taxa.R b/R/match_taxa.R index 408d95db..d7050765 100644 --- a/R/match_taxa.R +++ b/R/match_taxa.R @@ -1,15 +1,22 @@ -#' Match taxonomic names to accepted names in list +#' @title Match taxonomic names to names in the APC/APNI #' -#' This function attempts to match input strings to a list of allowable -#' taxonomic names. -#' It cycles through more than 20 different string patterns, sequentially +#' @description +#' This function attempts to match input strings to Australia's reference lists +#' for vascular plants, the APC and APNI. It attempts: +#' 1. perfect matches and fuzzy matches +#' 2. matches to infraspecies, species, genus, and family names +#' 3. matches to the entire input string and subsets there-of +#' 4. searches for string patterns that suggest a specific taxon rank +#' +#' @details +#' - It cycles through more than 20 different string patterns, sequentially #' searching for additional match patterns. -#' It identifies string patterns in input names that suggest a name can only be +#' - It identifies string patterns in input names that suggest a name can only be #' aligned to a genus (hybrids that are not accepted names; graded species; #' taxa not identified to species). 
-#' It prioritises matches that do not require fuzzy matching (i.e. synonyms, +#' - It prioritises matches that do not require fuzzy matching (i.e. synonyms, #' orthographic variants) over those that do. -#' If prioritises matches to taxa in the APC over names in the APNI. +#' - It prioritises matches to taxa in the APC over names in the APNI. #' #' @param taxa The list of taxa requiring checking # diff --git a/R/native_anywhere_in_australia.R b/R/native_anywhere_in_australia.R index 026bfc8b..cd7aba60 100644 --- a/R/native_anywhere_in_australia.R +++ b/R/native_anywhere_in_australia.R @@ -1,14 +1,17 @@ -#' For a vector of taxon names in to the APC, check if the species are -#' native anywhere in Australia -#' +#' @title Native anywhere in Australia +#' +#' @description #' This function checks which species from a list is thought to be native anywhere in #' Australia according to the APC. -#' Important caveats: this will not detect within-Australia introductions, +#' +#' @details +#' Important caveats: +#' - This function will not detect within-Australia introductions, #' e.g. if a species is from Western Australia and is invasive on the east coast. -#' Also, very recent invasions are unlikely to be documented yet in APC. -#' Ideally check spelling and taxonomy updates first via +#' - Very recent invasions are unlikely to be documented yet in APC. +#' - Ideally check spelling and taxonomy updates first via #' \link{create_taxonomic_update_lookup}. -#' For the complete matrix of species by states that also represents +#' - For the complete matrix of species by states that also represents #' within-Australia invasions, use \link{create_species_state_origin_matrix}.
#' #' @family diversity methods diff --git a/R/standardise_names.R b/R/standardise_names.R index 9f6207f6..a619734f 100644 --- a/R/standardise_names.R +++ b/R/standardise_names.R @@ -1,16 +1,26 @@ -#' Standardises taxon names by performing a series of text substitutions to remove common inconsistencies in taxonomic nomenclature. -#' +#' @title Standardise taxon names +#' +#' @description +#' Standardises taxon names by performing a series of text substitutions to +#' remove common inconsistencies in taxonomic nomenclature. +#' #' The function takes a character vector of taxon names as input and -#' returns a character vector of taxon names using standardised taxonomic syntax as output. -#' In particular it standardises taxon rank abbreviations and qualifiers (subsp., var., f.), as people use many variants of these terms. -#' It also standardises or removes a few additional filler words used within taxon names (affinis becomes aff.; s.l. and s.s. are removed). +#' returns a character vector of taxon names using standardised taxonomic syntax +#' as output. +#' +#' @details +#' - It removes stray punctuation at the start and end of a character string. +#' - It standardises unusual characters and symbols to ASCII equivalents. +#' - It standardises taxon rank abbreviations and qualifiers (subsp., var., f.), +#' as people use many variants of these terms. +#' - It standardises or removes a few additional filler words used within +#' taxon names (affinis becomes aff.; s.l. and s.s. are removed). #' #' @param taxon_names A character vector of taxon names that need to be standardised. #' #' @return A character vector of standardised taxon names. #' -#' #' @examples #' standardise_names(c("Quercus suber", #' "Eucalyptus sp.", @@ -149,15 +159,18 @@ extract_genus <- function(taxon_name) { } -#' Standardise taxon ranks from latin into english. 
-#' -#' The function takes a character vector of taxon ranks as input and -#' returns a character vector of taxon ranks using standardised english terms. +#' @title Standardise taxon ranks +#' +#' @description +#' Standardise taxon ranks from Latin into English. #' -#' @param taxon_rank A character vector of taxon ranks that need to be standardised. +#' @details +#' The function takes a character vector of Latin taxon ranks as input and +#' returns a character vector of taxon ranks using standardised English terms. #' -#' @return A character vector of standardised taxon names. +#' @param taxon_rank A character vector of Latin taxon ranks. #' +#' @return A character vector of English taxon ranks. #' #' @examples #' standardise_taxon_rank(c("regnum", "kingdom", "classis", "class")) diff --git a/R/state_diversity_counts.R b/R/state_diversity_counts.R index bb9c5bcc..ad0ce302 100644 --- a/R/state_diversity_counts.R +++ b/R/state_diversity_counts.R @@ -1,10 +1,9 @@ -#' For Australian states and territories, use data from the APC to calculate -#' state-level diversity for native, introduced, -#' and more complicated species origins -#' -#' This function calculates state-level diversity for native, introduced, -#' and more complicated species origins -#' based on the geographic data available in the APC. 
+#' @title State- and territory-level diversity +#' +#' @description +#' For Australian states and territories, use geographic distribution data from +#' the APC to calculate state-level diversity for native, introduced, +#' and more complicated species origins #' #' @family diversity methods #' @param state A character string indicating the Australian state or diff --git a/R/strip_names.R b/R/strip_names.R index 07f29296..bb1a365d 100644 --- a/R/strip_names.R +++ b/R/strip_names.R @@ -1,9 +1,15 @@ +#' @title Strip taxon names +#' +#' @description #' Strip taxonomic names of taxon rank abbreviations and qualifiers #' and special characters #' -#' Given a vector of taxonomic names, this function removes -#' subtaxa designations ("subsp.", "var.", "f.", and "ser"), -#' special characters (e.g., "-", ".", "(", ")", "?"), and extra whitespace. +#' @details +#' Given a vector of taxonomic names, this function removes: +#' - subtaxa designations ("subsp.", "var.", "f.", and "ser") +#' - special characters (e.g., "-", ".", "(", ")", "?") +#' - extra whitespace +#' #' The resulting vector of names is also converted to lowercase. #' #' @param taxon_names A character vector of taxonomic names to be stripped. @@ -42,20 +48,21 @@ strip_names <- function(taxon_names) { stringr::str_to_lower() } -#' Strip taxonomic names of taxon rank abbreviations and qualifiers, -#' filler words and special characters -#' -#' Given a vector of taxonomic names, this function removes subtaxa -#' designations ("subsp.", "var.", "f.", and "ser"), -#' additional filler words and characters (" x " for hybrid taxa, "sp."), -#' special characters (e.g., "-", ".", "(", ")", "?"), and extra whitespace. -#' The resulting vector of names is also converted to lowercase. +#' @title Strip taxon names, extra +#' +#' @description +#' Strip taxonomic names of `sp.` and hybrid symbols. This function assumes +#' that a character vector has already been run through `strip_names`.
+#' +#' @details +#' Given a vector of taxonomic names, this function removes additional filler +#' words (" x " for hybrid taxa, "sp.") not removed by the function +#' `strip_names` #' #' @param taxon_names A character vector of taxonomic names to be stripped. #' #' @return A character vector of stripped taxonomic names, -#' with subtaxa designations, special characters, additional filler words and -#' extra whitespace removed, and all letters converted to lowercase. +#' with `sp.` and hybrid symbols removed. #' #' #' @examples diff --git a/R/update_taxonomy.R b/R/update_taxonomy.R index e55a6737..84c5da4c 100644 --- a/R/update_taxonomy.R +++ b/R/update_taxonomy.R @@ -1,50 +1,92 @@ -#' For a list of taxon names aligned to the APC, update the name to an accepted taxon concept per the APC and add scientific name and taxon concept metadata to names aligned to either the APC or APNI. +#' @title Update to currently accepted APC name and add APC/APNI name metadata +#' +#' @description +#' For a list of taxon names aligned to the APC, update the name to an accepted +#' taxon concept per the APC and add scientific name and taxon concept metadata +#' to names aligned to either the APC or APNI. #' -#' This function uses the APC to update the taxonomy of names aligned to a taxon concept listed in the APC to the currently accepted name for the taxon concept. -#' The aligned_data data frame that is input must contain 5 columns, -#' `original_name`, `aligned_name`, `taxon_rank`, `taxonomic_dataset`, and `aligned_reason`. -#' The aligned name is a plant name that has been aligned to a taxon name in the APC or APNI by the align_taxa function. +#' @details +#' - This function uses the APC to update the taxonomy of names aligned to a +#' taxon concept listed in the APC to the currently accepted name for the taxon +#' concept. 
+#' - The aligned_data data frame that is input must contain 5 columns, +#' `original_name`, `aligned_name`, `taxon_rank`, `taxonomic_dataset`, and +#' `aligned_reason`. (These are the columns output by the function `align_taxa`.) +#' - The aligned name is a plant name that has been aligned to a taxon name in +#' the APC or APNI by the align_taxa function. +#' +#' Notes: +#' - As the input for this function is a table with 5 columns (output by +#' align_taxa), this function will only be used when you explicitly want to +#' separate the alignment and updating components of APCalign. This function is +#' the second half of create_taxonomic_update_lookup. #' #' @family taxonomic alignment functions #' -#' @param aligned_data A tibble of plant names to update. This table must include 5 columns, original_name, aligned_name, taxon_rank, taxonomic_dataset, and aligned_reason. +#' @param aligned_data A tibble of plant names to update. This table must +#' include 5 columns, original_name, aligned_name, taxon_rank, +#' taxonomic_dataset, and aligned_reason. #' These columns are created by the function `align_taxa`. -#' The columns `original_name` and `aligned_name` must be in the format of the scientific name, with genus and species, -#' and may contain additional qualifiers such as subspecies or varieties. The names are case insensitive. -#' -#' @param taxonomic_splits Variable that determines what protocol to use to update taxon names that are ambiguous due to taxonomic splits. +#' The columns `original_name` and `aligned_name` must be in the format of the +#' scientific name, with genus and species, +#' and may contain additional qualifiers such as subspecies or varieties. The +#' names are case insensitive. +#' @param taxonomic_splits Variable that determines what protocol to use to +#' update taxon names that are ambiguous due to taxonomic splits.
#' The three options are: -#' most_likely_species, which returns the species name in use before the split; alternative names are returned in a separate column -#' return_all, which returns all possible names -#' collapse_to_higher_taxon, which declares that an ambiguous name cannot be aligned to an accepted species/infraspecific name and the name is demoted to genus rank -#' @param quiet Logical to indicate whether to display messages while updating taxa. -#' @param output (optional) Name of the file where results are saved. The default is NULL and no file is created. -#' If specified, the output will be saved in a CSV file with the given name. -#' -#' @param resources the taxonomic resources required to make the summary statistics. Loading this can be slow, so call load_taxonomic_resources separately to greatly speed this function up and pass the resources in. +#' - `most_likely_species`, which returns the species name in use before the +#' split; alternative names are returned in a separate column +#' - `return_all`, which returns all possible names +#' - `collapse_to_higher_taxon`, which declares that an ambiguous name cannot +#' be aligned to an accepted species/infraspecific name and the name is +#' demoted to genus rank +#' @param quiet Logical to indicate whether to display messages while updating +#' taxa. +#' @param output (optional) Name of the file where results are saved. The +#' default is NULL and no file is created. If specified, the output will be +#' saved in a CSV file with the given name. +#' @param resources the taxonomic resources required to make the summary +#' statistics. Loading this can be slow, so call load_taxonomic_resources +#' separately to greatly speed this function up and pass the resources in. #' #' -#' @return A tibble with updated taxonomy for the specified plant names. The tibble contains the following columns: +#' @return A tibble with updated taxonomy for the specified plant names. 
The +#' tibble contains the following columns: #' - original_name: the original plant name. -#' - aligned_name: the input plant name that has been aligned to a taxon name in the APC or APNI by the align_taxa function. +#' - aligned_name: the input plant name that has been aligned to a taxon name +#' in the APC or APNI by the align_taxa function. #' - accepted_name: the APC-accepted plant name, when available. -#' - suggested_name: the suggested plant name to use. Identical to the accepted_name, when an accepted_name exists; otherwise the the suggested_name is the aligned_name. -#' - genus: the genus of the accepted (or suggested) name; only APC-accepted genus names are filled in. -#' - family: the family of the accepted (or suggested) name; only APC-accepted family names are filled in. +#' - suggested_name: the suggested plant name to use. Identical to the +#' accepted_name, when an accepted_name exists; otherwise the suggested_name +#' is the aligned_name. +#' - genus: the genus of the accepted (or suggested) name; only APC-accepted +#' genus names are filled in. +#' - family: the family of the accepted (or suggested) name; only APC-accepted +#' family names are filled in. #' - taxon_rank: the taxonomic rank of the suggested (and accepted) name. -#' - taxonomic_dataset: the source of the suggested (and accepted) names (APC or APNI). +#' - taxonomic_dataset: the source of the suggested (and accepted) names (APC or +#' APNI). #' - taxonomic_status: the taxonomic status of the suggested (and accepted) name. -#' - taxonomic_status_aligned: the taxonomic status of the aligned name, before any taxonomic updates have been applied. -#' - aligned_reason: the explanation of a specific taxon name alignment (from an original name to an aligned name). -#' - update_reason: the explanation of a specific taxon name update (from an aligned name to an accepted or suggested name).
+#' - taxonomic_status_aligned: the taxonomic status of the aligned name, before +#' any taxonomic updates have been applied. +#' - aligned_reason: the explanation of a specific taxon name alignment (from an +#' original name to an aligned name). +#' - update_reason: the explanation of a specific taxon name update (from an +#' aligned name to an accepted or suggested name). #' - subclass: the subclass of the accepted name. -#' - taxon_distribution: the distribution of the accepted name; only filled in if an APC accepted_name is available. -#' - scientific_name_authorship: the authorship information for the accepted (or synonymous) name; available for both APC and APNI names. -#' - taxon_ID: the unique taxon concept identifier for the accepted_name; only filled in if an APC accepted_name is available. -#' - taxon_ID_genus: an identifier for the genus; only filled in if an APC-accepted genus name is available. -#' - scientific_name_ID: an identifier for the nomenclatural (not taxonomic) details of a scientific name; available for both APC and APNI names. +#' - taxon_distribution: the distribution of the accepted name; only filled in +#' if an APC accepted_name is available. +#' - scientific_name_authorship: the authorship information for the accepted +#' (or synonymous) name; available for both APC and APNI names. +#' - taxon_ID: the unique taxon concept identifier for the accepted_name; only +#' filled in if an APC accepted_name is available. +#' - taxon_ID_genus: an identifier for the genus; only filled in if an +#' APC-accepted genus name is available. +#' - scientific_name_ID: an identifier for the nomenclatural (not taxonomic) +#' details of a scientific name; available for both APC and APNI names. #' - row_number: the row number of a specific original_name in the input. -#' - number_of_collapsed_taxa: when taxonomic_splits == "collapse_to_higher_taxon", the number of possible taxon names that have been collapsed. 
+#' - number_of_collapsed_taxa: when taxonomic_splits == "collapse_to_higher_taxon", +#' the number of possible taxon names that have been collapsed. #' #' #' @seealso load_taxonomic_resources @@ -53,13 +95,17 @@ #' #' @examples #' # Update taxonomy for two plant names and print the result -#' \donttest{update_taxonomy( +#' \donttest{ +#' resources <- load_taxonomic_resources() +#' +#' update_taxonomy( #' dplyr::tibble( #' original_name = c("Dryandra preissii", "Banksia acuminata"), #' aligned_name = c("Dryandra preissii", "Banksia acuminata"), #' taxon_rank = c("species", "species"), #' taxonomic_dataset = c("APC", "APC"), #' aligned_reason = NA_character_ -#' ) +#' ), +#' resources = resources #' ) #' } diff --git a/R/word.R b/R/word.R index 78f45b6e..0a762479 100644 --- a/R/word.R +++ b/R/word.R @@ -12,6 +12,7 @@ #' spp <- c("Banksia serrata", "Actinotus helanthii") #' APCalign:::word(spp, 1) #' APCalign:::word(spp, 2) +#' @noRd word <- function(string, start = 1L, end = start, sep = " ") { if(end == start) { stringr::str_split_i(string, " ", start) diff --git a/_pkgdown.yml b/_pkgdown.yml index 82ab72e0..da091977 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -16,27 +16,23 @@ navbar: articles: text: Articles menu: - - text: "Data providers" - - text: APC and APNI + - text: Data sources href: articles/articles/data-providers.html - - text: "Functions" - - text: Details on the 10 exported functions, including examples of usage - href: articles/function_notes.html - - text: "Taxon matching" - - text: Our fuzzy matching algorithm + - text: Taxon matching href: articles/updating-taxon-names.html - - text: "Reproducibility with APCalign" + - text: Using APC versions for reproducibility href: articles/reproducibility.html reference: -- subtitle: Standardise plant taxon names +- subtitle: Align and update taxon names - contents: - load_taxonomic_resources - default_version - create_taxonomic_update_lookup - align_taxa - update_taxonomy +-
subtitle: Standardise and simplify plant taxon names - standardise_names - standardise_taxon_rank - strip_names @@ -46,6 +42,6 @@ reference: - create_species_state_origin_matrix - state_diversity_counts - native_anywhere_in_australia -- title: Data +- subtitle: Data - contents: - gbif_lite diff --git a/man/APCalign.Rd b/man/APCalign.Rd index 7d4907fb..9936d5a7 100644 --- a/man/APCalign.Rd +++ b/man/APCalign.Rd @@ -32,7 +32,8 @@ the established status of plant taxa across different states/territories. \references{ If you have any questions, comments or suggestions, please -submit an issue at our \href{https://github.com/traitecoevo/APCalign/issues}{GitHub repository} +submit an issue at our +\href{https://github.com/traitecoevo/APCalign/issues}{GitHub repository} } \seealso{ Useful links: diff --git a/man/align_taxa.Rd b/man/align_taxa.Rd index d4b5150b..c16a3240 100644 --- a/man/align_taxa.Rd +++ b/man/align_taxa.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/align_taxa.R \name{align_taxa} \alias{align_taxa} -\title{For a list of Australian plant names, find taxonomic or scientific name alignments to the APC or APNI through standardizing formatting and fixing spelling errors} +\title{Align Australian plant scientific names to the APC or APNI} \usage{ align_taxa( original_name, @@ -25,74 +25,187 @@ align_taxa( \item{full}{Parameter to determine how many columns are output} -\item{resources}{the taxonomic resources used to align the taxa names. Loading this can be slow, -so call \code{\link{load_taxonomic_resources}} separately to greatly speed this function up -and pass the resources in.} +\item{resources}{the taxonomic resources used to align the taxa names. 
+Loading this can be slow, so call \code{\link{load_taxonomic_resources}} +separately to greatly speed this function up and pass the resources in.} -\item{quiet}{Logical to indicate whether to display messages while aligning taxa.} +\item{quiet}{Logical to indicate whether to display messages while +aligning taxa.} -\item{fuzzy_abs_dist}{The number of characters allowed to be different for a fuzzy match.} +\item{fuzzy_abs_dist}{The number of characters allowed to be different for a +fuzzy match.} -\item{fuzzy_rel_dist}{The proportion of characters allowed to be different for a fuzzy match.} +\item{fuzzy_rel_dist}{The proportion of characters allowed to be different +for a fuzzy match.} -\item{fuzzy_matches}{Fuzzy matches are turned on as a default. The relative and absolute distances -allowed for fuzzy matches to species and infraspecific taxon names are defined by the parameters -\code{fuzzy_abs_dist} and \code{fuzzy_rel_dist}} +\item{fuzzy_matches}{Fuzzy matches are turned on as a default. +The relative and absolute distances allowed for fuzzy matches to species and +infraspecific taxon names are defined by the parameters \code{fuzzy_abs_dist} +and \code{fuzzy_rel_dist}} -\item{imprecise_fuzzy_matches}{Imprecise fuzzy matches uses the fuzzy matching function -with lenient levels set (absolute distance of 5 characters; relative distance = 0.25). -It offers a way to get a wider range of possible names, possibly corresponding to very distant spelling mistakes. -This is FALSE as default and all outputs should be checked as it often makes erroneous matches.} +\item{imprecise_fuzzy_matches}{Imprecise fuzzy matches uses the +fuzzy matching function with lenient levels set (absolute distance of +5 characters; relative distance = 0.25). +It offers a way to get a wider range of possible names, possibly +corresponding to very distant spelling mistakes. 
+This is FALSE as default and all outputs should be checked as it often +makes erroneous matches.} -\item{APNI_matches}{Name matches to the APNI (Australian Plant Names Index) are turned on as a default.} +\item{APNI_matches}{Name matches to the APNI (Australian Plant Names Index) +are turned on as a default.} -\item{identifier}{A dataset, location or other identifier, which defaults to NA.} +\item{identifier}{A dataset, location or other identifier, +which defaults to NA.} } \value{ -A tibble with columns that include original_name, aligned_name, taxonomic_dataset, taxon_rank, aligned_reason, alignment_code. +A tibble with columns that include original_name, aligned_name, +taxonomic_dataset, taxon_rank, aligned_reason, alignment_code. \itemize{ \item original_name: the original plant name input. -\item aligned_name: the original plant name after the function standardise_names has standardised the syntax of infraspecific taxon designations. +\item aligned_name: the original plant name after the function standardise_names +has standardised the syntax of infraspecific taxon designations. \item taxonomic_dataset: the source of the aligned names (APC or APNI). \item taxon_rank: the taxonomic rank of the aligned name. -\item aligned_reason: the explanation of a specific taxon name alignment (from an original name to an aligned name). -\item alignment_code: a code that accompanies the aligned_reason, indicating the relative sequence of the match during the alignment process. -\item cleaned_name: original name with punctuation and infraspecific taxon designation terms standardised by the function standardise_names; streamlines exact matches. -\item stripped_name: cleaned name with punctuation and infraspecific taxon designation terms removed by the function strip_names; improves fuzzy matches. 
-\item stripped_name2: cleaned name with punctuation, infraspecific taxon designation terms, and other filler words removed by the function strip_names_2; required for matches to \verb{first two word} and \verb{first three words}. -\item trinomial: the first three words in \code{stripped_name2}, required for matches that ignore all other text in the original_name; improves phrase name matches. -\item binomial: the first two words in \code{stripped_name2}, required for matches that ignore all other text in the original_name; improves phrase name matches. -\item genus: the first two words in \code{cleaned_name}; required for genus-rank matches and reprocessing of genus-rank names. -\item fuzzy_match_genus: fuzzy match of genus column to best match among APC-accepted names; required for fuzzy matches of genus-rank names. -\item fuzzy_match_genus_synonym: fuzzy match of genus column to best match among APC-synonymous names, only considering different matches to those documented under APC-accepted genera; required for fuzzy matches of genus-rank names. -\item fuzzy_match_genus_APNI: fuzzy match of genus column to best match among APNI names, only considering different matches to those documented under APC-accepted and APC-known genera; required for fuzzy matches of genus-rank names. -\item fuzzy_match_family: fuzzy match of genus column to best match among APC-accepted family names; required for fuzzy matches of family-rank names. -\item fuzzy_match_family_synonym: fuzzy match of genus column to best match among APC-synonymous family names; required for fuzzy matches of family-rank names. -\item fuzzy_match_cleaned_APC: fuzzy match of stripped_name to APC-accepted names; created for yet-to-be-aligned names at the match step 07a in the function \code{match_taxa}. -\item fuzzy_match_cleaned_APC_synonym: fuzzy match of stripped_name to APC-synonymous names; created for yet-to-be-aligned names at the match step 07b in the function \code{match_taxa}. 
-\item fuzzy_match_cleaned_APC_imprecise: imprecise fuzzy match of stripped_name to APC-accepted names; created for yet-to-be-aligned names at the match step 10a in the function \code{match_taxa}. -\item fuzzy_match_cleaned_APC_synonym_imprecise: imprecise fuzzy match of stripped_name to APC-accepted names; created for yet-to-be-aligned names at the match step 10b in the function \code{match_taxa}. -\item fuzzy_match_binomial: fuzzy match of binomial column to best match among APC-accepted names; created for yet-to-be-aligned names at match step 15a in the function \code{match_taxa}. -\item fuzzy_match_binomial_APC_synonym: fuzzy match of binomial column to best match among APC-synonymous names; created for yet-to-be-aligned names at match step 15a in the function \code{match_taxa}. -\item fuzzy_match_trinomial: fuzzy match of trinomial column to best match among APC-accepted names; created for yet-to-be-aligned names at match step 16a in the function \code{match_taxa}. -\item fuzzy_match_trinomial_synonym: fuzzy match of trinomial column to best match among APC-synonymous names; created for yet-to-be-aligned names at match step 16b in the function \code{match_taxa}. -\item fuzzy_match_cleaned_APNI: fuzzy match of stripped_name to APNI names; created for yet-to-be-aligned names at the match step 16a in the function \code{match_taxa}. -\item fuzzy_match_cleaned_APNI_imprecise: imprecise fuzzy match of stripped_name to APNI names; created for yet-to-be-aligned names at the match step 17a in the function \code{match_taxa}. +\item aligned_reason: the explanation of a specific taxon name alignment +(from an original name to an aligned name). +\item alignment_code: a code that accompanies the aligned_reason, indicating the +relative sequence of the match during the alignment process. +\item cleaned_name: original name with punctuation and infraspecific taxon +designation terms standardised by the function standardise_names; +streamlines exact matches. 
+\item stripped_name: cleaned name with punctuation and infraspecific taxon +designation terms removed by the function strip_names; +improves fuzzy matches. +\item stripped_name2: cleaned name with punctuation, infraspecific taxon +designation terms, and other filler words removed by +the function \code{strip_names_extra}; +required for matches to \verb{first two word} and \verb{first three words}. +\item trinomial: the first three words in \code{stripped_name2}, required for matches +that ignore all other text in the original_name; +improves phrase name matches. +\item binomial: the first two words in \code{stripped_name2}, required for matches +that ignore all other text in the original_name; +improves phrase name matches. +\item genus: the first two words in \code{cleaned_name}; +required for genus-rank matches and reprocessing of genus-rank names. +\item fuzzy_match_genus: fuzzy match of genus column to best match among +APC-accepted names; +required for fuzzy matches of genus-rank names. +\item fuzzy_match_genus_synonym: fuzzy match of genus column to best match among +APC-synonymous names, only considering different matches to those documented +under APC-accepted genera; required for fuzzy matches of genus-rank names. +\item fuzzy_match_genus_APNI: fuzzy match of genus column to best match among +APNI names, only considering different matches to those documented under +APC-accepted and APC-known genera; required for fuzzy matches of +genus-rank names. +\item fuzzy_match_family: fuzzy match of genus column to best match among +APC-accepted family names; required for fuzzy matches of family-rank names. +\item fuzzy_match_family_synonym: fuzzy match of genus column to best match +among APC-synonymous family names; required for fuzzy matches of +family-rank names. +\item fuzzy_match_cleaned_APC: fuzzy match of stripped_name to APC-accepted +names; created for yet-to-be-aligned names at the match step 05a +in the function \code{match_taxa}. 
+\item fuzzy_match_cleaned_APC_synonym: fuzzy match of stripped_name to +APC-synonymous names; created for yet-to-be-aligned names at the +match step 05b in the function \code{match_taxa}. +\item fuzzy_match_cleaned_APC_imprecise: imprecise fuzzy match of stripped_name +to APC-accepted names; created for yet-to-be-aligned names at the +match step 07a in the function \code{match_taxa}. +\item fuzzy_match_cleaned_APC_synonym_imprecise: imprecise fuzzy match of +stripped_name to APC-synonymous names; created for yet-to-be-aligned names +at the match step 07b in the function \code{match_taxa}. +\item fuzzy_match_binomial: fuzzy match of binomial column to best match among +APC-accepted names; created for yet-to-be-aligned names at +match step 10c in the function \code{match_taxa}. +\item fuzzy_match_binomial_APC_synonym: fuzzy match of binomial column to best +match among APC-synonymous names; created for yet-to-be-aligned names at +match step 10d in the function \code{match_taxa}. +\item fuzzy_match_trinomial: fuzzy match of trinomial column to best match +among APC-accepted names; created for yet-to-be-aligned names at +match step 09c in the function \code{match_taxa}. +\item fuzzy_match_trinomial_synonym: fuzzy match of trinomial column to best +match among APC-synonymous names; created for yet-to-be-aligned names at +match step 09d in the function \code{match_taxa}. +\item fuzzy_match_cleaned_APNI: fuzzy match of stripped_name to APNI names; +created for yet-to-be-aligned names at the match step 11a in the +function \code{match_taxa}. +\item fuzzy_match_cleaned_APNI_imprecise: imprecise fuzzy match of +stripped_name to APNI names; created for yet-to-be-aligned names +at the match step 11b in the function \code{match_taxa}. } } \description{ -This function finds taxonomic alignments in APC or scientific name alignments in APNI. -It uses the internal function \code{match_taxa} to attempt to match input strings to taxon names in the APC/APNI. 
-It sequentially searches for matches against more than 20 different string patterns, -prioritising exact matches (to accepted names as well as synonyms, orthographic variants) over fuzzy matches. -It prioritises matches to taxa in the APC over names in the APNI. -It identifies string patterns in input names that suggest a name can only be aligned to a genus -(hybrids that are not in the APC/ANI; graded species; taxa not identified to species), -and indicates these names only have a genus-rank match. +For a list of Australian plant names, find taxonomic or scientific name +alignments to the APC or APNI through standardizing formatting and fixing +spelling errors. + +Usage case: Users will run this function if they wish to see the details +of the matching algorithms, the many output columns that the matching +function compares to as it seeks the best alignment. They may also select +this function if they want to adjust the “fuzziness” level for fuzzy +matches, options not allowed in create_taxonomic_update_lookup. This +function is the first half of create_taxonomic_update_lookup. +} +\details{ +\itemize{ +\item This function finds taxonomic alignments in APC or scientific name +alignments in APNI. +\item It uses the internal function \code{match_taxa} to attempt to match input +strings to taxon names in the APC/APNI. +\item It sequentially searches for matches against more than 20 different string +patterns, prioritising exact matches (to accepted names as well as +synonyms, orthographic variants) over fuzzy matches. +\item It prioritises matches to taxa in the APC over names in the APNI. +\item It identifies string patterns in input names that suggest a name can only +be aligned to a genus (hybrids that are not in the APC/APNI; graded species; +taxa not identified to species), and indicates these names only have a +genus-rank match. 
+} + +Notes: +\itemize{ +\item If you will be running the function APCalign::create_taxonomic_update_lookup +many times, it is best to load the taxonomic resources separately using +resources <- load_taxonomic_resources(), then add the argument +resources = resources +\item The name Banksia cerrata does not align as the fuzzy matching algorithm +does not allow the first letter of the genus and species epithet to change. +\item With this function you have the option of changing the fuzzy matching +parameters. The defaults, with fuzzy matches only allowing changes of 3 +(or fewer) characters AND 20\% (or less) of characters have been carefully +calibrated to catch just about all typos, but very, very rarely mis-align +a name. If you wish to introduce less conservative fuzzy matching it is +recommended you manually check the aligned names. +\item It is recommended that you begin with imprecise_fuzzy_matches = FALSE (the +default), as quite a few of the less precise fuzzy matches are likely to be +erroneous. This argument should be turned on only if you plan to check all +alignments manually. +\item The argument identifier allows you to add a fixed text string to all genus- +and family- level names, such as identifier = "Royal NP" would return "Acacia +sp. [Royal NP]". 
+} } \examples{ -\donttest{align_taxa(c("Poa annua", "Abies alba"))} +\donttest{ +resources <- load_taxonomic_resources() + +# example 1 +align_taxa(c("Poa annua", "Abies alba"), resources = resources) +# example 2 +input <- c("Banksia serrata", "Banksia serrate", "Banksia cerrata", +"Banksia serrrrata", "Dryandra sp.", "Banksia big red flowers") + +aligned_taxa <- + APCalign::align_taxa( + original_name = input, + identifier = "APCalign test", + full = TRUE, + resources = resources + ) + +} } diff --git a/man/create_species_state_origin_matrix.Rd b/man/create_species_state_origin_matrix.Rd index ed019678..d427bdd5 100644 --- a/man/create_species_state_origin_matrix.Rd +++ b/man/create_species_state_origin_matrix.Rd @@ -2,17 +2,23 @@ % Please edit documentation in R/create_species_state_origin_matrix.R \name{create_species_state_origin_matrix} \alias{create_species_state_origin_matrix} -\title{Use the taxon distribution data from the APC to determine state level native and introduced origin status} +\title{State level native and introduced origin status} \usage{ create_species_state_origin_matrix(resources = load_taxonomic_resources()) } \arguments{ -\item{resources}{the taxonomic resources required to make the summary statistics. Loading this can be slow, so call load_taxonomic_resources separately to greatly speed this function up and pass the resources in.} +\item{resources}{the taxonomic resources required to make the summary statistics. +Loading this can be slow, so call load_taxonomic_resources separately to greatly +speed this function up and pass the resources in.} } \value{ -A tibble with columns representing each state and rows representing each species. The values in each cell represent the origin of the species in that state. +A tibble with columns representing each state and rows representing each +species. The values in each cell represent the origin of the species in that state. 
} \description{ +This function uses the taxon distribution data from the APC to determine +state level native and introduced origin status. + This function processes the geographic data available in the APC and returns state level native, introduced and more complicated origins status for all taxa. } diff --git a/man/create_taxonomic_update_lookup.Rd b/man/create_taxonomic_update_lookup.Rd index 6698e8ef..a0fd9db2 100644 --- a/man/create_taxonomic_update_lookup.Rd +++ b/man/create_taxonomic_update_lookup.Rd @@ -2,7 +2,8 @@ % Please edit documentation in R/create_taxonomic_update_lookup.R \name{create_taxonomic_update_lookup} \alias{create_taxonomic_update_lookup} -\title{Create a lookup table with the best-possible scientific name match for a list of Australian plant names} +\title{Create a table with the best-possible scientific name match for +Australian plant names} \usage{ create_taxonomic_update_lookup( taxa, @@ -22,75 +23,176 @@ create_taxonomic_update_lookup( ) } \arguments{ -\item{taxa}{A list of Australian plant species that needs to be reconciled with current taxonomy.} +\item{taxa}{A list of Australian plant species that needs to be reconciled +with current taxonomy.} -\item{stable_or_current_data}{either "stable" for a consistent version, or "current" for the leading edge version.} +\item{stable_or_current_data}{either "stable" for a consistent version, +or "current" for the leading edge version.} \item{version}{The version number of the dataset to use.} -\item{taxonomic_splits}{How to handle one_to_many taxonomic matches. Default is "return_all". The other options are "collapse_to_higher_taxon" and "most_likely_species". most_likely_species defaults to the original_name if that name is accepted by the APC; this will be right for certain species subsets, but make errors in other cases, use with caution.} +\item{taxonomic_splits}{How to handle one_to_many taxonomic matches. +Default is "return_all". 
The other options are "collapse_to_higher_taxon" +and "most_likely_species". most_likely_species defaults to the original_name +if that name is accepted by the APC; this will be right for certain species +subsets, but make errors in other cases, use with caution.} -\item{full}{logical for whether the full lookup table is returned or just key columns} +\item{full}{logical for whether the full lookup table is returned or +just key columns} -\item{fuzzy_abs_dist}{The number of characters allowed to be different for a fuzzy match.} +\item{fuzzy_abs_dist}{The number of characters allowed to be different for +a fuzzy match.} -\item{fuzzy_rel_dist}{The proportion of characters allowed to be different for a fuzzy match.} +\item{fuzzy_rel_dist}{The proportion of characters allowed to be different +for a fuzzy match.} -\item{fuzzy_matches}{Fuzzy matches are turned on as a default. The relative and absolute distances allowed for fuzzy matches to species and infraspecific taxon names are defined by the parameters \code{fuzzy_abs_dist} and \code{fuzzy_rel_dist}} +\item{fuzzy_matches}{Fuzzy matches are turned on as a default. The relative +and absolute distances allowed for fuzzy matches to species and +infraspecific taxon names are defined by the parameters \code{fuzzy_abs_dist} +and \code{fuzzy_rel_dist}.} -\item{APNI_matches}{Name matches to the APNI (Australian Plant Names Index) are turned off as a default.} +\item{APNI_matches}{Name matches to the APNI (Australian Plant Names Index) +are turned off as a default.} -\item{imprecise_fuzzy_matches}{Imprecise fuzzy matches uses the fuzzy matching function -with lenient levels set (absolute distance of 5 characters; relative distance = 0.25). -It offers a way to get a wider range of possible names, possibly corresponding to very distant spelling mistakes. 
-This is FALSE as default and all outputs should be checked as it often makes erroneous matches.} +\item{imprecise_fuzzy_matches}{Imprecise fuzzy matches uses the fuzzy +matching function with lenient levels set (absolute distance of +5 characters; relative distance = 0.25). +It offers a way to get a wider range of possible names, possibly +corresponding to very distant spelling mistakes. +This is FALSE as default and all outputs should be checked as it often +makes erroneous matches.} -\item{identifier}{A dataset, location or other identifier, which defaults to NA.} +\item{identifier}{A dataset, location or other identifier, +which defaults to NA.} -\item{resources}{These are the taxonomic resources used for cleaning, this will default to loading them from a local place on your computer. If this is to be called repeatedly, it's much faster to load the resources using \code{\link{load_taxonomic_resources}} separately and pass the data in.} +\item{resources}{These are the taxonomic resources used for cleaning, this +will default to loading them from a local place on your computer. If this is +to be called repeatedly, it's much faster to load the resources using +\code{\link{load_taxonomic_resources}} separately and pass the data in.} -\item{quiet}{Logical to indicate whether to display messages while aligning taxa.} +\item{quiet}{Logical to indicate whether to display messages while +aligning taxa.} -\item{output}{file path to save the output. If this file already exists, this function will check if it's a subset of the species passed in and try to add to this file. This can be useful for large and growing projects.} +\item{output}{file path to save the output. If this file already exists, +this function will check if it's a subset of the species passed in and try +to add to this file. 
This can be useful for large and growing projects.} } \value{ -A lookup table containing the accepted and suggested names for each original name input, and additional taxonomic information such as taxon rank, taxonomic status, taxon IDs and genera. +A lookup table containing the accepted and suggested names for each +original name input, and additional taxonomic information such as taxon +rank, taxonomic status, taxon IDs and genera. \itemize{ \item original_name: the original plant name. -\item aligned_name: the input plant name that has been aligned to a taxon name in the APC or APNI by the align_taxa function. +\item aligned_name: the input plant name that has been aligned to a taxon name in +the APC or APNI by the align_taxa function. \item accepted_name: the APC-accepted plant name, when available. -\item suggested_name: the suggested plant name to use. Identical to the accepted_name, when an accepted_name exists; otherwise the the suggested_name is the aligned_name. -\item genus: the genus of the accepted (or suggested) name; only APC-accepted genus names are filled in. -\item family: the family of the accepted (or suggested) name; only APC-accepted family names are filled in. +\item suggested_name: the suggested plant name to use. Identical to the +accepted_name, when an accepted_name exists; +otherwise the suggested_name is the aligned_name. +\item genus: the genus of the accepted (or suggested) name; +only APC-accepted genus names are filled in. +\item family: the family of the accepted (or suggested) name; +only APC-accepted family names are filled in. \item taxon_rank: the taxonomic rank of the suggested (and accepted) name. -\item taxonomic_dataset: the source of the suggested (and accepted) names (APC or APNI). +\item taxonomic_dataset: the source of the suggested (and accepted) names +(APC or APNI). \item taxonomic_status: the taxonomic status of the suggested (and accepted) name. 
-\item taxonomic_status_aligned: the taxonomic status of the aligned name, before any taxonomic updates have been applied. -\item aligned_reason: the explanation of a specific taxon name alignment (from an original name to an aligned name). -\item update_reason: the explanation of a specific taxon name update (from an aligned name to an accepted or suggested name). +\item taxonomic_status_aligned: the taxonomic status of the aligned name, +before any taxonomic updates have been applied. +\item aligned_reason: the explanation of a specific taxon name alignment +(from an original name to an aligned name). +\item update_reason: the explanation of a specific taxon name update +(from an aligned name to an accepted or suggested name). \item subclass: the subclass of the accepted name. -\item taxon_distribution: the distribution of the accepted name; only filled in if an APC accepted_name is available. -\item scientific_name_authorship: the authorship information for the accepted (or synonymous) name; available for both APC and APNI names. -\item taxon_ID: the unique taxon concept identifier for the accepted_name; only filled in if an APC accepted_name is available. -\item taxon_ID_genus: an identifier for the genus; only filled in if an APC-accepted genus name is available. -\item scientific_name_ID: an identifier for the nomenclatural (not taxonomic) details of a scientific name; available for both APC and APNI names. +\item taxon_distribution: the distribution of the accepted name; +only filled in if an APC accepted_name is available. +\item scientific_name_authorship: the authorship information for the accepted +(or synonymous) name; available for both APC and APNI names. +\item taxon_ID: the unique taxon concept identifier for the accepted_name; +only filled in if an APC accepted_name is available. +\item taxon_ID_genus: an identifier for the genus; +only filled in if an APC-accepted genus name is available. 
+\item scientific_name_ID: an identifier for the nomenclatural (not taxonomic) +details of a scientific name; available for both APC and APNI names. \item row_number: the row number of a specific original_name in the input. -\item number_of_collapsed_taxa: when taxonomic_splits == "collapse_to_higher_taxon", the number of possible taxon names that have been collapsed. +\item number_of_collapsed_taxa: when taxonomic_splits == "collapse_to_higher_taxon", +the number of possible taxon names that have been collapsed. } } \description{ -This function takes a list of Australian plant names that need to be reconciled with current taxonomy and -generates a lookup table of the best-possible scientific name match for each input name. -It uses first the function \code{align_taxa}, then the function \code{update_taxonomy} to achieve the output. +This function takes a list of Australian plant names that need to be +reconciled with current taxonomy and generates a lookup table of the +best-possible scientific name match for each input name. + +Usage case: This is APCalign’s core function, merging together the alignment +and updating of taxonomy. +} +\details{ +\itemize{ +\item It uses first the function \code{align_taxa}, then the function \code{update_taxonomy} +to achieve the output. The aligned name is the plant name that has been aligned +to a taxon name in the APC or APNI by the align_taxa function. +} + +Notes: +\itemize{ +\item If you will be running the function APCalign::create_taxonomic_update_lookup +many times, it is best to load the taxonomic resources separately using +\code{resources <- load_taxonomic_resources()}, then add the argument +resources = resources +\item The name Banksia cerrata does not align as the fuzzy matching algorithm +does not allow the first letter of the genus and species epithet to change. 
+\item The argument taxonomic_splits allows you to choose the outcome for updating +the names of taxa with ambiguous taxonomic histories; this applies to +scientific names that were once attached to a more broadly circumscribed +taxon concept, that was then split into several more narrowly circumscribed +taxon concepts, one of which retains the original name. There are three +options: most_likely_species returns the name that is retained, with +alternative names documented in square brackets; return_all adds additional +rows to the output, one for each possible taxon concept; +collapse_to_higher_taxon returns the genus with possible names in square +brackets. +\item The argument identifier allows you to add a fixed text string to all genus- +and family- level names, such as identifier = "Royal NP" would return +\verb{Acacia sp. \[Royal NP]}. +} } \examples{ -\donttest{resources <- load_taxonomic_resources() +\donttest{ +resources <- load_taxonomic_resources() + +# example 1 create_taxonomic_update_lookup(c("Eucalyptus regnans", "Acacia melanoxylon", "Banksia integrifolia", "Not a species"), - resources=resources) + resources = resources) + +# example 2 +input <- c("Banksia serrata", "Banksia serrate", "Banksia cerrata", +"Banksea serrata", "Banksia serrrrata", "Dryandra") + +create_taxonomic_update_lookup( + taxa = input, + identifier = "APCalign test", + full = TRUE, + resources = resources + ) + +# example 3 +taxon_list <- + readr::read_csv(here("inst/", "extdata", "test_taxa.csv"), + show_col_types = FALSE + ) + +create_taxonomic_update_lookup( + taxa = taxon_list$original_name, + identifier = taxon_list$notes, + full = TRUE, + resources = resources + ) } + } \seealso{ \code{\link{load_taxonomic_resources}} diff --git a/man/load_taxonomic_resources.Rd b/man/load_taxonomic_resources.Rd index 7650fc4f..d3d12eba 100644 --- a/man/load_taxonomic_resources.Rd +++ b/man/load_taxonomic_resources.Rd @@ -2,7 +2,7 @@ % Please edit documentation in 
R/load_taxonomic_resources.R \name{load_taxonomic_resources} \alias{load_taxonomic_resources} -\title{Load taxonomic resources from either stable or current versions of APC and APNI} +\title{Load taxonomic reference lists, APC & APNI} \usage{ load_taxonomic_resources( stable_or_current_data = "stable", @@ -11,23 +11,36 @@ load_taxonomic_resources( ) } \arguments{ -\item{stable_or_current_data}{Type of dataset to access. The default is "stable", which loads the -dataset from a github archived file. If set to "current", the dataset will be loaded from -a URL which is the cutting edge version, but this may change at any time without notice.} +\item{stable_or_current_data}{Type of dataset to access. +The default is "stable", which loads the dataset from a github archived file. +If set to "current", the dataset will be loaded from a URL which is the +cutting edge version, but this may change at any time without notice.} -\item{version}{The version number of the dataset to use. Defaults to the default version.} +\item{version}{The version number of the dataset to use. +Defaults to the default version.} -\item{quiet}{A logical indicating whether to print status of loading to screen. Defaults to FALSE.} +\item{quiet}{A logical indicating whether to print status of loading to screen. +Defaults to FALSE.} } \value{ The taxonomic resources data loaded into the global environment. } \description{ -This function loads two taxonomic datasets for Australia's vascular plants, the APC and APNI, into the global environment. -It accesses taxonomic data from a dataset using the provided version number or the default version. -The function creates several data frames by filtering and selecting data from the loaded lists. +This function loads two taxonomic datasets for Australia's vascular plants, +the APC and APNI, into the global environment. It creates several data frames +by filtering and selecting data from the loaded lists. 
+} +\details{ +\itemize{ +\item It accesses taxonomic data from a dataset using the provided version number +or the default version. +\item The output is several dataframes that include subsets of the APC/APNI based +on taxon rank and taxonomic status. +} } \examples{ -\donttest{load_taxonomic_resources(stable_or_current_data="stable",version="0.0.2.9000")} +\donttest{ +load_taxonomic_resources(stable_or_current_data="stable", +version="0.0.2.9000")} } diff --git a/man/native_anywhere_in_australia.Rd b/man/native_anywhere_in_australia.Rd index 537d4b28..f4e63c25 100644 --- a/man/native_anywhere_in_australia.Rd +++ b/man/native_anywhere_in_australia.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/native_anywhere_in_australia.R \name{native_anywhere_in_australia} \alias{native_anywhere_in_australia} -\title{For a vector of taxon names in to the APC, check if the species are native anywhere in Australia} +\title{Native anywhere in Australia} \usage{ native_anywhere_in_australia(species, resources = load_taxonomic_resources()) } @@ -10,19 +10,30 @@ native_anywhere_in_australia(species, resources = load_taxonomic_resources()) \item{species}{A character string typically representing the binomial for the species.} \item{resources}{An optional list of taxonomic resources to use for the lookup. -If not provided, the function will load default taxonomic resources using the \code{load_taxonomic_resources()} function.} +If not provided, the function will load default taxonomic resources using the +\code{load_taxonomic_resources()} function.} } \value{ -A tibble with two columns: \code{species}, which is the same as the unique values of the input \code{species}, -and \code{native_anywhere_in_aus}, a vector indicating whether each species is native anywhere in Australia, introduced by humans from elsewhere, or unknown with respect to the APC resource. 
+A tibble with two columns: \code{species}, which is the same as the unique values of +the input \code{species}, and \code{native_anywhere_in_aus}, a vector indicating whether each +species is native anywhere in Australia, introduced by humans from elsewhere, or +unknown with respect to the APC resource. } \description{ -This function checks which species from a list is thought to be native anywhere in Australia according to the APC. -Important caveats: this will not detect within-Australia introductions, e.g. if a species is from Western Australia and is invasive on the east coast. -Also, very recent invasions are unlikely to be documented yet in APC. -Ideally check spelling and taxonomy updates first via \link{create_taxonomic_update_lookup}. -For the complete matrix of species by states that also represents within-Australia invasions, -use \link{create_species_state_origin_matrix}. +This function checks which species from a list is thought to be native anywhere in +Australia according to the APC. +} +\details{ +Important caveats: +\itemize{ +\item This function will not detect within-Australia introductions, +e.g. if a species is from Western Australia and is invasive on the east coast. +\item Very recent invasions are unlikely to be documented yet in APC. +\item Ideally check spelling and taxonomy updates first via +\link{create_taxonomic_update_lookup}. +\item For the complete matrix of species by states that also represents +within-Australia invasions, use \link{create_species_state_origin_matrix}. 
+} } \examples{ \donttest{native_anywhere_in_australia(c("Eucalyptus globulus","Pinus radiata","Banksis notaspecies"))} diff --git a/man/standardise_names.Rd b/man/standardise_names.Rd index fc691262..7a9ad0b5 100644 --- a/man/standardise_names.Rd +++ b/man/standardise_names.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/standardise_names.R \name{standardise_names} \alias{standardise_names} -\title{Standardises taxon names by performing a series of text substitutions to remove common inconsistencies in taxonomic nomenclature.} +\title{Standardise taxon names} \usage{ standardise_names(taxon_names) } @@ -13,10 +13,22 @@ standardise_names(taxon_names) A character vector of standardised taxon names. } \description{ +Standardises taxon names by performing a series of text substitutions to +remove common inconsistencies in taxonomic nomenclature. + The function takes a character vector of taxon names as input and -returns a character vector of taxon names using standardised taxonomic syntax as output. -In particular it standardises taxon rank abbreviations and qualifiers (subsp., var., f.), as people use many variants of these terms. -It also standardises or removes a few additional filler words used within taxon names (affinis becomes aff.; s.l. and s.s. are removed). +returns a character vector of taxon names using standardised taxonomic syntax +as output. +} +\details{ +\itemize{ +\item It removes stray punctuation at the start and end of a character string. +\item It standardises unusual characters and symbols to ASCII equivalents. +\item It standardises taxon rank abbreviations and qualifiers (subsp., var., f.), +as people use many variants of these terms. +\item It standardises or removes a few additional filler words used within +taxon names (affinis becomes aff.; s.l. and s.s. are removed). 
+} } \examples{ standardise_names(c("Quercus suber", diff --git a/man/standardise_taxon_rank.Rd b/man/standardise_taxon_rank.Rd index 73b6f2b0..23af4949 100644 --- a/man/standardise_taxon_rank.Rd +++ b/man/standardise_taxon_rank.Rd @@ -2,19 +2,22 @@ % Please edit documentation in R/standardise_names.R \name{standardise_taxon_rank} \alias{standardise_taxon_rank} -\title{Standardise taxon ranks from latin into english.} +\title{Standardise taxon ranks} \usage{ standardise_taxon_rank(taxon_rank) } \arguments{ -\item{taxon_rank}{A character vector of taxon ranks that need to be standardised.} +\item{taxon_rank}{A character vector of Latin taxon ranks.} } \value{ -A character vector of standardised taxon names. +A character vector of English taxon ranks. } \description{ -The function takes a character vector of taxon ranks as input and -returns a character vector of taxon ranks using standardised english terms. +Standardise taxon ranks from Latin into English. +} +\details{ +The function takes a character vector of Latin taxon ranks as input and +returns a character vector of taxon ranks using standardised English terms. } \examples{ standardise_taxon_rank(c("regnum", "kingdom", "classis", "class")) diff --git a/man/state_diversity_counts.Rd b/man/state_diversity_counts.Rd index 9f2e3f68..1d5f0332 100644 --- a/man/state_diversity_counts.Rd +++ b/man/state_diversity_counts.Rd @@ -2,22 +2,31 @@ % Please edit documentation in R/state_diversity_counts.R \name{state_diversity_counts} \alias{state_diversity_counts} -\title{For Australian states and territories, use data from the APC to calculate state-level diversity for native, introduced, and more complicated species origins} +\title{State- and territory-level diversity} \usage{ state_diversity_counts(state, resources = load_taxonomic_resources()) } \arguments{ -\item{state}{A character string indicating the Australian state or territory to calculate the diversity for. 
Possible values are "NSW", "NT", "Qld", "WA", "ChI", "SA", "Vic", "Tas", "ACT", "NI", "LHI", "MI", "HI", "MDI", "CoI", "CSI", and "AR".} +\item{state}{A character string indicating the Australian state or +territory to calculate the diversity for. Possible values are "NSW", "NT", +"Qld", "WA", "ChI", "SA", "Vic", "Tas", "ACT", "NI", "LHI", "MI", "HI", +"MDI", "CoI", "CSI", and "AR".} -\item{resources}{the taxonomic resources required to make the summary statistics. loading this can be slow, so call load_taxonomic_resources separately to greatly speed this function up and pass the resources in.} +\item{resources}{the taxonomic resources required to make the summary +statistics. loading this can be slow, so call load_taxonomic_resources +separately to greatly speed this function up and pass the resources in.} } \value{ -A tibble of diversity counts for the specified state or territory, including native, introduced, and more complicated species origins. -The tibble has three columns: "origin" indicating the origin of the species, "state" indicating the Australian state or territory, and "num_species" indicating the number of species for that origin and state. +A tibble of diversity counts for the specified state or territory, +including native, introduced, and more complicated species origins. +The tibble has three columns: "origin" indicating the origin of the +species, "state" indicating the Australian state or territory, and +"num_species" indicating the number of species for that origin and state. } \description{ -This function calculates state-level diversity for native, introduced, and more complicated species origins -based on the geographic data available in the APC. 
+For Australian states and territories, use geographic distribution data from +the APC to calculate state-level diversity for native, introduced, +and more complicated species origins } \examples{ \donttest{state_diversity_counts(state = "NSW")} diff --git a/man/strip_names.Rd b/man/strip_names.Rd index 459288c4..ea26df1e 100644 --- a/man/strip_names.Rd +++ b/man/strip_names.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/strip_names.R \name{strip_names} \alias{strip_names} -\title{Strip taxonomic names of taxon rank abbreviations and qualifiers and special characters} +\title{Strip taxon names} \usage{ strip_names(taxon_names) } @@ -10,13 +10,23 @@ strip_names(taxon_names) \item{taxon_names}{A character vector of taxonomic names to be stripped.} } \value{ -A character vector of stripped taxonomic names, with subtaxa designations, special -characters, and extra whitespace removed, and all letters converted to lowercase. +A character vector of stripped taxonomic names, +with subtaxa designations, special characters, and extra whitespace +removed, and all letters converted to lowercase. } \description{ -Given a vector of taxonomic names, this function removes subtaxa designations ("subsp.", "var.", "f.", and "ser"), -special characters (e.g., "-", ".", "(", ")", "?"), and extra whitespace. The resulting vector -of names is also converted to lowercase. +Strip taxonomic names of taxon rank abbreviations and qualifiers +and special characters +} +\details{ +Given a vector of taxonomic names, this function removes: +\itemize{ +\item subtaxa designations ("subsp.", "var.", "f.", and "ser") +\item special characters (e.g., "-", ".", "(", ")", "?") +\item extra whitespace +} + +The resulting vector of names is also converted to lowercase. } \examples{ strip_names(c("Abies lasiocarpa subsp. 
lasiocarpa", diff --git a/man/strip_names_extra.Rd b/man/strip_names_extra.Rd index 5c5c92c9..ff26a3ca 100644 --- a/man/strip_names_extra.Rd +++ b/man/strip_names_extra.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/strip_names.R \name{strip_names_extra} \alias{strip_names_extra} -\title{Strip taxonomic names of taxon rank abbreviations and qualifiers, filler words and special characters} +\title{Strip taxon names, extra} \usage{ strip_names_extra(taxon_names) } @@ -10,14 +10,17 @@ strip_names_extra(taxon_names) \item{taxon_names}{A character vector of taxonomic names to be stripped.} } \value{ -A character vector of stripped taxonomic names, with subtaxa designations, special -characters, additional filler words and extra whitespace removed, and all letters converted to lowercase. +A character vector of stripped taxonomic names, +with \code{sp.} and hybrid symbols removed. } \description{ -Given a vector of taxonomic names, this function removes subtaxa designations ("subsp.", "var.", "f.", and "ser"), -additional filler words and characters (" x " for hybrid taxa, "sp."), -special characters (e.g., "-", ".", "(", ")", "?"), and extra whitespace. The resulting vector -of names is also converted to lowercase. +Strip taxonomic names of \code{sp.} and hybrid symbols. This function assumes +that a character vector has already been run through \code{strip_names}. +} +\details{ +Given a vector of taxonomic names, this function removes additional filler +words (" x " for hybrid taxa, "sp.") not removed by the function +\code{strip_names} } \examples{ strip_names_extra(c("Abies lasiocarpa subsp. 
lasiocarpa", diff --git a/man/update_taxonomy.Rd b/man/update_taxonomy.Rd index 0b33a033..042624ac 100644 --- a/man/update_taxonomy.Rd +++ b/man/update_taxonomy.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/update_taxonomy.R \name{update_taxonomy} \alias{update_taxonomy} -\title{For a list of taxon names aligned to the APC, update the name to an accepted taxon concept per the APC and add scientific name and taxon concept metadata to names aligned to either the APC or APNI.} +\title{Update to currently accepted APC name and add APC/APNI name metadata} \usage{ update_taxonomy( aligned_data, @@ -13,64 +13,117 @@ update_taxonomy( ) } \arguments{ -\item{aligned_data}{A tibble of plant names to update. This table must include 5 columns, original_name, aligned_name, taxon_rank, taxonomic_dataset, and aligned_reason. +\item{aligned_data}{A tibble of plant names to update. This table must +include 5 columns, original_name, aligned_name, taxon_rank, +taxonomic_dataset, and aligned_reason. These columns are created by the function \code{align_taxa}. -The columns \code{original_name} and \code{aligned_name} must be in the format of the scientific name, with genus and species, -and may contain additional qualifiers such as subspecies or varieties. The names are case insensitive.} +The columns \code{original_name} and \code{aligned_name} must be in the format of the +scientific name, with genus and species, +and may contain additional qualifiers such as subspecies or varieties. The +names are case insensitive.} -\item{taxonomic_splits}{Variable that determines what protocol to use to update taxon names that are ambiguous due to taxonomic splits. +\item{taxonomic_splits}{Variable that determines what protocol to use to +update taxon names that are ambiguous due to taxonomic splits. 
The three options are: -most_likely_species, which returns the species name in use before the split; alternative names are returned in a separate column -return_all, which returns all possible names -collapse_to_higher_taxon, which declares that an ambiguous name cannot be aligned to an accepted species/infraspecific name and the name is demoted to genus rank} +\itemize{ +\item \code{most_likely_species}, which returns the species name in use before the +split; alternative names are returned in a separate column +\item \code{return_all}, which returns all possible names +\item \code{collapse_to_higher_taxon}, which declares that an ambiguous name cannot +be aligned to an accepted species/infraspecific name and the name is +demoted to genus rank +}} -\item{quiet}{Logical to indicate whether to display messages while updating taxa.} +\item{quiet}{Logical to indicate whether to display messages while updating +taxa.} -\item{output}{(optional) Name of the file where results are saved. The default is NULL and no file is created. -If specified, the output will be saved in a CSV file with the given name.} +\item{output}{(optional) Name of the file where results are saved. The +default is NULL and no file is created. If specified, the output will be +saved in a CSV file with the given name.} -\item{resources}{the taxonomic resources required to make the summary statistics. Loading this can be slow, so call load_taxonomic_resources separately to greatly speed this function up and pass the resources in.} +\item{resources}{the taxonomic resources required to make the summary +statistics. Loading this can be slow, so call load_taxonomic_resources +separately to greatly speed this function up and pass the resources in.} } \value{ -A tibble with updated taxonomy for the specified plant names. The tibble contains the following columns: +A tibble with updated taxonomy for the specified plant names. 
The +tibble contains the following columns: \itemize{ \item original_name: the original plant name. -\item aligned_name: the input plant name that has been aligned to a taxon name in the APC or APNI by the align_taxa function. +\item aligned_name: the input plant name that has been aligned to a taxon name +in the APC or APNI by the align_taxa function. \item accepted_name: the APC-accepted plant name, when available. -\item suggested_name: the suggested plant name to use. Identical to the accepted_name, when an accepted_name exists; otherwise the the suggested_name is the aligned_name. -\item genus: the genus of the accepted (or suggested) name; only APC-accepted genus names are filled in. -\item family: the family of the accepted (or suggested) name; only APC-accepted family names are filled in. +\item suggested_name: the suggested plant name to use. Identical to the +accepted_name, when an accepted_name exists; otherwise the suggested_name +is the aligned_name. +\item genus: the genus of the accepted (or suggested) name; only APC-accepted +genus names are filled in. +\item family: the family of the accepted (or suggested) name; only APC-accepted +family names are filled in. \item taxon_rank: the taxonomic rank of the suggested (and accepted) name. -\item taxonomic_dataset: the source of the suggested (and accepted) names (APC or APNI). +\item taxonomic_dataset: the source of the suggested (and accepted) names (APC or +APNI). \item taxonomic_status: the taxonomic status of the suggested (and accepted) name. -\item taxonomic_status_aligned: the taxonomic status of the aligned name, before any taxonomic updates have been applied. -\item aligned_reason: the explanation of a specific taxon name alignment (from an original name to an aligned name). -\item update_reason: the explanation of a specific taxon name update (from an aligned name to an accepted or suggested name). 
+\item taxonomic_status_aligned: the taxonomic status of the aligned name, before +any taxonomic updates have been applied. +\item aligned_reason: the explanation of a specific taxon name alignment (from an +original name to an aligned name). +\item update_reason: the explanation of a specific taxon name update (from an +aligned name to an accepted or suggested name). \item subclass: the subclass of the accepted name. -\item taxon_distribution: the distribution of the accepted name; only filled in if an APC accepted_name is available. -\item scientific_name_authorship: the authorship information for the accepted (or synonymous) name; available for both APC and APNI names. -\item taxon_ID: the unique taxon concept identifier for the accepted_name; only filled in if an APC accepted_name is available. -\item taxon_ID_genus: an identifier for the genus; only filled in if an APC-accepted genus name is available. -\item scientific_name_ID: an identifier for the nomenclatural (not taxonomic) details of a scientific name; available for both APC and APNI names. +\item taxon_distribution: the distribution of the accepted name; only filled in +if an APC accepted_name is available. +\item scientific_name_authorship: the authorship information for the accepted +(or synonymous) name; available for both APC and APNI names. +\item taxon_ID: the unique taxon concept identifier for the accepted_name; only +filled in if an APC accepted_name is available. +\item taxon_ID_genus: an identifier for the genus; only filled in if an +APC-accepted genus name is available. +\item scientific_name_ID: an identifier for the nomenclatural (not taxonomic) +details of a scientific name; available for both APC and APNI names. \item row_number: the row number of a specific original_name in the input. -\item number_of_collapsed_taxa: when taxonomic_splits == "collapse_to_higher_taxon", the number of possible taxon names that have been collapsed. 
+\item number_of_collapsed_taxa: when taxonomic_splits == "collapse_to_higher_taxon", +the number of possible taxon names that have been collapsed. } } \description{ -This function uses the APC to update the taxonomy of names aligned to a taxon concept listed in the APC to the currently accepted name for the taxon concept. -The aligned_data data frame that is input must contain 5 columns, -\code{original_name}, \code{aligned_name}, \code{taxon_rank}, \code{taxonomic_dataset}, and \code{aligned_reason}. -The aligned name is a plant name that has been aligned to a taxon name in the APC or APNI by the align_taxa function. +For a list of taxon names aligned to the APC, update the name to an accepted +taxon concept per the APC and add scientific name and taxon concept metadata +to names aligned to either the APC or APNI. +} +\details{ +\itemize{ +\item This function uses the APC to update the taxonomy of names aligned to a +taxon concept listed in the APC to the currently accepted name for the taxon +concept. +\item The aligned_data data frame that is input must contain 5 columns, +\code{original_name}, \code{aligned_name}, \code{taxon_rank}, \code{taxonomic_dataset}, and +\code{aligned_reason}. (These are the columns output by the function \code{align_taxa}.) +\item The aligned name is a plant name that has been aligned to a taxon name in +the APC or APNI by the align_taxa function. +} + +Notes: +\itemize{ +\item As the input for this function is a table with 5 columns (output by +align_taxa), this function will only be used when you explicitly want to +separate the alignment and updating components of APCalign. This function is +the second half of create_taxonomic_update_lookup. 
+} } \examples{ # Update taxonomy for two plant names and print the result -\donttest{update_taxonomy( +\donttest{ +resources <- load_taxonomic_resources() + +update_taxonomy( dplyr::tibble( original_name = c("Dryandra preissii", "Banksia acuminata"), aligned_name = c("Dryandra preissii", "Banksia acuminata"), taxon_rank = c("species", "species"), taxonomic_dataset = c("APC", "APC"), - aligned_reason = NA_character_ + aligned_reason = NA_character_, + resources = resources ) ) } diff --git a/man/word.Rd b/man/word.Rd deleted file mode 100644 index 2c70bbe3..00000000 --- a/man/word.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/word.R -\name{word} -\alias{word} -\title{Extract words from a sentence. Intended as a faster -replacement for stringr::word} -\usage{ -word(string, start = 1L, end = start, sep = " ") -} -\arguments{ -\item{string}{A character vector} - -\item{start, end}{Pair of integer vectors giving range of words (inclusive) -to extract. The default value select the first word.} - -\item{sep}{Separator between words. Defaults to single space.} -} -\value{ -A character vector with the same length as \code{string}/\code{start}/\code{end}. -} -\description{ -Extract words from a sentence. Intended as a faster -replacement for stringr::word -} -\examples{ -spp <- c("Banksia serrata", "Actinotus helanthii") -APCalign:::word(spp, 1) -APCalign:::word(spp, 2) -} diff --git a/vignettes/articles/function_notes.Rmd b/vignettes/articles/function_notes.Rmd deleted file mode 100644 index 1fea0b8d..00000000 --- a/vignettes/articles/function_notes.Rmd +++ /dev/null @@ -1,232 +0,0 @@ ---- -title: "APCalign functions" -author: "Elizabeth Wenk" -date: "2024-01-22" -output: html_document ---- - -# APCalign functions - -APCalign exports [10 functions](https://traitecoevo.github.io/APCalign/reference/index.html) to facilitate the alignment of submitted plant names to scientific names on the APC and APNI lists. 
They are listed in order of likelihood of use. - -## Taxon name alignment and updating functions - -### create_taxonomic_update_lookup - -**description**: This function takes a list of Australian plant names that need to be reconciled with current taxonomy and generates a lookup table of the best-possible scientific name match for each input name. It uses first the function `align_taxa`, then the function `update_taxonomy` to achieve the output. The aligned name is plant name that has been aligned to a taxon name in the APC or APNI by the align_taxa function. - -**usage notes**: This is APCalign's core function, merging together the alignment and updating of taxonomy. - -**arguments**: - -``` -taxa #input vector of taxon names -stable_or_current_data = "stable" -version = default_version() -taxonomic_splits = "most_likely_species" #options for names with ambiguous taxonomic histories -full = FALSE #outputs fewer (FALSE) or more (TRUE) columns -APNI_matches = TRUE #include (TRUE) or exclude (FALSE) APNI list -imprecise_fuzzy_matches = FALSE #disallow (FALSE) or allow (TRUE) imprecise fuzzy matches -identifier = NA_character_ #include a unique identifier as part of informal names -resources = load_taxonomic_resources() -output = NULL -``` - -**output**: A data frame with rows representing each taxon and columns documenting taxon metadata (*original_name, aligned_name, accepted_name, suggested_name, genus, family, taxon_rank, taxonomic_dataset, taxonomic_status, taxonomic_status_aligned, aligned_reason, update_reason, subclass, taxon_distribution, scientific_name_authorship, taxon_ID, taxon_ID_genus, scientific_name_ID, row_number, number_of_collapsed_taxa*). 
- -**example**: - -```{r, eval = FALSE, echo = TRUE} -input <- c("Banksia serrata", "Banksia serrate", "Banksia cerrata", "Banksea serrata", "Banksia serrrrata", "Dryandra") -resources <- load_taxonomic_resources() - -updated_taxa <- - APCalign::create_taxonomic_update_lookup( - taxa = input, - identifier = "APCalign test", - full = TRUE, - resources = resources - ) -``` - -or, start with a csv file where there is a column of taxon names to align - -```{r, eval = FALSE, echo = TRUE} -taxon_list <- #or load data through the R studio menu - readr::read_csv(here("inst/", "extdata", "test_taxa.csv"), - show_col_types = FALSE - ) -resources <- load_taxonomic_resources() - -updated_taxa <- - APCalign::create_taxonomic_update_lookup( - taxa = taxon_list$original_name, - identifier = "APCalign test", - full = TRUE, - resources = resources - ) -``` - -**notes**\ -- If you will be running the function `APCalign::create_taxonomic_update_lookup` many times, it is best to load the taxonomic resources separately using `resources <- load_taxonomic_resources()`, then add the argument `resources = resources`\ -- The name `Banksia cerrata` does not align as the fuzzy matching algorithm does not allow the first letter of the genus and species epithet to change.\ -- The argument `taxonomic_splits` allows you to choose the outcome for updating the names of taxa with ambiguous taxonomic histories; this applies to scientific names that were once attached to a more broadly circumscribed taxon concept, that was then split into several more narrowly circumscribed taxon concepts, one of which retains the original name. 
There are three options: `most_likely_species` returns the name that is retained, with alternative names documented in square brackets; `return_all` adds additional rows to the output, one for each possible taxon concept; `collapse_to_higher_taxon` returns the genus with possible names in square brackets.\ -- The argument `identifier` allows you to add a fix text string to all genus- and family- level names, such as `identifier = "Royal NP"` would return \`Acacia sp. [Royal NP]`. - -### align_taxa - -**description**: This function finds taxonomic alignments in the APC or APNI. It uses the internal function `match_taxa` to attempt to match input strings to taxon names in the APC/APNI. It sequentially searches for matches against more than 20 different string patterns, prioritising exact matches (to accepted names as well as synonyms, orthographic variants) over fuzzy matches. It prioritises matches to taxa in the APC over names in the APNI. It identifies string patterns in input names that suggest a name can only be aligned to a genus (hybrids that are not in the APC/ANI; graded species; taxa not identified to species), and indicates these names only have a genus-rank match. - -**usage notes**: Users will run this function if they wish to see the details of the matching algorithms, the many output columns that the matching function compares to as it seeks the best alignment. They may also select this function if they want to adjust the "fuzziness" level for fuzzy matches, options not allowed in `create_taxonomic_update_lookup`. This function is the first half of `create_taxonomic_update_lookup`. 
- -**arguments**: - -``` -original_name #input vector of taxon names -output = NULL -full = FALSE #outputs fewer (FALSE) or more (TRUE) columns -resources = load_taxonomic_resources() -fuzzy_abs_dist = 3 #set number of characters allowed to be different for fuzzy match -fuzzy_rel_dist = 0.2 #set proportion of characters allowed to be different for fuzzy match -fuzzy_matches = TRUE #disallow (FALSE) or allow (TRUE) any fuzzy matches -imprecise_fuzzy_matches = FALSE #disallow (FALSE) or allow (TRUE) imprecise fuzzy matches -APNI_matches = TRUE #include (TRUE) or exclude (FALSE) APNI list -identifier = NA_character #include a unique identifier as part of informal names -``` - -**output**: A data frame with rows representing each taxon and with columns documenting the alignment made, the reason for this alignment, and a selection of taxon name mutations to which the original name was compared (*original_name, aligned_name, taxonomic_dataset, taxon_rank, aligned_reason, alignment_code, cleaned_name, stripped_name, stripped_name2, trinomial, binomial, genus, fuzzy_match_genus, fuzzy_match_genus_synonym, fuzzy_match_genus_APNI, fuzzy_match_cleaned_APC, fuzzy_match_cleaned_APC_synonym, fuzzy_match_cleaned_APC_imprecise, fuzzy_match_cleaned_APC_synonym_imprecise, fuzzy_match_binomial, fuzzy_match_binomial_APC_synonym, fuzzy_match_trinomial, fuzzy_match_trinomial_synonym, fuzzy_match_cleaned_APNI, fuzzy_match_cleaned_APNI_imprecise*). 
- -**example**: - -```{r, eval = FALSE, echo = TRUE} -input <- c("Banksia serrata", "Banksia serrate", "Banksia cerrata", "Banksia serrrrata", "Dryandra sp.", "Banksia big red flowers") -resources <- load_taxonomic_resources() - - -aligned_taxa <- - APCalign::align_taxa( - original_name = input, - identifier = "APCalign test", - full = TRUE, - resources = resources - ) -``` - -**notes**\ -- If you will be running the function `APCalign::create_taxonomic_update_lookup` many times, it is best to load the taxonomic resources separately using `resources <- load_taxonomic_resources()`, then add the argument `resources = resources`\ -- The name `Banksia cerrata` does not align as the fuzzy matching algorithm does not allow the first letter of the genus and species epithet to change.\ -- With this function you have the option of changing the fuzzy matching parameters. The defaults, with fuzzy matches only allowing changes of 3 (or fewer) characters AND 20% (or less) of characters has been carefully calibrated to catch just about all typos, but very, very rarely mis-align a name. If you wish to introduce less conservative fuzzy matching it is recommended you manually check the aligned names.\ -- It is recommended that you begin with `imprecise_fuzzy_matches = FALSE` (the default), as quite a few of the less precise fuzzy matches are likely to be erroneous. This argument should be turned on only if you plan to check all alignments manually.\ -- The argument `identifier` allows you to add a fix text string to all genus- and family- level names, such as `identifier = "Royal NP"` would return `Acacia sp. [Royal NP]`. - -### update_taxonomy - -**description**: This function uses the APC to update the taxonomy of names aligned to a taxon concept listed in the APC to the currently accepted name for the taxon concept. 
The aligned_data data frame that is input must contain 5 columns, `originial_name`, `aligned_name`, `taxon_rank`, `taxonomic_dataset`, and `aligned_reason`, the columns output by the function `APCalign::align_taxa()`. The aligned name is a plant name that has been aligned to a taxon name in the APC or APNI by the align_taxa function. - -**usage notes**: As the input for this function is a table with 5 columns (output by `align_taxa`), this function will only be used when you explicitly want to separate the `aligment` and `updating` components of APCalign. This function is the second half of `create_taxonomic_update_lookup`. - -**arguments**: - -``` -aligned_data #input table of aligned names and information about the aligned name -taxonomic_splits = "most_likely_species" #options for names with ambiguous taxonomic histories -output = NULL -resources = load_taxonomic_resources() -``` - -**output**: A data frame with rows representing each taxon and columns documenting taxon metadata (*original_name, aligned_name, accepted_name, suggested_name, genus, family, taxon_rank, taxonomic_dataset, taxonomic_status, taxonomic_status_aligned, aligned_reason, update_reason, subclass, taxon_distribution, scientific_name_authorship, taxon_ID, taxon_ID_genus, scientific_name_ID, row_number, number_of_collapsed_taxa*). - -## Diversity and distribution functions - -### create_species_state_origin_matrix - -**description**: This function processes the geographic data available in the APC and returns state level native, introduced and more complicated origins status for all taxa. - -**arguments**: - -``` -resources = load_taxonomic_resources() -``` - -**output**: A data frame with rows representing each species and columns for taxon name and each state . The values in each cell represent the origin of the species in that state. - -### native_anywhere_in_australia - -**description**: This function checks if the given species is native anywhere in Australia according to the APC. 
Note that this will not detect within-Australia introductions, e.g. if a species is from Western Australia and is invasive on the east coast. - -**arguments**: - -``` -species #input vector of taxon names -resources = load_taxonomic_resources() -``` - -**output**: A data frame with rows representing each taxon and two columns: `species`, which is the same as the unique values of the input `species`, and `native_anywhere_in_aus`, a vector indicating whether each species is native anywhere in Australia, introduced by humans from elsewhere, or unknown with respect to the APC resource. - -### state_diversity_counts - -**description**: This function calculates state-level diversity for native, introduced, and more complicated species origins based on the geographic data available in the APC. - -**arguments**: - -``` -state #state for which diversity should be summarised -resources = load_taxonomic_resources() -``` - -**output**: A data frame with three columns: "origin" indicating the origin of the species, "state" indicating the Australian state or territory, and "num_species" indicating the number of species for that origin and state. - -## Utility functions - -### load_taxonomic_resources - -**description**: This function loads two taxonomic datasets for Australia's vascular plants, the APC and APNI, into the global environment. It accesses taxonomic data from a dataset using the provided version number or the default version. The function creates several data frames by filtering and selecting data from the loaded lists. - -**usage notes**: This function is called by many other APC functions, but is unlikely to be used independently by a APCalign user. - -**arguments**: - -``` -stable_or_current_data = "stable" -version = default_version() -reload = FALSE -``` - -**output**: Several dataframes that include subsets of the APC/APNI based on taxon rank and taxonomic status. 
- -### standardise_names - -**description**: This function standardises taxon names by performing a series of text substitutions to remove common inconsistencies in taxonomic nomenclature. The function takes a character vector of taxon names as input and returns a character vector of taxon names using standardised taxonomic syntax as output. In particular it standardises taxon rank abbreviations and qualifiers (subsp., var., f.), as people use many variants of these terms. It also standardises or removes a few additional filler words used within taxon names (affinis becomes aff.; s.l. and s.s. are removed). - -**arguments**: - -``` -taxon_names #input vector of taxon names -``` - -**output**: A character vector of standardised taxon names. - -### strip_names - -**description**: Given a vector of taxonomic names, this function removes subtaxa designations ("subsp.", "var.", "f.", and "ser"), special characters (e.g., "-", ".", "(", ")", "?"), and extra whitespace. The resulting vector of names is also converted to lowercase. - -**arguments**: - -``` -taxon_names #input vector of taxon names -``` - -**output**: A character vector of stripped taxonomic names, with subtaxa designations, special characters, and extra whitespace removed, and all letters converted to lowercase. - -### strip_names_extra - -**description**: Suggested to run after strip_names, given a vector of taxonomic names, this function removes additional filler words and characters (" x " [hybrid taxa], "sp."). The resulting vector of names is also converted to lowercase. - -**arguments**: - -``` -taxon_names #input vector of taxon names -``` - -**output**: A character vector of stripped taxonomic names, with subtaxa designations, special characters, additional filler words and extra whitespace removed, and all letters converted to lowercase. 
- From 49cdd3c1d7765c66b5f260874834e45afd19c898 Mon Sep 17 00:00:00 2001 From: Elizabeth Wenk Date: Fri, 3 May 2024 15:27:34 +1000 Subject: [PATCH 02/11] fix typo --- _pkgdown.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/_pkgdown.yml b/_pkgdown.yml index da091977..8028a94d 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -16,7 +16,7 @@ navbar: articles: text: Articles menu: - - text: Data sources + - text: Data sources (APC & APNI) href: articles/articles/data-providers.html - text: Taxon matching href: articles/updating-taxon-names.html @@ -27,12 +27,11 @@ navbar: reference: - subtitle: Align and update taxon names - contents: - - load_taxonomic_resources - - default_version - create_taxonomic_update_lookup - align_taxa - update_taxonomy - subtitle: Standardise and simplify plant taxon names +- contents: - standardise_names - standardise_taxon_rank - strip_names @@ -44,4 +43,6 @@ reference: - native_anywhere_in_australia - subtitle: Data - contents: + - load_taxonomic_resources + - default_version - gbif_lite From 5fb331f6067dbace9b01c9c4759f6f1b5920f64f Mon Sep 17 00:00:00 2001 From: Elizabeth Wenk Date: Fri, 3 May 2024 15:40:50 +1000 Subject: [PATCH 03/11] Update match_taxa_documentation.csv add new family matches --- inst/extdata/match_taxa_documentation.csv | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/inst/extdata/match_taxa_documentation.csv b/inst/extdata/match_taxa_documentation.csv index 582811b5..16ee4e06 100644 --- a/inst/extdata/match_taxa_documentation.csv +++ b/inst/extdata/match_taxa_documentation.csv @@ -48,5 +48,8 @@ match_12a,"Detect genus, by checking the first word in the string","first word ( match_12b,"Detect genus, by checking the first word in the string","first word (""genus"")",exact,other APC taxon concepts,genus, match_12c,"Detect genus, by checking the first word in the string","first word (""genus"")",exact,APNI,genus, match_12d,"Detect family, by checking the first word in the 
string","first word (""genus"")",exact,APC accepted taxon concepts,family, -match_12e,"Detect genus, by checking the first word in the string","first word (""genus"")",fuzzy,APC accepted taxon concepts,genus, -match_12f,"Detect genus, by checking the first word in the string","first word (""genus"")",fuzzy,other APC taxon concepts,genus, +match_12e,"Detect family, by checking the first word in the string","first word (""genus"")",exact,other APC taxon concepts,family, +match_12f,"Detect genus, by checking the first word in the string","first word (""genus"")",fuzzy,APC accepted taxon concepts,genus, +match_12g,"Detect genus, by checking the first word in the string","first word (""genus"")",fuzzy,other APC taxon concepts,genus, +match_12h,"Detect family, by checking the first word in the string","first word (""genus"")",fuzzy,APC accepted taxon concepts,family, +match_12i,"Detect family, by checking the first word in the string","first word (""genus"")",fuzzy,other APC taxon concepts,family, From e3c7b137c8eef5ca3b92ee25fa60a7778aca98ac Mon Sep 17 00:00:00 2001 From: Elizabeth Wenk Date: Fri, 3 May 2024 16:22:57 +1000 Subject: [PATCH 04/11] fix example (I think this is what was failing) --- R/create_taxonomic_update_lookup.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/R/create_taxonomic_update_lookup.R b/R/create_taxonomic_update_lookup.R index caa71f72..d7a6615c 100644 --- a/R/create_taxonomic_update_lookup.R +++ b/R/create_taxonomic_update_lookup.R @@ -144,9 +144,7 @@ #' #' # example 3 #' taxon_list <- -#' readr::read_csv(here("inst/", "extdata", "test_taxa.csv"), -#' show_col_types = FALSE -#' ) +#' readr::read_csv("inst/extdata/test_taxa.csv", show_col_types = FALSE) #' #' create_taxonomic_update_lookup( #' taxa = taxon_list$original_name, From 5d29b420caae63280ab325532b364154d67c43c9 Mon Sep 17 00:00:00 2001 From: Will Cornwell Date: Fri, 3 May 2024 16:27:26 +1000 Subject: [PATCH 05/11] running devtools::document again --- 
man/create_taxonomic_update_lookup.Rd | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/man/create_taxonomic_update_lookup.Rd b/man/create_taxonomic_update_lookup.Rd index a0fd9db2..6c273cc8 100644 --- a/man/create_taxonomic_update_lookup.Rd +++ b/man/create_taxonomic_update_lookup.Rd @@ -181,9 +181,7 @@ create_taxonomic_update_lookup( # example 3 taxon_list <- - readr::read_csv(here("inst/", "extdata", "test_taxa.csv"), - show_col_types = FALSE - ) + readr::read_csv("inst/extdata/test_taxa.csv", show_col_types = FALSE) create_taxonomic_update_lookup( taxa = taxon_list$original_name, From 159035a19a03b5c41d378e0265f14fea222c42f1 Mon Sep 17 00:00:00 2001 From: Elizabeth Wenk Date: Fri, 3 May 2024 16:57:15 +1000 Subject: [PATCH 06/11] fix example in roxygen --- R/create_taxonomic_update_lookup.R | 3 +- inst/extdata/test_taxa.csv | 66 +++++++++++++++--------------- 2 files changed, 35 insertions(+), 34 deletions(-) diff --git a/R/create_taxonomic_update_lookup.R b/R/create_taxonomic_update_lookup.R index d7a6615c..b9a5c57f 100644 --- a/R/create_taxonomic_update_lookup.R +++ b/R/create_taxonomic_update_lookup.R @@ -144,7 +144,8 @@ #' #' # example 3 #' taxon_list <- -#' readr::read_csv("inst/extdata/test_taxa.csv", show_col_types = FALSE) +#' system.file("extdata", "test_taxa.csv", package = "APCalign") %>% +#' readr::read_csv(show_col_types = FALSE) #' #' create_taxonomic_update_lookup( #' taxa = taxon_list$original_name, diff --git a/inst/extdata/test_taxa.csv b/inst/extdata/test_taxa.csv index b1dabc09..f88fc9f5 100644 --- a/inst/extdata/test_taxa.csv +++ b/inst/extdata/test_taxa.csv @@ -1,33 +1,33 @@ -original_name -Banksia serrata -Banksia serrate -Banksee serrate -Banksia cerrata -Banksia sp. -Dryandra sp. -Argyrodendron (Whyanbeel) -Argyrodendron ssp. (Whyanbeel BH 1106RFK) -Argyrodendron Whyanbeel -Argyrodendron sp. (Whyanbeel BH 1106RFK) -Argyrodendron sp. Whyanbeel (B.P.Hyland RFK 1106) -Argyrodendron sp. 
Whyanbeel (B.P.Hyland RFK1106) -Dryandra aurantia -Banksia aurantia -Dryandra blechnifolia -Banksia pellaeifolia -Dryandra idiogenes -Banksia idiogenes -Dryandra lindleyana -Banksia dallanneyi -Acacia aneura -Acacia minyura -Acacia paraneura -Racosperma aneurum -Acacia aneura var. intermedia -Banksia (has long pink leaves) -Dryandra (has long pink leaves) -Acacia minyura / Acacia paraneura -Acacia aphanoclada x Acacia pyrifolia var. pyrifolia -Acacia minyura x Acacia paraneura -"no clue, a monocot" -Orchidaceae (epiphtye) +original_name,notes +Banksia serrata,notes_01 +Banksia serrate,notes_02 +Banksee serrate,notes_03 +Banksia cerrata,notes_04 +Banksia sp.,notes_05 +Dryandra sp.,notes_06 +Argyrodendron (Whyanbeel) ,notes_07 +Argyrodendron ssp. (Whyanbeel BH 1106RFK) ,notes_08 +Argyrodendron Whyanbeel ,notes_09 +Argyrodendron sp. (Whyanbeel BH 1106RFK) ,notes_10 +Argyrodendron sp. Whyanbeel (B.P.Hyland RFK 1106),notes_11 +Argyrodendron sp. Whyanbeel (B.P.Hyland RFK1106) ,notes_12 +Dryandra aurantia,notes_13 +Banksia aurantia,notes_14 +Dryandra blechnifolia,notes_15 +Banksia pellaeifolia,notes_16 +Dryandra idiogenes,notes_17 +Banksia idiogenes,notes_18 +Dryandra lindleyana,notes_19 +Banksia dallanneyi,notes_20 +Acacia aneura,notes_21 +Acacia minyura,notes_22 +Acacia paraneura,notes_23 +Racosperma aneurum,notes_24 +Acacia aneura var. intermedia,notes_25 +Banksia (has long pink leaves),notes_26 +Dryandra (has long pink leaves),notes_27 +Acacia minyura / Acacia paraneura,notes_28 +Acacia aphanoclada x Acacia pyrifolia var. 
pyrifolia,notes_29 +Acacia minyura x Acacia paraneura,notes_30 +"no clue, a monocot",notes_31 +Orchidaceae (epiphtye),notes_32 From fd8cac904d870ce0e39c91ab1b336ae5df44db3c Mon Sep 17 00:00:00 2001 From: Elizabeth Wenk Date: Fri, 3 May 2024 17:05:00 +1000 Subject: [PATCH 07/11] Update create_taxonomic_update_lookup.Rd --- man/create_taxonomic_update_lookup.Rd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/man/create_taxonomic_update_lookup.Rd b/man/create_taxonomic_update_lookup.Rd index 6c273cc8..f9c1463f 100644 --- a/man/create_taxonomic_update_lookup.Rd +++ b/man/create_taxonomic_update_lookup.Rd @@ -181,7 +181,8 @@ create_taxonomic_update_lookup( # example 3 taxon_list <- - readr::read_csv("inst/extdata/test_taxa.csv", show_col_types = FALSE) + system.file("extdata", "test_taxa.csv", package = "APCalign") \%>\% + readr::read_csv(show_col_types = FALSE) create_taxonomic_update_lookup( taxa = taxon_list$original_name, From 1c1fea191ceb7c34715887777c2ad46cd559defb Mon Sep 17 00:00:00 2001 From: Daniel Falster Date: Fri, 3 May 2024 17:29:29 +1000 Subject: [PATCH 08/11] Remove the pipe from example --- R/create_taxonomic_update_lookup.R | 7 ++++--- man/create_taxonomic_update_lookup.Rd | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/R/create_taxonomic_update_lookup.R b/R/create_taxonomic_update_lookup.R index b9a5c57f..0b3d3349 100644 --- a/R/create_taxonomic_update_lookup.R +++ b/R/create_taxonomic_update_lookup.R @@ -143,9 +143,10 @@ #' ) #' #' # example 3 -#' taxon_list <- -#' system.file("extdata", "test_taxa.csv", package = "APCalign") %>% -#' readr::read_csv(show_col_types = FALSE) +#' taxon_list <- +#' readr::read_csv( +#' system.file("extdata", "test_taxa.csv", package = "APCalign"), +#' show_col_types = FALSE) #' #' create_taxonomic_update_lookup( #' taxa = taxon_list$original_name, diff --git a/man/create_taxonomic_update_lookup.Rd b/man/create_taxonomic_update_lookup.Rd index f9c1463f..e3ae8743 100644 --- 
a/man/create_taxonomic_update_lookup.Rd +++ b/man/create_taxonomic_update_lookup.Rd @@ -180,9 +180,10 @@ create_taxonomic_update_lookup( ) # example 3 -taxon_list <- - system.file("extdata", "test_taxa.csv", package = "APCalign") \%>\% - readr::read_csv(show_col_types = FALSE) +taxon_list <- + readr::read_csv( + system.file("extdata", "test_taxa.csv", package = "APCalign"), + show_col_types = FALSE) create_taxonomic_update_lookup( taxa = taxon_list$original_name, From 4c88ca2cc92fa6ade1f343430080bed34266369b Mon Sep 17 00:00:00 2001 From: Daniel Falster Date: Fri, 3 May 2024 17:29:51 +1000 Subject: [PATCH 09/11] Remove rmd for word --- R/word.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/word.R b/R/word.R index 0a762479..a8f51bf3 100644 --- a/R/word.R +++ b/R/word.R @@ -2,12 +2,11 @@ #' replacement for stringr::word #' #' @param string A character vector - #' @param start,end Pair of integer vectors giving range of words (inclusive) #' to extract. The default value selects the first word. #' @param sep Separator between words. Defaults to single space. #' @return A character vector with the same length as `string`/`start`/`end`. 
-#' +#' @noRd #' @examples #' spp <- c("Banksia serrata", "Actinotus helanthii") #' APCalign:::word(spp, 1) From b561fc5962ad0f968b25a96244e4762dd8c1f1fa Mon Sep 17 00:00:00 2001 From: Elizabeth Wenk Date: Fri, 3 May 2024 17:42:15 +1000 Subject: [PATCH 10/11] Revert "fix another error" --- R/update_taxonomy.R | 2 +- man/update_taxonomy.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/update_taxonomy.R b/R/update_taxonomy.R index 84c5da4c..339f4619 100644 --- a/R/update_taxonomy.R +++ b/R/update_taxonomy.R @@ -104,7 +104,7 @@ #' aligned_name = c("Dryandra preissii", "Banksia acuminata"), #' taxon_rank = c("species", "species"), #' taxonomic_dataset = c("APC", "APC"), -#' aligned_reason = NA_character_, +#' aligned_reason = c(NA_character_,NA_character_), #' resources = resources #' ) #' ) diff --git a/man/update_taxonomy.Rd b/man/update_taxonomy.Rd index 042624ac..2fc03c38 100644 --- a/man/update_taxonomy.Rd +++ b/man/update_taxonomy.Rd @@ -122,7 +122,7 @@ update_taxonomy( aligned_name = c("Dryandra preissii", "Banksia acuminata"), taxon_rank = c("species", "species"), taxonomic_dataset = c("APC", "APC"), - aligned_reason = NA_character_, + aligned_reason = c(NA_character_,NA_character_), resources = resources ) ) From 5d2ee20d2a8f13b01f7758768e8f35ce7f5f8133 Mon Sep 17 00:00:00 2001 From: Elizabeth Wenk Date: Fri, 3 May 2024 17:51:28 +1000 Subject: [PATCH 11/11] and again --- R/update_taxonomy.R | 7 ++++--- man/update_taxonomy.Rd | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/R/update_taxonomy.R b/R/update_taxonomy.R index 339f4619..80012ab3 100644 --- a/R/update_taxonomy.R +++ b/R/update_taxonomy.R @@ -104,9 +104,10 @@ #' aligned_name = c("Dryandra preissii", "Banksia acuminata"), #' taxon_rank = c("species", "species"), #' taxonomic_dataset = c("APC", "APC"), -#' aligned_reason = c(NA_character_,NA_character_), -#' resources = resources -#' ) +#' aligned_reason = c(NA_character_, +#' NA_character_) +#' ), +#' 
resources = resources #' ) #' } diff --git a/man/update_taxonomy.Rd b/man/update_taxonomy.Rd index 2fc03c38..cf9804c6 100644 --- a/man/update_taxonomy.Rd +++ b/man/update_taxonomy.Rd @@ -122,9 +122,10 @@ update_taxonomy( aligned_name = c("Dryandra preissii", "Banksia acuminata"), taxon_rank = c("species", "species"), taxonomic_dataset = c("APC", "APC"), - aligned_reason = c(NA_character_,NA_character_), - resources = resources - ) + aligned_reason = c(NA_character_, + NA_character_) + ), + resources = resources ) } }