diff --git a/inst/WORDLIST.txt b/inst/WORDLIST.txt index 700f80e7..98398c87 100644 --- a/inst/WORDLIST.txt +++ b/inst/WORDLIST.txt @@ -1,487 +1,56 @@ -aff -bayer -callout -CICD -cran -de -desc -dic -dirs -djnavarro -doch -eval -fosstodon -frami -FRAMI -frontimage -fs -genart -german -github -gsub -hört -href -https -hunspell -ìnst -jeder -JetBrains -jpg -JW -lang -lockfile -md -netlify -nur -png -pre -purrr -px -qmd -qmd's -readLines -readme -readr -renv -repo -Rproj -Rscript -StefanThoma -str -stringr -subfolders -testthat -Thoma -tidyverse -ubuntu -unlist -verowokjnsthet -versteht -versthet -von -wordlist -WORDLIST -writeLines -yaml -yml -zitat -Zitat -adam -auth -cdisc -csv -df -diffdf -dplyr -env -ga -googledrive -googlesheets -hackathon -Hackathon -init -io -nrow -prog -rstudio -sdtm -sep -shinyapps -standardised -stefan -submisisons -tempfile -tf -thoma -tibble -un -www -zxqguo -ADaM -adex -ADEX -ADPCYMG -AVAL -bdat -BDS -Cmd -exprs -frac -Kangjie -Mac -NUMCYC -param -PARAM -PARAMCD -PARAPMCD -sas -TOTDOSE -USUBJID -valuelevel -Valuelevel -Zhang -pharmaverse -Pharmaverse -signup -Straub -ui -xportr -pharmaverse -Straub -xportr -blablabla -gobledigook -iphony -blogpost -klee -pharmaverse -Straub -WIP -xportr -ADADAS -ADAE -AdaMTeamIndia +aa +Abhishek +ABLFL +ACOVFL +adadas ADADAS -ADAE -ADLBC -ADLBH -ADLBHY -ADSL -ADTTE -ADVS adae -aes -bb -ben -bindCache -bindEvent -challange -checkin -comms -cosa -Divyasneelam -edoardo -Edoardo -firstsecondthird -fluidRow -ggplot -ggtitle -gsk -GSK -ide -introR -Jagadish -jagadishkatam -Katam -linkedin -mailto -mancini -mascary -Mascary -na -Neitmann -organisations -Reactiveness -readxl -registrationData -renderUI -roche -RStudio -rStudio -sadchla -Sadchla -Sanofi -sapply -sas -straub -summarise -survivorship -Survivorship -tagList -TatianaPXL -teamspoRt -textInput -thomasneitmann -traversable -UI -useNA -ViiV -WAWA -wg -wikipedia -xlab -xlsx -ylab -zelos -Zelos -zhu -Zhu -bindCache -bindEvent -firstsecondthird -fluidRow 
-Reactiveness -registrationData -renderUI -sapply -tagList -textInput -UI -comms -CWG -LinkedIn -TBL -wg -gowerc -proc -tribble -wg -cosa -knitr -Boehringer -Ingelheim -insightsengineering -Moderna -PHUSE -RFICDTC -rlistings -rtables -SDTMs -Shen -tlg -TLG -TLGs -xpt -APMDOS -atorus -Atorus -britannica -dev -Encyclopædia -envsetup -Farrugia -Gottfried -Hk -Isabela -Janssen -Kalappurakal -logrx -merck -metacore -Pharma -pharmaRTF -phuse -QOlIU -qp -Rimler -rinpharma -rtf -Stackhouse -Sumesh -tidytlg -Tplyr -Velásquez's -youtube -anderson -atorus -CAMIS -ceil -dn -doesn -edu -eps -gp -Gregor -htm -IBMRounding -iml -ji -lexjansen -lhdjung -lrcon -phuse -psiaims -rdocumentation -rounde -Roundings -roundSAS -rw -SAS's -sfirke -stackoverflow -StackOverflow -thm -tidytlg -tplyr -Tplyr -trunc -ucla -unv -ut -zMachine -admiraldev -admiralRoche -apache -basel -gitlab -licence -optimised -organised -orgs -phuse -programm -Rbasel -Rimler -siloed -stefanthoma -useR +ADAE +adam +ADaM +ADaMs +AdaMTeamIndia +adex +ADEX +adlbc +ADLBC +ADLBH +ADLBHY +admiraldev +admiraldisc +admiraldiscovery admiralroche +admiralvaccine +adnca +ADPCYMG +ADPPK +ADRG +adsl +ADSL ADTM +adtte +ADTTE +ADURN +advs +ADVS ADY ae +AE +AEDECOD AEENDTC AEN -AESTDTC -args -AST -ASTDTM -datetime -Datetime -DCUTDT -dt -dtc -DTC -dtf -DTHDT -dtm -hms -lubridate -mh -MHSTDTC -mmThh -ss -tmf -TRTSDT -TRTSDTM -VSDTC -VSTPT -ymd -yyyy AENDT AENDY -ASTDTM -ASTDY -datetime -DEMOADY -DEMOEDY -dt -DT -dtm -DTM -dy -lubridate -TRTSDTM -Ari -eclinical -Knoph -Nordisk -Novo -Siggaard -pharma -reproducibility -behaviour -Farrugia -ethz -ANRHI -adadas -adlbc -adsl -adtte -André -appsilon -Appsilon -APPSILON -bc -bs -btn -caa -cder -CDER -collapseExample -config -ctd -dcf -deliverables -doi -eCTD -ee -fac -favicon -fda -formatters -getwd -Golem -ico -js -json -kmplot -modles -modularization -modularize -pkglite -rconsortium -RConsortium -rhinosubmission -Rhinoverse -Rprofile -scalable -scss -Shinytest -TLFs -toolset -Vedha 
-Veríssimo -Viyash -webm -Xiao -Zhao -aa -af -atoxgr -bdfef -ChatGPT -comparators -CTCAE -ctcv -dir -fpCompare -getOption -Mächler -mmaechler -NCI -packageVersion -ProgRRR -rda -signif -anrind -daids -DAIDS -defaultPageSize -Lipase -niaid -nih -PredictiveEcology -reactable -representable -Representable -resizable -rsc -AEDECOD +aes AEs AESEQ AESEV AESEVN +AESTDTC AESTDY +AETHNIC +AETHNICN +AGEU AHIFL ahilofl ahsev @@ -489,223 +58,557 @@ AHSEV ahsevfl AHSEVFL ALOFL -ASTDT -modularity -pharmaversesdtm -preprocess -stdy -STUDYID -TRTEMFL -VSDY -VSORRES -VSSEQ -VSTESTCD -adnca -adppk -ADPPK -AETHNIC -AETHNICN -ARMCD -BILI -BILIBL -bmi -BMIBL -bsa -BSABL -CKD -codelist -COHORTC -COUNTRYL -COUNTRYN -covar -CRCL -CRCLBL -creat -CREAT -CREATBL -creatu -egfr -EGFRBL -EPI -EXDOSFRM -EXROUTE -FORMN -HTBL -labsbl -LBBLFL -LBSTRESN -LBTESTCD -LBTESTCDB -metatools -Mosteller -pc -pharmacokinetic -pharmaverseadam -poppk -preconfigured -RACEN -ROUTEN -SEXN -SITEIDN -STUDYIDN -SUBJID -SUBJIDN -SUBJTYP -SUBJTYPC -TBILBL -USUBJIDN -VSBLFL -vslb -VSSTRESN -WTBL +amd analytics Analytics -csp -dS -eventID -frontend -nqJsLSLd -pageID -QHuA -thome -traceRedir -pKobZqjlXChj -si -vehIoJgdA -ABLFL -admiraldisc -admiraldiscovery -advs -AGEU +anderson +André +ANOSMIA +ANRHI +anrind +apache +APMDOS +appsilon +Appsilon +APPSILON +args +Ari +arjun +Arjun +ARMCD +aspx +AST +ASTDT +ASTDTM +ASTDY +atorus +Atorus +atoxgr +auth +AVAL AVALU +basel +bayer +bb +bc +bdat +BDS +behaviour +ben +BILI +BILIBL +bindCache +bindEvent +Biostatisticians +Biostatistics +Biostats +blogpost bmi BMI BMIBL +Boehringer +britannica +bs +bsa +BSABL +btn +caa +callout +CAMIS CANCTRFL +cder +CDER +cdisc +CDISC +ceil changelog +ChatGPT cheatsheet +checkin +CICD +CKD CMCAT +Cmd CMSEQ codebase -DTHFL -ef -EOSDT -LSTALVDT -msrc -onboarding -PRSEQ -TRTEDT -wayback -ABLFL -youtu -ADRG -amd +codelist +COHORTC +collapseExample +comms +comparators compatibilities +config +cosa +COUNTRYL +COUNTRYN +covar +COVID cowplot +cran +CRCL 
+CRCLBL +creat +CREAT +CREATBL +creatu +csp +csv +ctcv +ctd +CWG da daemonless +daids +DAIDS +datetime +Datetime +dcf +DCUTDT +defaultPageSize +deliverables +DEMOADY +DEMOEDY +desc +dev +df +dfs +dhivya +Dhivya +DIARRHOEA +diffdf +dir +dirs +Divya +Divyasneelam +djnavarro +dn dockerfile Dockerfile -emmeans +doesn +doi +Dony +dplyr +dS +dt +DT +dtc +DTC +dtf +DTHDT +DTHFL +dtm +DTM +dy +eclinical +eCTD +edoardo +Edoardo +edu +ee +ef +egfr +EGFRBL +Encyclopædia +env +envsetup +EOSDT +EPI +eps +ethz +eval +eventID +EXDOSE +EXDOSFRM +exprs +EXROUTE +EXSEQ +EXTRT +fac +Farrugia +favicon +fda FDA's +firstsecondthird +fluidRow +formatters +FORMN +fosstodon +fpCompare +frac +frontend +frontimage +fs +ga +genart +getOption +getwd +ggplot +ggtitle ghcr -golem -htmltools -http -httpuv -huxtable -IJ -linodeobjects -magrittr -Makowski -mb -natively -pkgload -podman -Podman -rconsortiumxappsilon -rds -rsubmission -shinylive -Shinylive -Stagg -tidyr -tippy -transformative -Tymoteusz -visR -wasm -webassembly -WebAssembly -webp -webr -webR -WebR -wfk -WnpvVgmyE -Dony -Unardi -aa -Abhishek -ADaM -ADaMs -admiralvaccine -arjun -Arjun -aspx -atorus -Biostatisticians -Biostatistics -Biostats -dhivya -Dhivya -Divya github +Github +gitlab +golem +Golem +googledrive +googlesheets +Gottfried +gowerc +gp +Gregor +gsk GSK +gsub +hackathon +Hackathon HICC +Hk +hms +href +HTBL +htm +http https +httpuv +IBMRounding +ico +ide +IJ +iml +Ingelheim +init +insightsengineering +introR io IOT +Isabela iscr ISCR +Jagadish +jagadishkatam +Janssen +ji jpg +js +json +Kalappurakal kanagaraj Kanagaraj +Kangjie kar Kar +Katam KeepCalm +klee +kmplot +knitr +Knoph kumari Kumari +labsbl +lang +LBBLFL +LBSTRESN +LBTESTCD +LBTESTCDB +lexjansen +lhdjung li +licence linkedin LinkedIn +linodeobjects +Lipase lockfile +logrx +lrcon +LSTALVDT +lubridate +Mächler mahendran Mahendran +mailto MainConf +Makowski +mancini +mascary +Mascary +mb md +merck metacore +Metacore metatools +mh +MHSTDTC Mishra +mmaechler +mmThh 
+Moderna +modles +modularity +modularization +modularize +Mosteller +msrc +na +nas +natively ncol +Neitmann +netlify +niaid +nih Nordisk Novo Novotel +nqJsLSLd +nrow +NUMCYC +onboarding +optimised +organisations +organised +orgs ouR +packageVersion +pageID +param +PARAM +PARAMCD +params +pc +pharma +Pharma +pharmacokinetic +pharmaRTF pharmaverse +Pharmaverse PHARMAVERSE +pharmaverseadam +pharmaversesdtm +phuse +PHUSE +pkglite +pKobZqjlXChj +png +podman +Podman pooja Pooja +poppk +pre Pre +PRE +preconfigured +PredictiveEcology +preprocess +proc +prog +ProgRRR +PRSEQ +PRURITUS +psiaims +purrr +px +QHuA +qmd +QOlIU +qp +RACEN +Rbasel +rconsortium +RConsortium +rconsortiumxappsilon +rds +reactable +Reactiveness +readLines README +readr +readxl +registrationData +renderUI renv repo +repos +representable +Representable +reproducibility +resizable +RFICDTC +rhinosubmission +Rhinoverse +Rimler +rinpharma +rlistings +roche +rounde +Roundings +roundSAS +ROUTEN +Rprofile +Rproj +rsc +rstudio +rStudio +RStudio +rsubmission +rtables +rtf Rubalingam +rw +sadchla +Sadchla +Sanofi +sapply +sas +scalable +scss +sdtm +SDTM +SDTMs +sep +SEXN +sfirke +Shen +shinyapps +shinylive +Shinylive +Shinytest +si +Siggaard +signif +siloed +SITEIDN soumitra Soumitra +ss +Stackhouse +stackoverflow +StackOverflow +Stagg +standardised +stdy +stefan +stefanthoma +StefanThoma +str +straub +Straub +stringr +STUDYID +STUDYIDN +SUBJID +SUBJIDN +SUBJTYP +SUBJTYPC +submisisons +Sumesh +summarise +survivorship +Survivorship +tagList +TatianaPXL +TBILBL +teamspoRt +tempfile +testthat +textInput +tf th +thm +thoma +Thoma +thomasneitmann +thome +tibble +tidytlg +tidyverse +TLFs +tlg +TLG +TLGs +tmf +toolset +TOTDOSE +tplyr +Tplyr +traceRedir +transformative +traversable Travese +tribble +TRTEDT +TRTEMFL +TRTSDT +TRTSDTM +trunc +Tymoteusz +ucla +ui +UI +un +Unardi +ungroup +unlist +unv +useNA +useR +USUBJID +USUBJIDN +ut +Vedha +vehIoJgdA +Velásquez's venkatachalam Venkatachalam +Veríssimo +ViiV +Viyash 
+VSBLFL +VSDTC +VSDY +vslb +VSORRES +VSSEQ +VSSTRESN +VSTESTCD +VSTPT +wasm +WAWA +wayback +webassembly +WebAssembly +webm +webp +webr +webR +WebR +wfk +wg +wikipedia +WnpvVgmyE +writeLines +WTBL www +XANO +Xiao +xlab +xlsx xportr +xpt +ylab +ymd +yml +youtu +youtube +yyyy +zelos +Zelos +Zhang +Zhao +zhu +Zhu +zMachine +zxqguo diff --git a/media/filter_functions_cheatsheet.png b/media/filter_functions_cheatsheet.png new file mode 100644 index 00000000..2078a9ea Binary files /dev/null and b/media/filter_functions_cheatsheet.png differ diff --git a/media/filter_shirt.png b/media/filter_shirt.png new file mode 100644 index 00000000..7dc56bea Binary files /dev/null and b/media/filter_shirt.png differ diff --git a/posts/2024-03-01_admiral_filter_functions/admiral_filter_functions.qmd b/posts/2024-03-01_admiral_filter_functions/admiral_filter_functions.qmd new file mode 100644 index 00000000..bf41b1b3 --- /dev/null +++ b/posts/2024-03-01_admiral_filter_functions/admiral_filter_functions.qmd @@ -0,0 +1,228 @@ +--- +title: "Filter out the noise!" +author: + - name: Edoardo Mancini +description: "A brief exposition of the filter_* functions in {admiral} - what they do and how to use them." +date: "2024-03-01" +# please do not use any non-default categories. +# You can find the default categories in the repository README.md +categories: [admiral] +# feel free to change the image +image: "filter_shirt.png" + +--- + + + +```{r setup, include=FALSE} +long_slug <- "2024-03-01_admiral_filter_functions" +``` + + + +# Introduction + +Filtering and merging datasets is the bread and butter of statistical programming. Whether it's on the way to an ADaM variable derivation, or in an effort to pull out a list of patients matching a specific condition for a TLG, or another task entirely, most steps in the statistical programming workflow feature some combination of these two tasks. 
+ +The `{tidyverse}` functions `filter()`, `group_by()`, and `*_join()` are a fantastic toolset for filtering and merging, and can often suffice to carry out these sorts of operations. Often, however, this will be a multi-step process, requiring more than one set of pipe (`%>%`) chains if multiple datasets are involved. As such, the [{admiral}](https://pharmaverse.github.io/admiral/index.html) package builds on this concept by offering a very practical toolset of utility functions, henceforth referred to altogether as `filter_*()`. These are wrappers of common combinations of `{tidyverse}` function calls that enable the ADaM programmer to carry out such operations "in stride" within their ADaM workflow - in typical `{admiral}` style! + +Many of the `filter_*()` functions feature heavily within the `{admiral}` codebase, but they can be very handy in their own right. You can learn more about them from: + +* The relevant section in the [Reference page of the admiral documentation website](https://pharmaverse.github.io/admiral/reference/#utilities-for-filtering-observations); +* The short visual explanations on the second page of the [{admiral} Cheat Sheet](https://github.com/pharmaverse/admiral/blob/main/inst/cheatsheet/admiral_cheatsheet.pdf); + +![](filter_functions_cheatsheet.png){fig-align="center" width="500"} + +* ...and the rest of this blog post! + +## Required Packages + +The examples in this blog post require the following packages. + +```{r, warning=FALSE, message=FALSE} +library(admiral) +library(pharmaversesdtm) +library(dplyr, warn.conflicts = FALSE) +library(tibble) +``` + +We also create minimally viable ADSL, ADAE and EX datasets to be used where needed in the following examples. 
+ +```{r} +adsl <- tribble( + ~USUBJID, ~AGE, ~SEX, + "01-701-1015", 63, "F", + "01-701-1034", 77, "F", + "01-701-1115", 84, "M", + "01-701-1146", 75, "F", + "01-701-1444", 63, "M" +) + +adae1 <- tribble( + ~USUBJID, ~AEDECOD, ~AESEV, ~AESTDTC, + "01-701-1015", "DIARRHOEA", "MODERATE", "2014-01-09", + "01-701-1034", "FATIGUE", "SEVERE", "2014-11-02", + "01-701-1034", "HEADACHE", "MILD", "2014-12-01", + "01-701-1034", "APPLICATION SITE PRURITUS", "MODERATE", "2014-08-27", + "01-701-1115", "FATIGUE", "MILD", "2013-01-14", + "01-701-1146", "FATIGUE", "MODERATE", "2013-06-03", + "01-701-1146", "ANOSMIA", "MODERATE", "2013-08-11" +) + +adae2 <- tribble( + ~USUBJID, ~ADY, ~ACOVFL, ~ADURN, + "01-701-1015", 10, "N", 1, + "01-701-1015", 21, "N", 50, + "01-701-1015", 23, "Y", 14, + "01-701-1015", 32, "N", 31, + "01-701-1015", 42, "N", 20, + "01-701-1034", 11, "Y", 13, + "01-701-1034", 23, "N", 2, + "01-701-1146", 13, "Y", 12, + "01-701-1444", 14, "N", 32, + "01-701-1444", 21, "N", 41 +) + + +ex <- tribble( + ~USUBJID, ~EXSEQ, ~EXDOSE, ~EXTRT, + "01-701-1015", 1, 54, "XANO", + "01-701-1015", 2, 54, "XANO", + "01-701-1015", 3, 54, "XANO", + "01-701-1034", 1, 54, "XANO", + "01-701-1034", 2, 54, "XANO", + "01-701-1115", 1, 0, "PLACEBO", + "01-701-1115", 2, 0, "PLACEBO", + "01-701-1115", 3, 0, "PLACEBO", + "01-701-1146", 1, 0, "PLACEBO", + "01-701-1146", 2, 0, "PLACEBO", + "01-701-1146", 3, 0, "PLACEBO", + "01-701-1444", 1, 54, "XANO", + "01-701-1444", 2, 54, "XANO" +) +``` + +# `filter_exist()` and `filter_not_exist()` + +Commonly we may wish to identify a set of patients from ADSL who satisfy (or do not satisfy) some condition. This condition can be relative to data found in ADSL or another ADaM dataset. 
For formal workflows, we would likely consider creating some sort of flag to encode this information, but for a more "quick and dirty" approach we can use [filter_exist()](https://pharmaverse.github.io/admiral/reference/filter_exist.html) or [filter_not_exist()](https://pharmaverse.github.io/admiral/reference/filter_not_exist.html). + +For instance, suppose we want to obtain demographic information for the patients who have suffered moderate or severe fatigue using the datasets created above. A simple application of `filter_exist()` suffices: firstly, we feed in `adsl` as the input dataset and `adae1` as the secondary dataset (inside which the filtering condition is applied). We make sure to specify `by_vars = exprs(USUBJID)` to view the datasets patient-by-patient, and apply the condition on `dataset_add` (i.e. `adae1`) using the `filter_add` parameter. + +```{r} +filter_exist( + dataset = adsl, + dataset_add = adae1, + by_vars = exprs(USUBJID), + filter_add = AEDECOD == "FATIGUE" & AESEV %in% c("MODERATE", "SEVERE") +) +``` + +For the negation of this task, we can instead use `filter_not_exist()` with the same arguments: + +```{r} +filter_not_exist( + dataset = adsl, + dataset_add = adae1, + by_vars = exprs(USUBJID), + filter_add = AEDECOD == "FATIGUE" & AESEV %in% c("MODERATE", "SEVERE") +) +``` + +That's it! `filter_exist()` and `filter_not_exist()` are as simple as they are useful. + +# `filter_extreme()` + +Another frequent task is to select the first or last observation within a by-group. Two possible examples where this may feature are a) selecting the most recent adverse event for a patient, or b) selecting the last dose for a patient. + +We showcase below using [filter_extreme()](https://pharmaverse.github.io/admiral/reference/filter_extreme.html) for the latter example. 
Using `ex` as defined above, we simply feed this into the function, specifying again to group the dataset by patient using `by_vars = exprs(USUBJID)` and order observations using the selection `order = exprs(EXSEQ)`. Finally, we indicate that we are interested in the last dose for each patient through `mode = "last"`: + +```{r} +filter_extreme( + dataset = ex, + by_vars = exprs(USUBJID), + order = exprs(EXSEQ), + mode = "last" +) +``` + +To select the first dose instead, an equivalent call with `mode = "first"` would do the trick. + +One final argument which can be useful as a fail-safe, but isn't showcased in the example above, is `check_type`. This can be set to `"none"`, `"warning"` or `"error"` and will cause `filter_extreme()` to, respectively, do nothing, throw a warning, or throw an error if the observations of the input dataset are not unique with respect to the by variables and the order. + +> *Note* it is of course possible to achieve the same result using `{tidyverse}` tools: + +```{r} +ex %>% + arrange(USUBJID, desc(EXSEQ)) %>% + group_by(USUBJID) %>% + filter(row_number() == 1) %>% + ungroup() +``` + +> but it requires a few more functions and the code is slightly less readable. + +# `filter_relative()` + +Other times we might find ourselves wanting to filter observations directly before or after the observation where a specified condition is fulfilled. Using `{tidyverse}` tools, this can quickly get quite involved. Enter [filter_relative()](https://pharmaverse.github.io/admiral/reference/filter_relative.html)! + +In the example below we showcase how `filter_relative()` extracts the AEs directly after the first occurrence of `AEDECOD == "FATIGUE"` in the above-generated `adae1`. As before, we pass the `dataset` and `by_vars` arguments, after which we specify to order the observations by `AESTDTC` using `order = exprs(AESTDTC)` and the condition using `condition = AEDECOD == "FATIGUE"`. 
Then, we specify we want records directly _after_ the condition is satisfied using `selection = "after"` and that we do not want the reference observations (i.e. those that satisfy the `condition`) using `inclusive = FALSE`. Moreover, with `mode = "first"` we indicate that we want to use as reference the record where the condition is satisfied for the _first_ time. Finally, we indicate that we do not want to keep the groups with no observations satisfying the `condition` with `keep_no_ref_groups = FALSE`: + +```{r} +filter_relative( + dataset = adae1, + by_vars = exprs(USUBJID), + order = exprs(AESTDTC), + condition = AEDECOD == "FATIGUE", + selection = "after", + inclusive = FALSE, + mode = "first", + keep_no_ref_groups = FALSE +) +``` + +The arguments showcased above are flexible enough that we could modify our code accordingly for different scenarios. For instance, we could use `selection = "before"` to pick out any AEs directly before an occurrence of `AEDECOD == "FATIGUE"`, or we could use `mode = "last"` to use as reference the record where the condition is satisfied for the _last_ time. + +# `filter_joined()` + +The functions we have seen so far in this post have had relatively well-defined remits, and so a relatively contained set of arguments. [filter_joined()](https://pharmaverse.github.io/admiral/reference/filter_joined.html), however, breaks that mold: this function enables one to filter observations using a condition while taking other observations (possibly from a different dataset) into account. We present a simple example below. + +Let's try using `adae2` to extract the observations with a duration longer than 30 days (`ADURN > 30`) and on or after 7 days before a COVID AE (`ACOVFL == "Y"`). 
It is easier in this case to present the `filter_joined()` call and subsequently explain it: + +```{r} +filter_joined( + dataset = adae2, + dataset_add = adae2, + by_vars = exprs(USUBJID), + join_vars = exprs(ACOVFL, ADY), + join_type = "all", + order = exprs(ADY), + filter_join = ADURN > 30 & ACOVFL.join == "Y" & ADY >= ADY.join - 7 +) +``` + +Firstly, we note that `filter_joined()` has been used because our condition is two-faceted. That is, to decide whether a certain record is of interest, we want (1) a duration of more than 30 days (this is information found within the record itself) _and_ (2) proximity to a COVID AE (this is information gleaned from surrounding records - not the record itself). Because both conditions are relative to the `adae2` dataset, we have set `dataset = adae2` and `dataset_add = adae2`, i.e. we are joining `adae2` onto itself. Then, we view the dataset patient-by-patient with `by_vars = exprs(USUBJID)` and order observations by analysis day using `order = exprs(ADY)`. + +All that is left is to specify our condition and how to select records relative to it. We start by noting that portion (1) of our condition uses the variable `ADURN` from the dataset specified in the `dataset` argument, and portion (2) requires `ACOVFL` and `ADY` from the secondary dataset specified in `dataset_add`. Consequently we need to specify `join_vars = exprs(ACOVFL, ADY)` to ensure we can use the latter variables in the `filter_join` argument. Then, for `filter_join`, we specify `filter_join = ADURN > 30 & ACOVFL.join == "Y" & ADY >= ADY.join - 7`, which translates to: + +* Select records from `dataset` where `ADURN > 30` (these satisfy (1)); +* Only keep those records if, after combining with `dataset_add`, we can find a record in `dataset_add` where `ACOVFL == "Y"` and whose `ADY` is at most seven days later than the `ADY` from `dataset` (this will satisfy (2)). 
+ +In the `filter_join` argument, notice that the `.join` suffix is used to refer to the variables that come from `dataset_add`, which enables us to compare the same variable across observations (`ADY >= ADY.join - 7`). Note finally that `join_type = "all"` allows the user to consider all observations in `dataset_add` when joining. + +For a much more detailed exposition of `filter_joined()`, with more examples, consider visiting its [reference page](https://pharmaverse.github.io/admiral/reference/filter_joined.html) on the `{admiral}` website. + +# Conclusion + +While it is true that the `{tidyverse}` functions can, if applied correctly, achieve lots of the objectives outlined in this post, with `filter_*()` we can often do everything in one fell swoop, and feel all the more sleek! Hopefully this post has highlighted the plethora of possibilities at your fingertips gifted by `filter_*()`. + +However, remember Uncle Ben's maxim: _"with great power comes great responsibility"_. Although this was originally directed at a young Peter Parker, it is just as applicable to us ADaM programmers: always make sure to acquaint yourself with the documentation for the tool you are using - especially a behemoth like `filter_joined()` - to make sure your code does what you think it should do! 
# appendix.R --------------------------------------------------------------
#
# Helpers that render the "Last updated" and "Details" appendices of a blog
# post. The post's final (hidden) Quarto chunk uses them like this:
#
#   source("appendix.R")
#   insert_appendix(
#     repo_spec = "pharmaverse/blog",
#     name = long_slug
#   )

# markdown helpers --------------------------------------------------------

# Build a level-2 markdown heading tagged `{.appendix}`, followed by a line
# holding a single space and then `content`, all joined by newlines.
markdown_appendix <- function(name, content) {
  paste(paste("##", name, "{.appendix}"), " ", content, sep = "\n")
}

# Build an inline markdown link: `[text](path)`.
markdown_link <- function(text, path) {
  paste0("[", text, "](", path, ")")
}



# worker functions --------------------------------------------------------

# Markdown link to the post's .qmd source on the hosting site.
#
# repo_spec   "owner/repo" string.
# name        Post folder name (the long slug, e.g. "2024-03-01_my_post").
# collection  Top-level folder holding the posts.
# branch      Branch used in the URL.
# host        Base URL of the hosting site.
# text        Link text.
# file_name   File inside the post folder to link to. When NULL (default) it
#             is derived from `name` by dropping a leading "YYYY-MM-DD_" date
#             prefix and appending ".qmd". Previously "code_sections.qmd" was
#             hard-coded, which produced a dead link for every post whose
#             source file has a different name.
insert_source <- function(repo_spec, name,
                          collection = "posts",
                          branch = "main",
                          host = "https://github.com",
                          text = "source code",
                          file_name = NULL) {
  if (is.null(file_name)) {
    short_slug <- sub("^[0-9]{4}-[0-9]{2}-[0-9]{2}_", "", name)
    file_name <- paste0(short_slug, ".qmd")
  }
  path <- paste(
    host,
    repo_spec,
    "tree",
    branch,
    collection,
    name,
    file_name,
    sep = "/"
  )
  markdown_link(text, path)
}

# Current time as a character string that includes the time zone label.
#
# tzone  Time zone name; defaults to the session's zone.
insert_timestamp <- function(tzone = Sys.timezone()) {
  # Sys.timezone() is documented to return NA when the zone cannot be
  # determined; fall back to UTC rather than failing the render.
  if (is.na(tzone) || !nzchar(tzone)) {
    tzone <- "UTC"
  }
  time <- lubridate::now(tzone = tzone)
  # format() rather than as.character(): from R 4.3.0 as.character() on a
  # POSIXt object no longer forwards `tz`/`usetz` to format(), so the zone
  # label would silently disappear from the stamp.
  format(time, tz = tzone, usetz = TRUE)
}

# Markdown link to the `renv.lock` file inside the post folder.
# Arguments have the same meaning as in insert_source().
# NOTE(review): this assumes each post folder ships its own renv.lock --
# confirm, otherwise the link points at a file that does not exist.
insert_lockfile <- function(repo_spec, name,
                            collection = "posts",
                            branch = "main",
                            host = "https://github.com",
                            text = "R environment") {
  path <- paste(
    host,
    repo_spec,
    "tree",
    branch,
    collection,
    name,
    "renv.lock",
    sep = "/"
  )
  markdown_link(text, path)
}



# top level function ------------------------------------------------------

# Assemble both appendices ("Last updated" and "Details") and return them as
# raw markdown via knitr::asis_output().
#
# repo_spec, name, collection  Passed on to insert_source() and
#                              insert_lockfile().
# file_name                    Passed on to insert_source(); NULL lets it
#                              derive the file name from `name`.
insert_appendix <- function(repo_spec, name, collection = "posts",
                            file_name = NULL) {
  appendices <- paste(
    markdown_appendix(
      name = "Last updated",
      content = insert_timestamp()
    ),
    " ",
    markdown_appendix(
      name = "Details",
      content = paste(
        insert_source(repo_spec, name, collection, file_name = file_name),
        insert_lockfile(repo_spec, name, collection),
        sep = ", "
      )
    ),
    sep = "\n"
  )
  knitr::asis_output(appendices)
}