From 7df66cf2a86f98df0e81f71ffe6d6944d56d30af Mon Sep 17 00:00:00 2001 From: Al-Murphy Date: Mon, 21 Aug 2023 11:05:40 +0000 Subject: [PATCH] Add flip_frq_as_biallelic parameter --- DESCRIPTION | 2 +- NEWS.md | 9 +++++++++ R/check_allele_flip.R | 27 ++++++++++++++++++++++++--- R/format_sumstats.R | 7 +++++++ R/validate_parameters.R | 4 ++++ man/check_allele_flip.Rd | 6 ++++++ man/format_sumstats.Rd | 6 ++++++ man/import_sumstats.Rd | 4 ++++ man/validate_parameters.Rd | 6 ++++++ vignettes/MungeSumstats.Rmd | 4 ++++ 10 files changed, 71 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6f5c71d..7fed3c2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: MungeSumstats Type: Package Title: Standardise summary statistics from GWAS -Version: 1.9.15 +Version: 1.9.16 Authors@R: c(person(given = "Alan", family = "Murphy", diff --git a/NEWS.md b/NEWS.md index ad57350..43ae0c3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,12 @@ +## CHANGES IN VERSION 1.9.16 + +### New features +* flip_frq_as_biallelic parameter added, enabling frequencies of non-bi-allelic +SNPs to be flipped as if they were bi-allelic (1 - frequency), i.e. ignoring the +frequencies of other alternative alleles (assuming these will be negligible). +Note this is not done by default as it is not fully correct, but it may be +useful for some users. 
+ ## CHANGES IN VERSION 1.9.15 ### Bug fix diff --git a/R/check_allele_flip.R b/R/check_allele_flip.R index 1aa8f59..8013903 100644 --- a/R/check_allele_flip.R +++ b/R/check_allele_flip.R @@ -31,6 +31,7 @@ check_allele_flip <- function(sumstats_dt, path, allele_flip_z, allele_flip_frq, bi_allelic_filter, + flip_frq_as_biallelic, imputation_ind, log_folder_ind, check_save_out, @@ -158,7 +159,7 @@ check_allele_flip <- function(sumstats_dt, path, sumstats_dt[match_type == FALSE, A1 := tmp] sumstats_dt[, tmp := NULL] - # flip effect column(s) - BETA, OR, z, log_odds, SIGNED_SUMSTAT, FRQ + # flip effect column(s) - BETA, OR, Z, log_odds, SIGNED_SUMSTAT, FRQ effect_columns <- c("BETA", "OR", "LOG_ODDS", "SIGNED_SUMSTAT") if (allele_flip_z) { effect_columns <- c(effect_columns, "Z") @@ -178,13 +179,33 @@ check_allele_flip <- function(sumstats_dt, path, "bi_allelic_filter to TRUE so\nnon-bi-allelic SNPs are", " removed. Otherwise, set allele_flip_frq to FALSE to ", "not flip the FRQ column but note\nthis could lead to ", - "incorrect FRQ values." + "incorrect FRQ values.\nA new option added is to flip ", + "non-bi-allelic SNPs as if they were bi-allelic (1-FRQ), ", + "to do this set\nflip_frq_as_biallelic to TRUE but note ", + "these values will not be exact down to the missing ", + "frequencies of the other\nalternative allele(s)." 
) if (nrow(sumstats_dt[match_type == FALSE, ]) > 0 && "FRQ" %in% effect_columns && - !bi_allelic_filter) { + !bi_allelic_filter && !flip_frq_as_biallelic) { stop(stp_msg) } + #if set flip_frq_as_biallelic = TRUE, let them know + if (nrow(sumstats_dt[match_type == FALSE, ]) > 0 && + "FRQ" %in% effect_columns && + !bi_allelic_filter && flip_frq_as_biallelic) { + print_msg <- paste0( + "Note: You have set flip_frq_as_biallelic to TRUE meaning of ", + "the ", + formatC(nrow(sumstats_dt[match_type == FALSE, ]), + big.mark = ","), + " SNPs to be flipped, ", + "any non-bi-allelic SNPs\nwill have their frequencies flipped ", + "as (1-FRQ) essentially ignoring the frequencies of any other ", + "alternative alleles." + ) + message(print_msg) + } for (eff_i in effect_columns) { # set updates quicker for DT # conversion done in case, VCF beta column may not be numeric if (eff_i == "FRQ") { diff --git a/R/format_sumstats.R b/R/format_sumstats.R index 2294c12..ce517e5 100644 --- a/R/format_sumstats.R +++ b/R/format_sumstats.R @@ -140,6 +140,10 @@ #' along with effect and z-score columns like Beta? Default TRUE. #' @param bi_allelic_filter Binary Should non-biallelic SNPs be removed. Default #' is TRUE. +#' @param flip_frq_as_biallelic Binary Should non-bi-allelic SNPs frequency +#' values be flipped as 1-p despite there being other alternative alleles? +#' Default is FALSE but if set to TRUE, this allows non-bi-allelic SNPs to be +#' kept despite needing flipping. #' @param snp_ids_are_rs_ids Binary Should the supplied SNP ID's be assumed to #' be RSIDs. If not, imputation using the SNP ID for other columns like #' base-pair position or chromosome will not be possible. 
If set to FALSE, the @@ -249,6 +253,7 @@ format_sumstats <- function(path, allele_flip_z = TRUE, allele_flip_frq = TRUE, bi_allelic_filter = TRUE, + flip_frq_as_biallelic = FALSE, snp_ids_are_rs_ids = TRUE, remove_multi_rs_snp = FALSE, frq_is_maf = TRUE, @@ -336,6 +341,7 @@ format_sumstats <- function(path, allele_flip_z = allele_flip_z, allele_flip_frq = allele_flip_frq, bi_allelic_filter = bi_allelic_filter, + flip_frq_as_biallelic = flip_frq_as_biallelic, snp_ids_are_rs_ids = snp_ids_are_rs_ids, remove_multi_rs_snp = remove_multi_rs_snp, frq_is_maf = frq_is_maf, @@ -702,6 +708,7 @@ format_sumstats <- function(path, allele_flip_z = allele_flip_z, allele_flip_frq = allele_flip_frq, bi_allelic_filter = bi_allelic_filter, + flip_frq_as_biallelic = flip_frq_as_biallelic, imputation_ind = imputation_ind, log_folder_ind = log_folder_ind, check_save_out = check_save_out, diff --git a/R/validate_parameters.R b/R/validate_parameters.R index 0d9686e..0e50c44 100644 --- a/R/validate_parameters.R +++ b/R/validate_parameters.R @@ -27,6 +27,7 @@ validate_parameters <- function(path, allele_flip_z, allele_flip_frq, bi_allelic_filter, + flip_frq_as_biallelic, snp_ids_are_rs_ids, remove_multi_rs_snp, frq_is_maf, @@ -242,6 +243,9 @@ validate_parameters <- function(path, if (!is.logical(bi_allelic_filter)) { stop("bi_allelic_filter must be either TRUE or FALSE") } + if (!is.logical(flip_frq_as_biallelic)) { + stop("flip_frq_as_biallelic must be either TRUE or FALSE") + } if (!is.logical(snp_ids_are_rs_ids)) { stop("snp_ids_are_rs_ids must be either TRUE or FALSE") } diff --git a/man/check_allele_flip.Rd b/man/check_allele_flip.Rd index 4e9e2d3..d536794 100644 --- a/man/check_allele_flip.Rd +++ b/man/check_allele_flip.Rd @@ -17,6 +17,7 @@ check_allele_flip( allele_flip_z, allele_flip_frq, bi_allelic_filter, + flip_frq_as_biallelic, imputation_ind, log_folder_ind, check_save_out, @@ -53,6 +54,11 @@ along with effect and z-score columns like Beta? 
Default TRUE.} \item{bi_allelic_filter}{Binary Should non-biallelic SNPs be removed. Default is TRUE.} +\item{flip_frq_as_biallelic}{Binary Should non-bi-allelic SNPs frequency +values be flipped as 1-p despite there being other alternative alleles? +Default is FALSE but if set to TRUE, this allows non-bi-allelic SNPs to be +kept despite needing flipping.} + \item{imputation_ind}{Binary Should a column be added for each imputation step to show what SNPs have imputed values for differing fields. This includes a field denoting SNP allele flipping (flipped). On the flipped diff --git a/man/format_sumstats.Rd b/man/format_sumstats.Rd index 7e1ab9a..55a3080 100644 --- a/man/format_sumstats.Rd +++ b/man/format_sumstats.Rd @@ -36,6 +36,7 @@ format_sumstats( allele_flip_z = TRUE, allele_flip_frq = TRUE, bi_allelic_filter = TRUE, + flip_frq_as_biallelic = FALSE, snp_ids_are_rs_ids = TRUE, remove_multi_rs_snp = FALSE, frq_is_maf = TRUE, @@ -198,6 +199,11 @@ along with effect and z-score columns like Beta? Default TRUE.} \item{bi_allelic_filter}{Binary Should non-biallelic SNPs be removed. Default is TRUE.} +\item{flip_frq_as_biallelic}{Binary Should non-bi-allelic SNPs frequency +values be flipped as 1-p despite there being other alternative alleles? +Default is FALSE but if set to TRUE, this allows non-bi-allelic SNPs to be +kept despite needing flipping.} + \item{snp_ids_are_rs_ids}{Binary Should the supplied SNP ID's be assumed to be RSIDs. If not, imputation using the SNP ID for other columns like base-pair position or chromosome will not be possible. If set to FALSE, the diff --git a/man/import_sumstats.Rd b/man/import_sumstats.Rd index 4539c5c..37c2339 100644 --- a/man/import_sumstats.Rd +++ b/man/import_sumstats.Rd @@ -164,6 +164,10 @@ not the P-value and so will be flipped i.e. default TRUE.} along with effect and z-score columns like Beta? Default TRUE.} \item{\code{bi_allelic_filter}}{Binary Should non-biallelic SNPs be removed. 
Default is TRUE.} + \item{\code{flip_frq_as_biallelic}}{Binary Should non-bi-allelic SNPs frequency +values be flipped as 1-p despite there being other alternative alleles? +Default is FALSE but if set to TRUE, this allows non-bi-allelic SNPs to be +kept despite needing flipping.} \item{\code{snp_ids_are_rs_ids}}{Binary Should the supplied SNP ID's be assumed to be RSIDs. If not, imputation using the SNP ID for other columns like base-pair position or chromosome will not be possible. If set to FALSE, the diff --git a/man/validate_parameters.Rd b/man/validate_parameters.Rd index ecf7a79..04b9948 100644 --- a/man/validate_parameters.Rd +++ b/man/validate_parameters.Rd @@ -29,6 +29,7 @@ validate_parameters( allele_flip_z, allele_flip_frq, bi_allelic_filter, + flip_frq_as_biallelic, snp_ids_are_rs_ids, remove_multi_rs_snp, frq_is_maf, @@ -146,6 +147,11 @@ along with effect and z-score columns like Beta? Default TRUE.} \item{bi_allelic_filter}{Binary Should non-biallelic SNPs be removed. Default is TRUE.} +\item{flip_frq_as_biallelic}{Binary Should non-bi-allelic SNPs frequency +values be flipped as 1-p despite there being other alternative alleles? +Default is FALSE but if set to TRUE, this allows non-bi-allelic SNPs to be +kept despite needing flipping.} + \item{snp_ids_are_rs_ids}{Binary Should the supplied SNP ID's be assumed to be RSIDs. If not, imputation using the SNP ID for other columns like base-pair position or chromosome will not be possible. If set to FALSE, the diff --git a/vignettes/MungeSumstats.Rmd b/vignettes/MungeSumstats.Rmd index 873b42c..3a2ad60 100644 --- a/vignettes/MungeSumstats.Rmd +++ b/vignettes/MungeSumstats.Rmd @@ -369,6 +369,10 @@ conducted by *MungeSumstats* are: Z-score columns (e.g. Beta). Default is TRUE. - **bi_allelic_filter** Binary, should non-biallelic SNPs be removed. Default is TRUE +- **flip_frq_as_biallelic** Binary, should non-bi-allelic SNPs' frequency + values be flipped as 1-p despite there being other alternative alleles? 
+ Default is FALSE but if set to TRUE, this allows non-bi-allelic SNPs to be + kept despite needing flipping. - **snp_ids_are_rs_ids** Binary, should the SNP IDs inputted be inferred as RS IDs or some arbitrary ID. Default is TRUE.\ - **remove_multi_rs_snp** Binary Sometimes summary statistics can have