diff --git a/DESCRIPTION b/DESCRIPTION index 5676796..b70950f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: MungeSumstats Type: Package Title: Standardise summary statistics from GWAS -Version: 1.13.2 +Version: 1.13.3 Authors@R: c(person(given = "Alan", family = "Murphy", diff --git a/NEWS.md b/NEWS.md index 3ee7a90..7d1c822 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,11 @@ +## CHANGES IN VERSION 1.13.3 + +### Bug fix +* Bug fix for check 3 in infer effect column - previously A1 & A2 were swapped +when A1 had more matches to the reference genome than A2, which was +incorrect. Now corrected so that they are only flipped when A2 has more matches to +the reference genome. + ## CHANGES IN VERSION 1.13.2 ### New features diff --git a/R/check_allele_flip.R b/R/check_allele_flip.R index 86cf1da..0914423 100644 --- a/R/check_allele_flip.R +++ b/R/check_allele_flip.R @@ -96,6 +96,7 @@ check_allele_flip <- function(sumstats_dt, path, sumstats_dt[is.na(ref_gen_allele), match_type := TRUE] sumstats_dt[A1 == ref_gen_allele, match_type := TRUE] sumstats_dt[A2 == ref_gen_allele, match_type := FALSE] + print(sumstats_dt) # drop cases that don't match either if (allele_flip_drop && nrow(sumstats_dt[A1 != ref_gen_allele & diff --git a/R/format_sumstats.R b/R/format_sumstats.R index 4a6a619..1310bb7 100644 --- a/R/format_sumstats.R +++ b/R/format_sumstats.R @@ -486,7 +486,7 @@ format_sumstats <- function(path, #### Check 40:Check for log10 p instead of p #### sumstats_return <- read_log_pval(sumstats_dt = sumstats_return$sumstats_dt) - + #### Check 2:Check for effect direction #### sumstats_return <- infer_effect_column( diff --git a/R/get_genome_build.R b/R/get_genome_build.R index 3011b60..e2b6a36 100644 --- a/R/get_genome_build.R +++ b/R/get_genome_build.R @@ -181,6 +181,7 @@ get_genome_build <- function(sumstats, on = c("SNP"="SNP","pos"="BP","seqnames"="CHR"), nomatch = FALSE ]) + if (num_37 > num_38) { ref_gen_num <- num_37 ref_genome <- "GRCH37" 
@@ -221,11 +222,12 @@ get_genome_build <- function(sumstats, "ref_allele"="A2","alt_alleles"="A1"), nomatch = FALSE ]) - if(num_a2>=num_a1){ + + if(num_a1>=num_a2){ message("Effect/frq column(s) relate to A2 in the inputted sumstats") #this is what MSS expects so no action required switch_req <- FALSE - }else{#num_a2=2){ @@ -95,7 +97,7 @@ infer_effect_column <- #less than 2 in total means allele info is missing which MSS can try fill #in later message("Allele columns are ambiguous, attempting to infer direction") - #get names for allele mared eff/frq columns + #get names for allele marked eff/frq columns eff_frq_allele_matches <- get_eff_frq_allele_combns() #now look for matches in sumstats fnd_allele_indicator <- @@ -107,10 +109,10 @@ infer_effect_column <- a1_mtch <- sum(grepl("A1",fnd_allele_indicator)) a2_mtch <- sum(grepl("A2",fnd_allele_indicator)) if(a2_mtch>=a1_mtch){ - message("Effect/frq column(s) relate to A2 in the inputted sumstats") + message("Effect/frq column(s) relate to A2 in the sumstats") #this is what MSS expects so no action required }else{#a2_mtch