Skip to content

Commit

Permalink
processing gwas results
Browse files Browse the repository at this point in the history
  • Loading branch information
explodecomputer committed Oct 24, 2024
1 parent f4304cb commit 7668090
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 36 deletions.
40 changes: 4 additions & 36 deletions 04c-gwas.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,41 +15,9 @@ exec &> >(tee ${results_dir}/04/logfile_aggregate)
nchr=$(cat ${genotype_input_list} | grep -c '^')
echo $nchr

Rscript ${results_dir}/04 $nchr ${phenotype_processed_dir}/phenolist



nphen=$(cat ${phenotype_processed_dir}/phenolist | grep -c '^')

cat ${phenotype_processed_dir}/phenolist | xargs basename

phenotype_processed_dir="/local-scratch/projects/Lifecourse-GWAS/gib/alspac/phen_proc2"
echo $phenotype_processed_dir

gwas=${phenotype_processed_dir}/$(cat ${phenotype_processed_dir}/phenolist | head -n 10 | tail -n 1)
echo $gwas
for gwas in $(cat ${phenotype_processed_dir}/phenolist)
do
bn=$(basename $gwas | sed "s/.phen$//g")
echo $bn
out=${results_dir}/04/${bn}.regenie.gz
> ${out}
echo $out
# for i in 1:nchr
for i in $(seq 1 $nchr)
do
cat ${phenotype_processed_dir}/regenie/step2_${i}_${bn}.regenie.gz >> $out
done
done

ls -l /local-scratch/projects/Lifecourse-GWAS/gib/alspac/phen_proc2/regenie/step2_*_bmi_10-11_both.regenie.gz

ls -lh $out
ls -lh $out


less

libr
echo ${results_dir}/04
echo ${phenotype_processed_dir}/phenolist

Rscript resources/genotypes/process_regenie_results.r ${results_dir}/04 $nchr ${phenotype_processed_dir}/phenolist

echo "Successfully aggregated results"
103 changes: 103 additions & 0 deletions resources/genotypes/process_regenie_results.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
library(dplyr)
library(data.table)
library(parallel)


get_lambda <- function(pvector, pl=FALSE) {
pvector <- pvector[!is.na(pvector) & !is.nan(pvector) & !is.null(pvector) & is.finite(pvector) & pvector<1 & pvector>0]
o <- -log10(sort(pvector, decreasing=FALSE))
e <- -log10(ppoints(length(pvector)))
if(pl) {
plot(e, o, xlab="Expected -log10(p)", ylab="Observed -log10(p)", pch=20, col="blue")
abline(0, 1, col="red")
}
cs <- qchisq(1-pvector, 1)
lambda <- median(cs, na.rm=TRUE) / qchisq(0.5, 1)
return(lambda)
}

parse_file <- function(gwas, resdir) {
message("Reading data")
a <- lapply(gwas$fn, data.table::fread, header=TRUE, data.table=FALSE, nThread=1) %>% bind_rows() %>% as_tibble()
# a <- fread(fn, header=TRUE, data.table=FALSE, colClasses=c()) %>% as_tibble()
# a[,2] <- as.integer(a[,2, drop=TRUE])
# a[,6] <- as.numeric(a[,6, drop=TRUE])
# a[,7] <- as.numeric(a[,7, drop=TRUE])
# a[,8] <- as.numeric(a[,8, drop=TRUE])
# a[,10] <- as.numeric(a[,10, drop=TRUE])
# a[,11] <- as.numeric(a[,11, drop=TRUE])
# a[,12] <- as.numeric(a[,12, drop=TRUE])
# a[,13] <- as.numeric(a[,13, drop=TRUE])

message("Aligning")
a <- subset(a, !is.na(a$GENPOS))
flip <- a$ALLELE1 > a$ALLELE0
temp <- a$ALLELE1[flip]
a$ALLELE1[flip] <- a$ALLELE0[flip]
a$ALLELE0[flip] <- temp
a$BETA[flip] <- a$BETA[flip] * -1
a$A1FREQ[flip] <- 1 - a$A1FREQ[flip]

message("IDs")
a$ID <- paste0(a$CHROM, ":", a$GENPOS, "_", a$ALLELE1, "_", a$ALLELE0)
message("P-values")
a$pval <- 10^-a$LOG10P

message("Summarising")
nom <- gwas$gwas[1] %>% strsplit("_") %>% unlist
s <- tibble(
phen = nom[1],
age = nom[2],
sex = nom[3],
nsnp = nrow(a),
meanchisq = mean(a$CHISQ, na.rm=TRUE),
medianchisq = median(a$CHISQ, na.rm=TRUE),
lambda1 = medianchisq / qchisq(0.5, 1),
# lambda2 = get_lambda(a$pval),
minp = min(a$pval, na.rm=TRUE),
max_n = max(a$N, na.rm=TRUE),
min_n = min(a$N, na.rm=TRUE),
mean_n = mean(a$N, na.rm=TRUE)
)
message("Writing")
b <- a %>% dplyr::select(chr=CHROM, pos=GENPOS, id=ID, ea=ALLELE1, oa=ALLELE0, eaf=A1FREQ, beta=BETA, se=SE, n=N)
out <- file.path(resdir, paste0(gwas$gwas[1], ".regenie.gz"))
data.table::fwrite(b, file=out, row=FALSE, col=TRUE, qu=FALSE, sep="\t", nThread=1)

return(s)
}

args <- commandArgs(T)

resdir <- args[1]
nchr <- as.numeric(args[2])
phenolist <- args[3]

# resdir <- "/local-scratch/projects/Lifecourse-GWAS/gib/alspac/results3/04"
# nchr <- 22
# phenolist <- scan("/local-scratch/projects/Lifecourse-GWAS/gib/alspac/phen_proc2/phenolist", what="character")
phenolist <- scan(phenolist, what="character")

gwass <- lapply(phenolist, \(x) {
nom <- basename(x) %>% gsub(".phen$", "", .)
d <- dirname(x)
tibble(
gwas = nom,
fn = file.path(d, "regenie", paste0("step2_", 1:nchr, "_", nom, ".regenie.gz")),
exists = file.exists(fn)
)
}) %>% bind_rows()
table(gwass$exists)

if(!all(gwass$exists)) {
stop("The following files do not exist: ", paste0(gwass$fn[!gwass$exists], collapse="\n"))
}


ugwas <- unique(gwass$gwas)
gwas_summary <- mclapply(ugwas, \(x) {
message(x)
parse_file(subset(gwass, gwas == x), resdir)
}, mc.cores=as.numeric(Sys.getenv("env_threads")))

saveRDS(gwas_summary, file=file.path(resdir, "gwas_summary.rds"))

0 comments on commit 7668090

Please sign in to comment.