From 449624a8e79d202fe701f701ac5e4a6c895c0a4f Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Tue, 28 Nov 2023 13:23:03 +0000 Subject: [PATCH 01/40] Delete not needed library(TwoSampleMR) calls --- tests/testthat/test_add_metadata.r | 1 - tests/testthat/test_eve.R | 1 - tests/testthat/test_harmonise.R | 1 - tests/testthat/test_heterogeneity.R | 1 - tests/testthat/test_instruments.R | 1 - tests/testthat/test_ld.R | 1 - tests/testthat/test_ldsc.R | 1 - tests/testthat/test_leaveoneout.R | 1 - tests/testthat/test_mvmr.R | 1 - tests/testthat/test_otherformats.R | 1 - tests/testthat/test_outcomes.R | 1 - tests/testthat/test_plots.R | 1 - tests/testthat/test_rsq.r | 1 - tests/testthat/test_singlesnp.R | 1 - tests/testthat/test_steiger.R | 1 - 15 files changed, 15 deletions(-) diff --git a/tests/testthat/test_add_metadata.r b/tests/testthat/test_add_metadata.r index 6d23f96a..16f72e69 100644 --- a/tests/testthat/test_add_metadata.r +++ b/tests/testthat/test_add_metadata.r @@ -1,5 +1,4 @@ context("add metadata") -library(TwoSampleMR) test_that("exposure data 1", { d <- extract_instruments('ieu-a-2') %>% add_metadata() diff --git a/tests/testthat/test_eve.R b/tests/testthat/test_eve.R index 29ae2fd7..6b05892f 100644 --- a/tests/testthat/test_eve.R +++ b/tests/testthat/test_eve.R @@ -1,5 +1,4 @@ context("eve") -library(TwoSampleMR) dat <- make_dat("ieu-a-2", "ieu-a-7") %>% add_metadata() diff --git a/tests/testthat/test_harmonise.R b/tests/testthat/test_harmonise.R index e32ca8ed..d3f5b3f3 100644 --- a/tests/testthat/test_harmonise.R +++ b/tests/testthat/test_harmonise.R @@ -1,5 +1,4 @@ context("harmonise") -library(TwoSampleMR) exp_dat <- extract_instruments("ieu-a-2") out_dat <- extract_outcome_data(exp_dat$SNP, "ieu-a-7") diff --git a/tests/testthat/test_heterogeneity.R b/tests/testthat/test_heterogeneity.R index 70e4053c..c1d99bd7 100644 --- a/tests/testthat/test_heterogeneity.R +++ b/tests/testthat/test_heterogeneity.R @@ -1,5 +1,4 @@ context("heterogeneity") 
-library(TwoSampleMR) dat <- make_dat() diff --git a/tests/testthat/test_instruments.R b/tests/testthat/test_instruments.R index 76059d71..4bbc86f6 100644 --- a/tests/testthat/test_instruments.R +++ b/tests/testthat/test_instruments.R @@ -1,5 +1,4 @@ context("Instruments") -library(TwoSampleMR) test_that("server and mrinstruments", { diff --git a/tests/testthat/test_ld.R b/tests/testthat/test_ld.R index 9e726bbd..33c85d21 100644 --- a/tests/testthat/test_ld.R +++ b/tests/testthat/test_ld.R @@ -1,5 +1,4 @@ context("ld") -library(TwoSampleMR) a <- extract_instruments(2, clump=FALSE) diff --git a/tests/testthat/test_ldsc.R b/tests/testthat/test_ldsc.R index f46b4103..290a9796 100644 --- a/tests/testthat/test_ldsc.R +++ b/tests/testthat/test_ldsc.R @@ -1,5 +1,4 @@ context("LDSC") -library(TwoSampleMR) test_that("get afl2", { diff --git a/tests/testthat/test_leaveoneout.R b/tests/testthat/test_leaveoneout.R index 0c5b6645..1b5832c0 100644 --- a/tests/testthat/test_leaveoneout.R +++ b/tests/testthat/test_leaveoneout.R @@ -1,5 +1,4 @@ context("leaveoneout") -library(TwoSampleMR) dat <- make_dat(2, 7) diff --git a/tests/testthat/test_mvmr.R b/tests/testthat/test_mvmr.R index 3bba4a2e..1e7d0577 100644 --- a/tests/testthat/test_mvmr.R +++ b/tests/testthat/test_mvmr.R @@ -1,5 +1,4 @@ context("mvmr") -library(TwoSampleMR) test_that("control", { lipids <- mv_extract_exposures(c("ieu-a-299","ieu-a-300","ieu-a-302")) diff --git a/tests/testthat/test_otherformats.R b/tests/testthat/test_otherformats.R index 4201513c..5ddcfcb1 100644 --- a/tests/testthat/test_otherformats.R +++ b/tests/testthat/test_otherformats.R @@ -1,5 +1,4 @@ context("other formats") -library(TwoSampleMR) dat <- make_dat("ieu-a-2", "ieu-a-7") diff --git a/tests/testthat/test_outcomes.R b/tests/testthat/test_outcomes.R index 0e8d4711..feb86afd 100644 --- a/tests/testthat/test_outcomes.R +++ b/tests/testthat/test_outcomes.R @@ -1,5 +1,4 @@ context("outcome") -library(TwoSampleMR) test_that("outcomes", { diff 
--git a/tests/testthat/test_plots.R b/tests/testthat/test_plots.R index 59fba0fa..428791a4 100644 --- a/tests/testthat/test_plots.R +++ b/tests/testthat/test_plots.R @@ -1,5 +1,4 @@ context("plots") -library(TwoSampleMR) test_that("scatter plot", { dat <- make_dat(2,7) diff --git a/tests/testthat/test_rsq.r b/tests/testthat/test_rsq.r index 29d63997..057edfd9 100644 --- a/tests/testthat/test_rsq.r +++ b/tests/testthat/test_rsq.r @@ -1,5 +1,4 @@ context("add rsq") -library(TwoSampleMR) test_that("exposure data 1", { d <- extract_instruments('ieu-a-2') %>% add_rsq() diff --git a/tests/testthat/test_singlesnp.R b/tests/testthat/test_singlesnp.R index c87a920f..bd8cf63d 100644 --- a/tests/testthat/test_singlesnp.R +++ b/tests/testthat/test_singlesnp.R @@ -1,5 +1,4 @@ context("singlesnp") -library(TwoSampleMR) dat <- make_dat("ieu-a-2", "ieu-a-7") diff --git a/tests/testthat/test_steiger.R b/tests/testthat/test_steiger.R index 5c995df1..1d7ad56c 100644 --- a/tests/testthat/test_steiger.R +++ b/tests/testthat/test_steiger.R @@ -1,5 +1,4 @@ context("steiger") -library(TwoSampleMR) w <- make_dat(2, 7) From 3809c3a0a87d9b755582f529019fa70c0fe72b09 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Tue, 28 Nov 2023 13:19:28 +0000 Subject: [PATCH 02/40] Use string GWAS IDs --- tests/testthat/test_leaveoneout.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test_leaveoneout.R b/tests/testthat/test_leaveoneout.R index 1b5832c0..ed0d4665 100644 --- a/tests/testthat/test_leaveoneout.R +++ b/tests/testthat/test_leaveoneout.R @@ -1,6 +1,6 @@ context("leaveoneout") -dat <- make_dat(2, 7) +dat <- make_dat("ieu-a-2", "ieu-a-7") test_that("leaveoneout", { w <- mr_leaveoneout(dat) From 210617b2c54760bdb765f266384c7bebb4acf9c9 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Tue, 28 Nov 2023 13:29:02 +0000 Subject: [PATCH 03/40] Use string GWAS IDs --- R/make_dat.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/make_dat.R 
b/R/make_dat.R index 9e0956cd..ea9a8fe2 100644 --- a/R/make_dat.R +++ b/R/make_dat.R @@ -2,13 +2,13 @@ #' #' Convenient function to create a harmonised dataset. #' -#' @param exposures The default is `c(2, 301)` (BMI and LDL). -#' @param outcomes The default is `c(7, 1001)` (EDU and CHD). #' @param proxies Look for proxies? Default = TRUE +#' @param exposures The default is `c("ieu-a-2", "ieu-a-301")` (BMI and LDL). +#' @param outcomes The default is `c("ieu-a-7", "ieu-a-1001")` (CHD and EDU). #' #' @export #' @return Harmonised data frame -make_dat <- function(exposures=c(2,301), outcomes=c(7,1001), proxies=TRUE) +make_dat <- function(exposures=c("ieu-a-2","ieu-a-301"), outcomes=c("ieu-a-7","ieu-a-1001"), proxies=TRUE) { a <- extract_instruments(exposures) b <- extract_outcome_data(a$SNP, outcomes, proxies=proxies) From 164b0ff14d933e6d403c766b3fa11f9e3a8f58a8 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Tue, 28 Nov 2023 13:29:12 +0000 Subject: [PATCH 04/40] Improve helpfile formatting --- R/make_dat.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/make_dat.R b/R/make_dat.R index ea9a8fe2..e5708d85 100644 --- a/R/make_dat.R +++ b/R/make_dat.R @@ -2,9 +2,9 @@ #' #' Convenient function to create a harmonised dataset. #' -#' @param proxies Look for proxies? Default = TRUE #' @param exposures The default is `c("ieu-a-2", "ieu-a-301")` (BMI and LDL). #' @param outcomes The default is `c("ieu-a-7", "ieu-a-1001")` (CHD and EDU). +#' @param proxies Look for proxies? 
Default = `TRUE` #' #' @export #' @return Harmonised data frame From 597e01fb2fdc5c1f38a7cec76edf87ac52551a45 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Tue, 28 Nov 2023 13:33:36 +0000 Subject: [PATCH 05/40] devtools::document() --- man/make_dat.Rd | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/man/make_dat.Rd b/man/make_dat.Rd index 8342ee20..4162af50 100644 --- a/man/make_dat.Rd +++ b/man/make_dat.Rd @@ -4,14 +4,18 @@ \alias{make_dat} \title{Convenient function to create a harmonised dataset} \usage{ -make_dat(exposures = c(2, 301), outcomes = c(7, 1001), proxies = TRUE) +make_dat( + exposures = c("ieu-a-2", "ieu-a-301"), + outcomes = c("ieu-a-7", "ieu-a-1001"), + proxies = TRUE +) } \arguments{ -\item{exposures}{The default is \code{c(2, 301)} (BMI and LDL).} +\item{exposures}{The default is \code{c("ieu-a-2", "ieu-a-301")} (BMI and LDL).} -\item{outcomes}{The default is \code{c(7, 1001)} (EDU and CHD).} +\item{outcomes}{The default is \code{c("ieu-a-7", "ieu-a-1001")} (CHD and EDU).} -\item{proxies}{Look for proxies? Default = TRUE} +\item{proxies}{Look for proxies? 
Default = \code{TRUE}} } \value{ Harmonised data frame From 26c7949057585166042e06ee2ae26ad77a7bba9d Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Tue, 28 Nov 2023 13:33:49 +0000 Subject: [PATCH 06/40] Bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 056c2b0e..6e9e22a8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: TwoSampleMR Title: Two Sample MR Functions and Interface to MR Base Database -Version: 0.5.8 +Version: 0.5.9 Authors@R: c( person("Gibran", "Hemani", , "g.hemani@bristol.ac.uk", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-0920-1055")), From 504d841c8f2b2d4a3b1ee0e598cdb159e5da3b2a Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Tue, 28 Nov 2023 13:33:59 +0000 Subject: [PATCH 07/40] Update NEWS.md --- NEWS.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/NEWS.md b/NEWS.md index 96afebb6..a35698f1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# TwoSampleMR v0.5.9 + +* Minor improvements to `make_dat()` default arguments and helpfile +* Minor improvements to package tests + # TwoSampleMR v0.5.8 (Release date: 2023-11-16) From 305a31447c1ba5de37209b041924fcf5413681a7 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Tue, 28 Nov 2023 13:57:57 +0000 Subject: [PATCH 08/40] Update NEWS.md --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index a35698f1..d4a5b7a8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # TwoSampleMR v0.5.9 +* Fixed a minor issue in `dat_to_RadialMR()` * Minor improvements to `make_dat()` default arguments and helpfile * Minor improvements to package tests From 1a5e33e166856e85e592e108300dca1c8f2577b7 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Wed, 20 Dec 2023 10:54:26 +0000 Subject: [PATCH 09/40] Bump actions/upload-artifact to v4 --- .github/workflows/test-coverage.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-coverage.yaml 
b/.github/workflows/test-coverage.yaml index 6642deb4..191adb76 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -42,7 +42,7 @@ jobs: - name: Upload test results if: failure() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: coverage-test-failures path: ${{ runner.temp }}/package From 25630447ac06e2c33654614fe0295213e6dc4d3d Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Tue, 28 Nov 2023 13:57:25 +0000 Subject: [PATCH 10/40] Fix subset call Co-Authored-By: gibran hemani <1924940+explodecomputer@users.noreply.github.com> --- R/other_formats.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/other_formats.R b/R/other_formats.R index adb0fda3..37fedc96 100644 --- a/R/other_formats.R +++ b/R/other_formats.R @@ -172,7 +172,7 @@ dat_to_RadialMR <- function(dat) message("Converting:") message(" - exposure: ", x$exposure[1]) message(" - outcome: ", x$outcome[1]) - d <- subset(x, mr_keep=TRUE) + d <- subset(x, mr_keep) d <- RadialMR::format_radial(d$beta.exposure, d$beta.outcome, d$se.exposure, d$se.outcome, RSID=d$SNP) return(d) }) From a195383c7de36cb416fd4b4b44c886b3c1d3c9f1 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 11 Jan 2024 15:43:22 +0000 Subject: [PATCH 11/40] Bump roxygen2 version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6e9e22a8..441926c5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -68,4 +68,4 @@ Remotes: WSpiller/RadialMR Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.0 From 1662a8231f341cd64c32bb20309382f43b2b30a3 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 11 Jan 2024 18:56:45 +0000 Subject: [PATCH 12/40] usethis::use_package_doc() --- R/TwoSampleMR-package.R | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/R/TwoSampleMR-package.R b/R/TwoSampleMR-package.R index df79c7fb..a65cf643 100644 --- 
a/R/TwoSampleMR-package.R +++ b/R/TwoSampleMR-package.R @@ -1,14 +1,6 @@ -#' TwoSampleMR: Two Sample MR functions and interface to MR Base database -#' -#' A package for performing Mendelian randomization using GWAS summary data. -#' It uses the [IEU GWAS database](https://gwas.mrcieu.ac.uk/) to obtain data automatically, -#' and a wide range of methods to run the analysis. You can use the [MR-Base web app](https://www.mrbase.org/) -#' to try out a limited range of the functionality in this package, -#' but for any serious work we strongly recommend using this R package. -#' -#' **Full documentation available here:** [https://mrcieu.github.io/TwoSampleMR](https://mrcieu.github.io/TwoSampleMR/) -#' -#' @name TwoSampleMR-package -#' @aliases TwoSampleMR twosamplemr -#' @docType package +#' @keywords internal +"_PACKAGE" + +## usethis namespace: start +## usethis namespace: end NULL From 25658de025a2f2fd2ad9cb65405fd26b88f5af1c Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 11 Jan 2024 15:44:25 +0000 Subject: [PATCH 13/40] Move content from TwoSampleMR-package.R --- DESCRIPTION | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 441926c5..b8b0989e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,9 +16,10 @@ Authors@R: c( comment = c(ORCID = "0000-0003-4655-4511")) ) Description: A package for performing Mendelian randomization using GWAS - summary data. It uses the IEU GWAS database to obtain data - automatically, and a wide range of methods to run the analysis. You - can use the MR-Base web app to try out a limited range of the + summary data. It uses the IEU GWAS database + to automatically obtain data, and a wide + range of methods to run the analysis. You can use the MR-Base web app + to try out a limited range of the functionality in this package, but for any serious work we strongly recommend using this R package. 
License: MIT + file LICENSE From f804400eaffd7c443815e42c5aae7cbd8138595a Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 11 Jan 2024 19:11:11 +0000 Subject: [PATCH 14/40] devtools::document() --- man/TwoSampleMR-package.Rd | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/man/TwoSampleMR-package.Rd b/man/TwoSampleMR-package.Rd index b36eedb4..4677d2ef 100644 --- a/man/TwoSampleMR-package.Rd +++ b/man/TwoSampleMR-package.Rd @@ -2,17 +2,32 @@ % Please edit documentation in R/TwoSampleMR-package.R \docType{package} \name{TwoSampleMR-package} -\alias{TwoSampleMR-package} \alias{TwoSampleMR} -\alias{twosamplemr} -\title{TwoSampleMR: Two Sample MR functions and interface to MR Base database} +\alias{TwoSampleMR-package} +\title{TwoSampleMR: Two Sample MR Functions and Interface to MR Base Database} \description{ -A package for performing Mendelian randomization using GWAS summary data. -It uses the \href{https://gwas.mrcieu.ac.uk/}{IEU GWAS database} to obtain data automatically, -and a wide range of methods to run the analysis. You can use the \href{https://www.mrbase.org/}{MR-Base web app} -to try out a limited range of the functionality in this package, -but for any serious work we strongly recommend using this R package. +A package for performing Mendelian randomization using GWAS summary data. It uses the IEU GWAS database \url{https://gwas.mrcieu.ac.uk/} to automatically obtain data, and a wide range of methods to run the analysis. You can use the MR-Base web app \url{https://www.mrbase.org/} to try out a limited range of the functionality in this package, but for any serious work we strongly recommend using this R package. 
+} +\seealso{ +Useful links: +\itemize{ + \item \url{https://github.com/MRCIEU/TwoSampleMR} + \item \url{https://mrcieu.github.io/TwoSampleMR/} + \item Report bugs at \url{https://github.com/MRCIEU/TwoSampleMR/issues/} +} + +} +\author{ +\strong{Maintainer}: Gibran Hemani \email{g.hemani@bristol.ac.uk} (\href{https://orcid.org/0000-0003-0920-1055}{ORCID}) + +Authors: +\itemize{ + \item Philip Haycock \email{philip.haycock@bristol.ac.uk} (\href{https://orcid.org/0000-0001-5001-3350}{ORCID}) + \item Jie Zheng \email{Jie.Zheng@bristol.ac.uk} (\href{https://orcid.org/0000-0002-6623-6839}{ORCID}) + \item Tom Gaunt \email{Tom.Gaunt@bristol.ac.uk} (\href{https://orcid.org/0000-0003-0924-3247}{ORCID}) + \item Ben Elsworth \email{Ben.Elsworth@bristol.ac.uk} (\href{https://orcid.org/0000-0001-7328-4233}{ORCID}) + \item Tom Palmer \email{tom.palmer@bristol.ac.uk} (\href{https://orcid.org/0000-0003-4655-4511}{ORCID}) } -\details{ -\strong{Full documentation available here:} \href{https://mrcieu.github.io/TwoSampleMR/}{https://mrcieu.github.io/TwoSampleMR} + } +\keyword{internal} From 39671a9007b9dba12119e371f72b34c83b1787f2 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 11 Jan 2024 19:46:11 +0000 Subject: [PATCH 15/40] Add trailing / to URL --- README.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.Rmd b/README.Rmd index 1e85584b..7c151e55 100644 --- a/README.Rmd +++ b/README.Rmd @@ -9,4 +9,4 @@ output: github_document -**Full documentation available here:** https://mrcieu.github.io/TwoSampleMR +**Full documentation available here:** https://mrcieu.github.io/TwoSampleMR/ From 43458b5e10e72df075ff448c595c5caf42ef3486 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 11 Jan 2024 19:46:37 +0000 Subject: [PATCH 16/40] Amend URL to CRAN canonical form --- vignettes/perform_mr.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/perform_mr.Rmd b/vignettes/perform_mr.Rmd index c0a71df7..3431e38e 100644 --- 
a/vignettes/perform_mr.Rmd +++ b/vignettes/perform_mr.Rmd @@ -601,7 +601,7 @@ If you want to perform analysis with your local summary data (i.e. not in the Op In the examples shown so far it is assumed that instruments are independent (i.e. they are not in linkage disequilibrium, LD). This is to avoid 'double counting' effects. An alternative approach is to estimate the MR effects accounting for the correlation between variants. -The TwoSampleMR package has not implemented this yet, but the [MendelianRandomization](https://cran.r-project.org/web/packages/MendelianRandomization/index.html) R package by Olena Yavorska and Stephen Burgess does have this functionality. We can use the TwoSampleMR package to extract, format and harmonise data, and then convert to the format required by the MendelianRandomization package. The IEU GWAS database server has the individual level genetic data for ~500 Europeans in 1000 genomes data, and can obtain the LD matrix for a set of SNPs using these data. For example: +The TwoSampleMR package has not implemented this yet, but the [MendelianRandomization](https://CRAN.R-project.org/package=MendelianRandomization) R package by Olena Yavorska and Stephen Burgess does have this functionality. We can use the TwoSampleMR package to extract, format and harmonise data, and then convert to the format required by the MendelianRandomization package. The IEU GWAS database server has the individual level genetic data for ~500 Europeans in 1000 genomes data, and can obtain the LD matrix for a set of SNPs using these data. 
For example: ```{r eval=FALSE} snplist <- c("rs234", "rs1205") From fbe3a9267784f74092181be16f5a8525668d9a7c Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 11 Jan 2024 19:47:04 +0000 Subject: [PATCH 17/40] Update PHESANT pubmed URL --- vignettes/gwas2020.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/gwas2020.Rmd b/vignettes/gwas2020.Rmd index 0a0658c9..e85719e0 100644 --- a/vignettes/gwas2020.Rmd +++ b/vignettes/gwas2020.Rmd @@ -25,7 +25,7 @@ Another change is that the R package that managed the authentication has updated ### UKBiobank data has been curated -We conducted a large GWAS analysis using a pipeline that systematically analysed every [PHESANT](https://www.ncbi.nlm.nih.gov/pubmed/29040602) phenotype in UK Biobank. There were previously ~20k traits with complete GWAS data, but a majority of these were binary traits based on very few numbers of cases. We have now filtered out unreliable datasets, there are 2514 traits remaining, with any binary traits removed that had fewer than 1000 cases. Another issue is the combination of small numbers of cases and allele frequency - here minor allele count (MAC) for a particular association could be very small which would lead to high false positives when using Bolt-LMM. The remaining traits have been filtered to only retain associations where the MAC > 90. +We conducted a large GWAS analysis using a pipeline that systematically analysed every [PHESANT](https://pubmed.ncbi.nlm.nih.gov/29040602/) phenotype in UK Biobank. There were previously ~20k traits with complete GWAS data, but a majority of these were binary traits based on very few numbers of cases. We have now filtered out unreliable datasets, there are 2514 traits remaining, with any binary traits removed that had fewer than 1000 cases. 
Another issue is the combination of small numbers of cases and allele frequency - here minor allele count (MAC) for a particular association could be very small which would lead to high false positives when using Bolt-LMM. The remaining traits have been filtered to only retain associations where the MAC > 90. Document detailing this investigation here: https://htmlpreview.github.io/?https://raw.githubusercontent.com/MRCIEU/ukbb-gwas-analysis/master/docs/ldsc_clumped_analysis.html?token=AAOV6TBQXEXEPT7SUXXLWMC6DWP3O From 9a4f2973fdd027c44f044ef01c5f85cd76517655 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 11 Jan 2024 19:47:21 +0000 Subject: [PATCH 18/40] Update opengwas-requests URL --- vignettes/gwas2020.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/gwas2020.Rmd b/vignettes/gwas2020.Rmd index e85719e0..8b39e258 100644 --- a/vignettes/gwas2020.Rmd +++ b/vignettes/gwas2020.Rmd @@ -109,7 +109,7 @@ Either the data in the database, or the GWAS VCF files, can be queried and the r ## How to request new data -We have setup a github issues page here: https://github.com/MRCIEU/igd-data-requests/issues +We have setup a github issues page here: https://github.com/MRCIEU/opengwas-requests/issues Please visit here to make a log of new data requests, or to contribute new data. From e580373721ef383fa8c9ebfc31f840fae93ff5ef Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 11 Jan 2024 19:47:44 +0000 Subject: [PATCH 19/40] Add <> around URL --- index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index.md b/index.md index 1c798b3e..7ce95412 100644 --- a/index.md +++ b/index.md @@ -10,7 +10,7 @@ A package for performing Mendelian randomization using GWAS summary data. 
It use ## January 2020 major update -**We have made substantial changes to the package, database and reference panels.** For full details of the changes, please visit https://mrcieu.github.io/TwoSampleMR/articles/gwas2020.html +**We have made substantial changes to the package, database and reference panels.** For full details of the changes, please visit <https://mrcieu.github.io/TwoSampleMR/articles/gwas2020.html> ## Installation From 46fbc8ceefe9228d8b6fd602dd1e1ad20b2fb0d1 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 11 Jan 2024 19:48:08 +0000 Subject: [PATCH 20/40] Update lifecycle URL --- index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index.md b/index.md index 7ce95412..117099f8 100644 --- a/index.md +++ b/index.md @@ -2,8 +2,8 @@ [![R-CMD-check](https://github.com/MRCIEU/TwoSampleMR/actions/workflows/check-full.yaml/badge.svg)](https://github.com/MRCIEU/TwoSampleMR/actions/workflows/check-full.yaml) -[![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://www.tidyverse.org/lifecycle/#experimental) [![DOI](https://zenodo.org/badge/49515156.svg)](https://zenodo.org/badge/latestdoi/49515156) [![Codecov test coverage](https://codecov.io/gh/MRCIEU/TwoSampleMR/branch/master/graph/badge.svg)](https://codecov.io/gh/MRCIEU/TwoSampleMR?branch=master) +[![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html) [![DOI](https://zenodo.org/badge/49515156.svg)](https://zenodo.org/badge/latestdoi/49515156) A package for performing Mendelian randomization using GWAS summary data. It uses the [IEU GWAS database](https://gwas.mrcieu.ac.uk/) to obtain data automatically, and a wide range of methods to run the analysis. You can use the [MR-Base web app](https://www.mrbase.org/) to try out a limited range of the functionality in this package, but for any serious work we strongly recommend using this R package. 
From 055e3fb8a3825b510442ae3c9cbba80fd92829bc Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 11 Jan 2024 19:48:25 +0000 Subject: [PATCH 21/40] Update codecov URL --- index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index.md b/index.md index 117099f8..0c1cdafe 100644 --- a/index.md +++ b/index.md @@ -2,8 +2,8 @@ [![R-CMD-check](https://github.com/MRCIEU/TwoSampleMR/actions/workflows/check-full.yaml/badge.svg)](https://github.com/MRCIEU/TwoSampleMR/actions/workflows/check-full.yaml) -[![Codecov test coverage](https://codecov.io/gh/MRCIEU/TwoSampleMR/branch/master/graph/badge.svg)](https://codecov.io/gh/MRCIEU/TwoSampleMR?branch=master) [![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html) [![DOI](https://zenodo.org/badge/49515156.svg)](https://zenodo.org/badge/latestdoi/49515156) +[![Codecov test coverage](https://codecov.io/gh/MRCIEU/TwoSampleMR/branch/master/graph/badge.svg)](https://app.codecov.io/gh/MRCIEU/TwoSampleMR?branch=master) A package for performing Mendelian randomization using GWAS summary data. It uses the [IEU GWAS database](https://gwas.mrcieu.ac.uk/) to obtain data automatically, and a wide range of methods to run the analysis. You can use the [MR-Base web app](https://www.mrbase.org/) to try out a limited range of the functionality in this package, but for any serious work we strongly recommend using this R package. 
From 72a978141004e3bc95d7a4edaf624672ab03c0ad Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 11 Jan 2024 19:48:59 +0000 Subject: [PATCH 22/40] rmarkdown::render('README.Rmd') --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 842ef110..e77bfd44 100644 --- a/README.md +++ b/README.md @@ -7,10 +7,10 @@ [![R-CMD-check](https://github.com/MRCIEU/TwoSampleMR/actions/workflows/check-full.yaml/badge.svg)](https://github.com/MRCIEU/TwoSampleMR/actions/workflows/check-full.yaml) [![Lifecycle: -experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://www.tidyverse.org/lifecycle/#experimental) +experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html) [![DOI](https://zenodo.org/badge/49515156.svg)](https://zenodo.org/badge/latestdoi/49515156) [![Codecov test -coverage](https://codecov.io/gh/MRCIEU/TwoSampleMR/branch/master/graph/badge.svg)](https://codecov.io/gh/MRCIEU/TwoSampleMR?branch=master) +coverage](https://codecov.io/gh/MRCIEU/TwoSampleMR/branch/master/graph/badge.svg)](https://app.codecov.io/gh/MRCIEU/TwoSampleMR?branch=master) A package for performing Mendelian randomization using GWAS summary @@ -56,4 +56,4 @@ is available here: **Full documentation available here:** -<https://mrcieu.github.io/TwoSampleMR> +<https://mrcieu.github.io/TwoSampleMR/> From 921cbeeb2758b909e0e2c4dfe81075d0df9ea4e8 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 11 Jan 2024 19:49:18 +0000 Subject: [PATCH 23/40] Update bioRxiv URL --- R/moe.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/moe.R b/R/moe.R index d37f6afd..fcbb278a 100644 --- a/R/moe.R +++ b/R/moe.R @@ -151,7 +151,7 @@ get_rsq <- function(dat) #' Mixture of experts #' -#' Based on the method described here \url{https://www.biorxiv.org/content/early/2017/08/23/173682}. +#' Based on the method described here \url{https://www.biorxiv.org/content/10.1101/173682v2}. 
#' Once all MR methods have been applied to a summary set, you can then use the mixture of experts to predict the method most likely to be the most accurate. #' #' @param res Output from [mr_wrapper()]. From 034a948810fe8e5acad53a7bdf48a36eabac7f89 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 11 Jan 2024 19:49:40 +0000 Subject: [PATCH 24/40] Use \doi{} instead of --- R/add_rsq.r | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/add_rsq.r b/R/add_rsq.r index 11cc1ac7..86f563d5 100644 --- a/R/add_rsq.r +++ b/R/add_rsq.r @@ -222,7 +222,7 @@ compareNA <- function(v1,v2) { #' Estimate proportion of variance of liability explained by SNP in general population #' #' This uses equation 10 in Lee et al. A Better Coefficient of Determination for Genetic Profile Analysis. -#' Genetic Epidemiology 36: 214–224 (2012) <https://doi.org/10.1002/gepi.21614>. +#' Genetic Epidemiology 36: 214–224 (2012) \doi{10.1002/gepi.21614}. #' #' @param lor Vector of Log odds ratio. #' @param af Vector of allele frequencies. From bf751bf283c7955304a49d218c8dcba3239e6b99 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 11 Jan 2024 19:52:18 +0000 Subject: [PATCH 25/40] devtools::document() --- man/get_r_from_lor.Rd | 2 +- man/mr_moe.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/man/get_r_from_lor.Rd b/man/get_r_from_lor.Rd index 90646a9b..f44f275f 100644 --- a/man/get_r_from_lor.Rd +++ b/man/get_r_from_lor.Rd @@ -34,5 +34,5 @@ Vector of signed r values } \description{ This uses equation 10 in Lee et al. A Better Coefficient of Determination for Genetic Profile Analysis. -Genetic Epidemiology 36: 214–224 (2012) \url{https://doi.org/10.1002/gepi.21614}. +Genetic Epidemiology 36: 214–224 (2012) \doi{10.1002/gepi.21614}. 
} diff --git a/man/mr_moe.Rd b/man/mr_moe.Rd index aee6cb64..bf8348e0 100644 --- a/man/mr_moe.Rd +++ b/man/mr_moe.Rd @@ -15,7 +15,7 @@ mr_moe(res, rf) List } \description{ -Based on the method described here \url{https://www.biorxiv.org/content/early/2017/08/23/173682}. +Based on the method described here \url{https://www.biorxiv.org/content/10.1101/173682v2}. Once all MR methods have been applied to a summary set, you can then use the mixture of experts to predict the method most likely to be the most accurate. } \details{ From c4d705ba04c86644d83be890b1bf947f4d525d47 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Tue, 23 Jan 2024 06:57:01 +0000 Subject: [PATCH 26/40] Bump roxygen2 version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index b8b0989e..fb0ab9bb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -69,4 +69,4 @@ Remotes: WSpiller/RadialMR Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.0 +RoxygenNote: 7.3.1 From 5de7ebc906eaf935d55ca527c22e202cc417fe48 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 1 Feb 2024 09:56:10 +0000 Subject: [PATCH 27/40] Run tests on macos-14 M1 runners --- .github/workflows/check-full.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/check-full.yaml b/.github/workflows/check-full.yaml index 028d375d..dfc9e6ef 100644 --- a/.github/workflows/check-full.yaml +++ b/.github/workflows/check-full.yaml @@ -19,6 +19,7 @@ jobs: fail-fast: false matrix: config: + - {os: macos-14, r: 'release'} - {os: macOS-latest, r: 'release'} - {os: windows-latest, r: 'release'} - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} From c1cdac356011d4b7caf00e8e2a7dc1202b82f785 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 1 Feb 2024 09:58:45 +0000 Subject: [PATCH 28/40] Add GHA bullet to NEWS --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index d4a5b7a8..888033c6 100644 --- a/NEWS.md +++ b/NEWS.md @@ 
-3,6 +3,7 @@ * Fixed a minor issue in `dat_to_RadialMR()` * Minor improvements to `make_dat()` default arguments and helpfile * Minor improvements to package tests +* Amendments to GitHub Actions workflows # TwoSampleMR v0.5.8 From 420372c5cefe6fa4399a93c072fe5abbd0e4011b Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 1 Feb 2024 10:07:45 +0000 Subject: [PATCH 29/40] Add updated URLs NEWS bullet --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 888033c6..61bd6d08 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,7 @@ * Minor improvements to `make_dat()` default arguments and helpfile * Minor improvements to package tests * Amendments to GitHub Actions workflows +* Updated several URLs which had changed # TwoSampleMR v0.5.8 From 3ee2508a78b5d6eceef8c18f4df923e9e9c07c87 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 1 Feb 2024 10:18:40 +0000 Subject: [PATCH 30/40] Delete whitespace --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 61bd6d08..766a4d09 100644 --- a/NEWS.md +++ b/NEWS.md @@ -14,7 +14,7 @@ * Updated URL to R-CMD-check README badge * Updates to GitHub Actions workflows -TwoSampleMR v0.5.7 +TwoSampleMR v0.5.7 ============== (Release date: 2023-05-29) From dcd2ee7d8dec6fe744c89d0e99ab213083547a8c Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 1 Feb 2024 10:22:20 +0000 Subject: [PATCH 31/40] Delete whitespace --- vignettes/gwas2020.Rmd | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vignettes/gwas2020.Rmd b/vignettes/gwas2020.Rmd index 8b39e258..4d9be914 100644 --- a/vignettes/gwas2020.Rmd +++ b/vignettes/gwas2020.Rmd @@ -19,7 +19,7 @@ There is backward compatibility built into the R packages that access the data, ### Authentication -Previously you would automatically be asked to authenticate any query to the database, through google. 
Now, we are making authentication voluntary - something that you do at the start of a session only if you need access to specific private datasets on the database. For the vast majority of use cases this is not required. +Previously you would automatically be asked to authenticate any query to the database, through google. Now, we are making authentication voluntary - something that you do at the start of a session only if you need access to specific private datasets on the database. For the vast majority of use cases this is not required. Another change is that the R package that managed the authentication has updated, and the file tokens generated are slightly different. For full information on how to deal with this, see here: https://mrcieu.github.io/ieugwasr/articles/guide.html#authentication @@ -35,7 +35,7 @@ Previously the data were QC'd to remove malformed results and then deposited as ### LD reference panel is now harmonised -We have updated the LD reference panel to be harmonised against human genome build 37, and as a consequence a few variants have been lost from the version that was previously used. +We have updated the LD reference panel to be harmonised against human genome build 37, and as a consequence a few variants have been lost from the version that was previously used. ### Instrument lists are up-to-date @@ -65,11 +65,11 @@ Previously we were excluding these, but they are now retained ### Multi-allelic variants are retained -Previously we were excluding these, but they are now retained. Be warned that if you extract a variant that has multiple alleles then you may get more than one row for that variant. +Previously we were excluding these, but they are now retained. Be warned that if you extract a variant that has multiple alleles then you may get more than one row for that variant. 
### More data -Automated download from the EBI repository, and an automated upload system and batch data processing system means that more data can be added faster to keep the database current. +Automated download from the EBI repository, and an automated upload system and batch data processing system means that more data can be added faster to keep the database current. ### Error messages are more informative @@ -89,7 +89,7 @@ It is now possible to perform clumping, or create LD matrices, using your own lo ### Access the data directly -Previously the data was only accessible through the database. Now the data can be downloaded in "GWAS VCF" format from here https://gwas.mrcieu.ac.uk/. (IEU members can access all the data on RDSF or bluecrystal4 directly). This means that if you want to perform very large or numerous operations, you can do it on HPC or locally in a more performant manner by using the data files directly. Please see the [gwasvcf R package](https://github.com/mrcieu/gwasvcf) on how to work with these data. +Previously the data was only accessible through the database. Now the data can be downloaded in "GWAS VCF" format from here https://gwas.mrcieu.ac.uk/. (IEU members can access all the data on RDSF or bluecrystal4 directly). This means that if you want to perform very large or numerous operations, you can do it on HPC or locally in a more performant manner by using the data files directly. Please see the [gwasvcf R package](https://github.com/mrcieu/gwasvcf) on how to work with these data. 
### Connect the data to different analytical tools From 73ba2c40b9650be767b2b359f9750877b9bf3880 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 1 Feb 2024 10:22:37 +0000 Subject: [PATCH 32/40] Delete whitespace and enclose URL in <> --- vignettes/gwas2020.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/gwas2020.Rmd b/vignettes/gwas2020.Rmd index 4d9be914..364d1aab 100644 --- a/vignettes/gwas2020.Rmd +++ b/vignettes/gwas2020.Rmd @@ -53,7 +53,7 @@ We are using Elasticsearch and Neo4j on an Oracle Cloud Infrastructure to serve ### Browse available datasets online -We have a new home for the GWAS summary data: https://gwas.mrcieu.ac.uk/. +We have a new home for the GWAS summary data: <https://gwas.mrcieu.ac.uk/>. ### Chromosome and position From e26730f6431db4400e61888711dd2fa3a62d0c8b Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 1 Feb 2024 10:30:32 +0000 Subject: [PATCH 33/40] Call functions functions --- vignettes/exposure.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vignettes/exposure.Rmd b/vignettes/exposure.Rmd index 544a7b56..c1383d06 100644 --- a/vignettes/exposure.Rmd +++ b/vignettes/exposure.Rmd @@ -149,7 +149,7 @@ bmi_exp_dat$exposure <- "BMI" ## Using an existing data frame -If the data already exists as a data frame in R then it can be converted into the correct format using the `format_data` function. For example, here is some randomly created data: +If the data already exists as a data frame in R then it can be converted into the correct format using the `format_data()` function. For example, here is some randomly created data: ```{r} random_df <- data.frame( @@ -319,7 +319,7 @@ You can provide a list of SNP IDs, the SNPs will be extracted from 1000 genomes bmi_exp_dat <- clump_data(bmi_exp_dat) ``` -The `clump_data` command takes any data frame that has been formatted to be an exposure data type of data frame. Note that for the instruments in the R/MRInstruments package the SNPs are already LD clumped.
+The `clump_data()` function takes any data frame that has been formatted to be an exposure data type of data frame. Note that for the instruments in the MRInstruments package the SNPs are already LD clumped. **Note:** The LD reference panel only includes SNPs (no INDELs). There are five super-populations from which LD can be calculated, by default European samples are used. Only SNPs with MAF > 0.01 within-population are available. From f310972381254ca958d99b2f1cd4f35eead54bcf Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 1 Feb 2024 10:32:20 +0000 Subject: [PATCH 34/40] Delete whitespace --- vignettes/outcome.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vignettes/outcome.Rmd b/vignettes/outcome.Rmd index 0644fb88..347e1520 100644 --- a/vignettes/outcome.Rmd +++ b/vignettes/outcome.Rmd @@ -24,11 +24,11 @@ knitr::opts_chunk$set( library(TwoSampleMR) ``` -Once instruments for the exposure trait have been specified, those variants need to be extracted from the outcome trait. +Once instruments for the exposure trait have been specified, those variants need to be extracted from the outcome trait. ## Available studies in IEU GWAS database -The IEU GWAS database (IGD) contains complete GWAS summary statistics from a large number of studies. You can browse them here: +The IEU GWAS database (IGD) contains complete GWAS summary statistics from a large number of studies. 
You can browse them here: https://gwas.mrcieu.ac.uk/ From 2dacc300f98452012e77869e25bfc7934f6493c7 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 1 Feb 2024 10:36:38 +0000 Subject: [PATCH 35/40] Delete whitespace --- vignettes/perform_mr.Rmd | 51 ++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/vignettes/perform_mr.Rmd b/vignettes/perform_mr.Rmd index 3431e38e..601cba8a 100644 --- a/vignettes/perform_mr.Rmd +++ b/vignettes/perform_mr.Rmd @@ -63,7 +63,7 @@ mr(dat, method_list = c("mr_egger_regression", "mr_ivw")) By default, all the methods that are labelled `TRUE` in the `use_by_default` column are used by the `mr()` function. -* * * +* * * ## Sensitivity analyses @@ -123,7 +123,7 @@ res_loo <- mr_leaveoneout(dat) By default the method used is the inverse variance weighted method, but this can be changed by using the `method` argument. -* * * +* * * ## Plots @@ -217,7 +217,8 @@ p4[[1]] A 1-to-many MR analysis interrogates the effect of a single exposure on multiple outcomes or multiple exposures on a single outcome. The results of this analysis can be visualised using the 1-to-many forest plot, with or without stratification on a categorical variable. From a visual point of view, the function works best for 50 or fewer results and is not really designed to handle more than a 100 results. If your number of results is much greater than 50, it may be better to split these across two separate plots. For example, if you have 100 sets of results you could divide these equally across two plots and then combine the two plots together in another programme like Powerpoint. The function assumes the results are already in the right order for plotting. As such, users are advised to sort their results according to how they would like them to appear in the plot. Users can use their own code to do this or they can use the `sort_1_to_many()` function. 
-### Step 1: generate 1-to-many MR results +### Step 1: generate 1-to-many MR results + ```{r cache=FALSE, warning=FALSE, eval=FALSE} exp_dat <- extract_instruments(outcomes = c(2, 100, 1032, 104, 1, 72, 999)) table(exp_dat$exposure) @@ -265,7 +266,7 @@ forest_plot_1_to_many( ) ``` -It is also possible to add additional columns and column titles and to choose the size of the text in the columns: +It is also possible to add additional columns and column titles and to choose the size of the text in the columns: ```{r cache=FALSE, warning=FALSE, message=FALSE, eval=FALSE} res$pval<-formatC(res$pval, format = "e", digits = 2) @@ -316,9 +317,9 @@ forest_plot_1_to_many( ) ``` -#### Example 2. MR results for multiple MR methods grouped by multiple exposures +#### Example 2. MR results for multiple MR methods grouped by multiple exposures -In this next example we plot the results from an analysis of the effect of multiple exposures on coronary heart disease using multiple methods, with results grouped by exposure. We also want the result for the IVW method to be given priority and to go above the other methods. We also want the exposure with the largest IVW effect size to go the top of the plot. We also set the TraitM argument to the column describing the MR method. This is because we are grouping the results on the exposures. Normally the row labels would correspond to the exposures but in this example we want the row names to correspond to the MR method. +In this next example we plot the results from an analysis of the effect of multiple exposures on coronary heart disease using multiple methods, with results grouped by exposure. We also want the result for the IVW method to be given priority and to go above the other methods. We also want the exposure with the largest IVW effect size to go the top of the plot. We also set the TraitM argument to the column describing the MR method. This is because we are grouping the results on the exposures. 
Normally the row labels would correspond to the exposures but in this example we want the row names to correspond to the MR method. ```{r cache=FALSE, warning=FALSE, fig.height=10, eval=FALSE} res <- mr(dat2) @@ -353,7 +354,7 @@ forest_plot_1_to_many( #### Example 3. Stratify results on a grouping variable -In this next example we plot the same results as above but with results stratified by a grouping variable. We also select one MR method for each unique exposure-outcome combination and sort the results by decreasing effect size within each group (i.e. largest effect at the top). +In this next example we plot the same results as above but with results stratified by a grouping variable. We also select one MR method for each unique exposure-outcome combination and sort the results by decreasing effect size within each group (i.e. largest effect at the top). ```{r cache=FALSE, warning=FALSE, eval=FALSE} res <- mr(dat2) @@ -383,9 +384,9 @@ forest_plot_1_to_many( ) ``` -In the above example we made up an arbitrary grouping variable called "subcategory" with values "Group 1" and "Group 2". Typically, however, the grouping variable might correspond to something like a trait ontology (e.g. anthropometric and glycemic traits) or study design (e.g. MR and observational studies). +In the above example we made up an arbitrary grouping variable called "subcategory" with values "Group 1" and "Group 2". Typically, however, the grouping variable might correspond to something like a trait ontology (e.g. anthropometric and glycemic traits) or study design (e.g. MR and observational studies). -#### Example 4. Effect of BMI on 103 diseases +#### Example 4. Effect of BMI on 103 diseases The plot function works best with 50 or fewer rows and is not really designed to handle more than a 100. Visualising a single-column forest plot with 100 results is also quite difficult. If your number of results is much greater than 50, it is advisable to split the results across two different plots. 
In the example below we select BMI as the exposure and test this against 103 diseases in the IEU GWAS database: @@ -456,7 +457,7 @@ plot1 dev.off() ``` -* * * +* * * ## MR.RAPS: Many weak instruments analysis @@ -481,7 +482,7 @@ res <- ## Reports -A report can be generated that performs all MR analyses, sensitivity analyses, and plots, and presents them in a single self-contained html web page, word document, or pdf document. +A report can be generated that performs all MR analyses, sensitivity analyses, and plots, and presents them in a single self-contained html web page, word document, or pdf document. ```{r eval=FALSE} mr_report(dat) @@ -510,7 +511,7 @@ knitr::kable(out) It calculates the variance explained in the exposure and the outcome by the instrumenting SNPs, and tests if the variance in the outcome is less than the exposure. -This test is, like many others, liable to give inaccurate causal directions under some measurement error parameters in the exposure and the outcome (e.g. if the outcome has much lower measurement precision then its proportion of variance explained will be underestimated). Sensitivity can be applied to evaluate the extent to which the inferred causal direction is liable to measurement error, in two ways. +This test is, like many others, liable to give inaccurate causal directions under some measurement error parameters in the exposure and the outcome (e.g. if the outcome has much lower measurement precision then its proportion of variance explained will be underestimated). Sensitivity can be applied to evaluate the extent to which the inferred causal direction is liable to measurement error, in two ways. 1. Provide estimates of measurement error for the exposure and the outcome, and obtain an adjusted estimate of the causal direction 2. 
For all possible values of measurement error, identify the proportion of the parameter space which supports the inferred causal direction @@ -549,7 +550,7 @@ id_exposure <- c("ieu-a-299", "ieu-a-300", "ieu-a-302") id_outcome <- "ieu-a-7" ``` -First obtain the instruments for each lipid fraction. This entails obtaining a combined set of SNPs including all instruments, and getting those SNPs for each lipid fraction. Therefore, if there are e.g. 20 instruments for each of 3 lipid fractions, but combined there are 30 unique SNPs, then we need to extract each of the 30 SNPs from each lipid fraction (exposure). +First obtain the instruments for each lipid fraction. This entails obtaining a combined set of SNPs including all instruments, and getting those SNPs for each lipid fraction. Therefore, if there are e.g. 20 instruments for each of 3 lipid fractions, but combined there are 30 unique SNPs, then we need to extract each of the 30 SNPs from each lipid fraction (exposure). ```{r eval=FALSE} exposure_dat <- mv_extract_exposures(id_exposure) @@ -587,9 +588,9 @@ With these three different parameters there are eight different ways to do MV an ### Note about visualisation -Plots can be generated using the `plots = TRUE` argument for `mv_multiple()` and `mv_residual()`. +Plots can be generated using the `plots = TRUE` argument for `mv_multiple()` and `mv_residual()`. -The current plots being generated are not necessarily adequate because while they show the slope through the raw points, they do not demonstrate that the raw points might be effectively different between plots because they are conditional on the other exposures. +The current plots being generated are not necessarily adequate because while they show the slope through the raw points, they do not demonstrate that the raw points might be effectively different between plots because they are conditional on the other exposures. 
### Using your own summary data @@ -642,7 +643,7 @@ MendelianRandomization::mr_ivw(dat2[[1]], correl = TRUE) We recently developed MR-MoE, a method to choose the most appropriate amongst several MR tests using a machine learning algorithm. Note that the method is still under review, but full details are described here: . -MR-MoE operates by taking a set of harmonised data, inferring some characteristics about the dataset, and using those characteristics to predict how well each of the different MR methods will perform on the dataset, in terms of maximising power while minimising false discovery rates. +MR-MoE operates by taking a set of harmonised data, inferring some characteristics about the dataset, and using those characteristics to predict how well each of the different MR methods will perform on the dataset, in terms of maximising power while minimising false discovery rates. In order to run the analysis you must download an RData object that contains the trained random forests that are used to predict the efficacy of each method. This can be downloaded from here: @@ -692,13 +693,13 @@ Looking at the `estimates`, we see that there is a column called `MOE` which is * * * -## Post MR results management +## Post MR results management The TwoSampleMR package also provides the following functions for managing or editing MR results. ### Split outcome names -The outcome column in the output of mr() combines the original outcome name with the outcome trait ID. +The outcome column in the output of mr() combines the original outcome name with the outcome trait ID. ```{r} head(res) @@ -713,27 +714,27 @@ split_outcome(res) ### Split exposure names -Similarly to the outcome column, the exposure column in the output of `mr()` combines the original exposure name with the exposure trait ID. This can be split into separate columns for the id and exposure name using the split_exposure function. 
+Similarly to the outcome column, the exposure column in the output of `mr()` combines the original exposure name with the exposure trait ID. This can be split into separate columns for the id and exposure name using the split_exposure function. ### Generate odds ratios with 95% confidence intervals -Users can convert log odds ratios into odds ratios with 95% confidence intervals using: +Users can convert log odds ratios into odds ratios with 95% confidence intervals using: ```{r} generate_odds_ratios(res) ``` -### Subset on method +### Subset on method -It is sometimes useful to subset results on MR method, so that there is one unique result for each exposure-outcome combination: +It is sometimes useful to subset results on MR method, so that there is one unique result for each exposure-outcome combination: ```{r} subset_on_method(res) ``` -The default is to subset on the IVW method when >1 SNP is available and to use the Wald ratio method when a single SNP is available. Users can specify which multi-SNP method to subset on. +The default is to subset on the IVW method when >1 SNP is available and to use the Wald ratio method when a single SNP is available. Users can specify which multi-SNP method to subset on. -### Combine all results +### Combine all results It is often useful to combine all results and study level characterists into a single dataframe or table, e.g. for sharing results with collaborators or when the user wishes to present all results in a single table or figure. This can be done using the `combine_all_mrresults()` function: @@ -775,6 +776,6 @@ head(all_res[, c( )]) ``` -This combines all results from `mr()`, `mr_heterogeneity()`, `mr_pleiotropy_test()` and `mr_singlesnp()` into a single dataframe. It also merges the results with outcome study level characteristics from the `available_outcomes()` function, including sample size characteristics. If requested, it also exponentiates results (e.g. 
if the user wants log odds ratio converted into odds ratios with 95 percent confidence intervals). +This combines all results from `mr()`, `mr_heterogeneity()`, `mr_pleiotropy_test()` and `mr_singlesnp()` into a single dataframe. It also merges the results with outcome study level characteristics from the `available_outcomes()` function, including sample size characteristics. If requested, it also exponentiates results (e.g. if the user wants log odds ratio converted into odds ratios with 95 percent confidence intervals). ## References From dfd8abb0f5483e1997c27e9738c38d09dabe4a8a Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 1 Feb 2024 10:37:02 +0000 Subject: [PATCH 36/40] Remove inline html <br/>
from markdown syntax --- vignettes/perform_mr.Rmd | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/vignettes/perform_mr.Rmd b/vignettes/perform_mr.Rmd index 601cba8a..a75c47fa 100644 --- a/vignettes/perform_mr.Rmd +++ b/vignettes/perform_mr.Rmd @@ -498,9 +498,7 @@ This function will create a separate report file for every exposure-outcome comb This is an implementation of the method described here: -[Hemani G, Tilling K, Davey Smith G.<br/>
-**Orienting the causal relationship between imprecisely measured traits using GWAS summary data.**<br/>
-PLoS Genetics. 2017. 13(11): e1007081.](https://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1007081) +[Hemani G, Tilling K, Davey Smith G. **Orienting the causal relationship between imprecisely measured traits using GWAS summary data.** PLoS Genetics. 2017. 13(11): e1007081.](https://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1007081) In MR it is assumed that the instruments influence the exposure first and then the outcome through the exposure. But sometimes this is difficult to evaluate, for example is a cis-acting SNP influencing gene expression levels or DNA methylation levels first? The causal direction between the hypothesised exposure and outcomes can be tested using the Steiger test [@hemani-plosgen-2017]. For example: From 68a98df9b16280622c535f670503205177aee0dc Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 1 Feb 2024 10:37:58 +0000 Subject: [PATCH 37/40] Delete whitespace --- vignettes/harmonise.Rmd | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vignettes/harmonise.Rmd b/vignettes/harmonise.Rmd index 2efc52da..69b81d90 100644 --- a/vignettes/harmonise.Rmd +++ b/vignettes/harmonise.Rmd @@ -34,7 +34,7 @@ bmi_exp_dat <- extract_instruments(outcomes = 'ieu-a-2') chd_out_dat <- extract_outcome_data(snps = bmi_exp_dat$SNP, outcomes = 'ieu-a-7') ``` -but it is important to harmonise the effects. This means that the effect of a SNP on the exposure and the effect of that SNP on the outcome must each correspond to the same allele. +but it is important to harmonise the effects. This means that the effect of a SNP on the exposure and the effect of that SNP on the outcome must each correspond to the same allele. **Note:** The IEU GWAS database contains data that is already harmonised, meaning that the non-effect allele is aligned to the human genome reference sequence (build 37). 
It's still recommended to harmonise, but in principle everything should be on the forward strand and effect alleles always relating to the same allele. Some discrepancies could arise if there are multi-allelic variants that are represented as different bi-allelic variants in different studies. @@ -47,7 +47,7 @@ dat <- harmonise_data( ) ``` -This creates a new data frame that has the exposure data and outcome data combined. +This creates a new data frame that has the exposure data and outcome data combined. If there were 3 exposure traits and 3 outcome traits then there will be 9 sets of harmonisations being performed - harmonising the SNP effects of exposure trait 1 against outcome trait 1; exposure trait 1 against outcome trait 2; and so on. @@ -133,7 +133,7 @@ This is similar to the above, except the allele frequency no longer gives us inf ### Options -There are three options to harmonising the data. +There are three options to harmonising the data. 1. Assume all alleles are presented on the forward strand 2. Try to infer the forward strand alleles using allele frequency information @@ -156,10 +156,10 @@ There are therefore multiple potential combinations of body mass index and coron dat <- power_prune(dat, method = 1, dist.outcome = "binary") ``` -This drops the duplicate exposure-outcome sets with the smaller outcome sample size (number of cases for binary outcomes). Remaining duplicates are then dropped on the basis of the exposure sample size. However, if there are a large number of SNPs available to instrument an exposure, the outcome GWAS with the better SNP coverage may provide better power than the outcome GWAS with the larger sample size. This can occur, for example, if the larger outcome GWAS has used a targeted genotyping array. In such instances, it may be better to prune studies on the basis of instrument strength (i.e. variation in exposure explained by the instrumental SNPs) as well as sample size. 
This can be done by setting the method argument to 2: +This drops the duplicate exposure-outcome sets with the smaller outcome sample size (number of cases for binary outcomes). Remaining duplicates are then dropped on the basis of the exposure sample size. However, if there are a large number of SNPs available to instrument an exposure, the outcome GWAS with the better SNP coverage may provide better power than the outcome GWAS with the larger sample size. This can occur, for example, if the larger outcome GWAS has used a targeted genotyping array. In such instances, it may be better to prune studies on the basis of instrument strength (i.e. variation in exposure explained by the instrumental SNPs) as well as sample size. This can be done by setting the method argument to 2: ```{r eval=FALSE} dat <- power_prune(dat, method = 2, dist.outcome = "binary") ``` -This procedure drops duplicate exposure-outcome sets on the basis of instrument strength and sample size, and assumes that the SNP-exposure effects correspond to a continuous trait with a normal distribution (i.e. exposure should not be binary). The SNP-outcome effects can correspond to either a binary or continuous trait (default behaviour is to assume a binary distribution). If the exposure is binary then method 1 should be used. +This procedure drops duplicate exposure-outcome sets on the basis of instrument strength and sample size, and assumes that the SNP-exposure effects correspond to a continuous trait with a normal distribution (i.e. exposure should not be binary). The SNP-outcome effects can correspond to either a binary or continuous trait (default behaviour is to assume a binary distribution). If the exposure is binary then method 1 should be used. 
From 84bfeed001c8ec83b2cfcbe0a1d5fb2efc61436e Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 1 Feb 2024 10:38:31 +0000 Subject: [PATCH 38/40] Delete whitespace --- vignettes/introduction.Rmd | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd index 6792849f..44bd37b9 100644 --- a/vignettes/introduction.Rmd +++ b/vignettes/introduction.Rmd @@ -22,7 +22,7 @@ knitr::opts_chunk$set( ## Background -Two sample Mendelian randomisation (2SMR) is a method to estimate the causal effect of an exposure on an outcome using only summary statistics from genome wide association studies (GWAS). Though conceptually straightforward, there are a number of steps that are required to perform the analysis properly, and they can be cumbersome. The TwoSampleMR package aims to make this easy by combining three important components +Two sample Mendelian randomisation (2SMR) is a method to estimate the causal effect of an exposure on an outcome using only summary statistics from genome wide association studies (GWAS). Though conceptually straightforward, there are a number of steps that are required to perform the analysis properly, and they can be cumbersome. The TwoSampleMR package aims to make this easy by combining three important components - data management and harmonisation - the statistical routines to estimate the causal effects @@ -32,7 +32,7 @@ The general principles [@DaveySmith2003; @DaveySmithHemani2014], and statistical This package uses the [ieugwasr](https://github.com/mrcieu/ieugwasr) package to connect to the database of thousands of complete GWAS summary data. -* * * +* * * ## Installation @@ -45,7 +45,7 @@ install_github("MRCIEU/TwoSampleMR") If you don't have the `remotes` package install it from CRAN using `install.packages("remotes")`. 
-* * * +* * * ## Overview From b135695c4f5a49a338e88cda85e5dd7b4038b33d Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 1 Feb 2024 10:48:04 +0000 Subject: [PATCH 39/40] Delete TwoSampleMR bullet TwoSampleMR version 0.5.0 has been released --- vignettes/gwas2020.Rmd | 1 - 1 file changed, 1 deletion(-) diff --git a/vignettes/gwas2020.Rmd b/vignettes/gwas2020.Rmd index 364d1aab..3661cc25 100644 --- a/vignettes/gwas2020.Rmd +++ b/vignettes/gwas2020.Rmd @@ -99,7 +99,6 @@ Either the data in the database, or the GWAS VCF files, can be queried and the r - The IEU GWAS database: https://gwas.mrcieu.ac.uk - API to the IEU GWAS database: https://gwas-api.mrcieu.ac.uk -- Updated TwoSampleMR package branch (not merged to master yet): https://github.com/MRCIEU/TwoSampleMR/tree/ieugwasr - ieugwasr package, for R access to the API: https://mrcieu.github.io/ieugwasr/ - ieugwaspy package, for python access to the API: https://github.com/MRCIEU/ieugwaspy/ (Under construction) - gwasvcf package, R interface to GWAS VCF files: https://mrcieu.github.io/gwasvcf/ From 4a7ba3b9b5455444f051dfe43be49b9ddda77754 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 1 Feb 2024 10:49:04 +0000 Subject: [PATCH 40/40] Add release date --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 766a4d09..dbf52522 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # TwoSampleMR v0.5.9 +(Release date: 2024-02-01) + * Fixed a minor issue in `dat_to_RadialMR()` * Minor improvements to `make_dat()` default arguments and helpfile * Minor improvements to package tests