From f175f79014795e51eb3215b1bd2992fb7078dbdc Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Tue, 3 Aug 2021 11:04:36 +0200 Subject: [PATCH 01/11] respect relative or absolute paths in filetable --- NAMESPACE | 4 +++- R/msmsRead.R | 14 +++++++++++--- man/msmsWorkflow.Rd | 1 + 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 551cf02..7ba4fb7 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -145,6 +145,8 @@ exportMethods(show) exportMethods(spectraCount) exportMethods(toMassbank) import(MSnbase) +import(R.utils) +import(RCurl) import(Rcpp) import(S4Vectors) import(XML) @@ -159,4 +161,4 @@ import(yaml) importFrom(Biobase,"classVersion<-") importFrom(Biobase,classVersion) importFrom(Biobase,isCurrent) -importFrom(Biobase,isVersioned) \ No newline at end of file +importFrom(Biobase,isVersioned) diff --git a/R/msmsRead.R b/R/msmsRead.R index 8e3f93a..8173f68 100644 --- a/R/msmsRead.R +++ b/R/msmsRead.R @@ -1,3 +1,6 @@ +#' @import R.utils +NULL + #' #' Extracts and processes spectra from a specified file list, according to #' loaded options and given parameters. @@ -68,9 +71,14 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, } else{ ##If a filetable is supplied read it tab <- read.csv(filetable, stringsAsFactors = FALSE) - # Assuming that filetable contains paths - # relative to its own location - tab[,"Files"] <- paste(dirname(filetable), tab[,"Files"], sep="/") + # Check if we have absolute or relative paths. + # If relative, they are assumed to be relative to the filetable path + + tab[,"Files"] <- ifelse( + isAbsolutePath(tab[,"Files"]), + tab[,"Files"], + paste(dirname(filetable), tab[,"Files"], sep="/") + ) w@files <- tab[,"Files"] cpdids <- tab[,"ID"] } diff --git a/man/msmsWorkflow.Rd b/man/msmsWorkflow.Rd index 5ab97e7..8f20870 100755 --- a/man/msmsWorkflow.Rd +++ b/man/msmsWorkflow.Rd @@ -13,6 +13,7 @@ msmsWorkflow( useRtLimit = TRUE, archivename = NA, readMethod = "mzR", + filetable = NULL, findPeaksArgs = NULL, plots = FALSE, precursorscan.cf = FALSE, From 5eacf0b39d155fdbc082059979a18863bfa0779e Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Tue, 3 Aug 2021 12:45:39 +0200 Subject: [PATCH 02/11] added R.utils import --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index a7dabb9..eb1313e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -33,7 +33,7 @@ Encoding: UTF-8 Imports: XML,rjson,S4Vectors,digest, rcdk,yaml,mzR,methods,Biobase,MSnbase,httr, - enviPat,assertthat + enviPat,assertthat,R.utils Suggests: BiocStyle,gplots,RMassBankData, xcms (>= 1.37.1), From 72a735ab844793eb7273c3a1b05b67d3c815babb Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Tue, 3 Aug 2021 12:47:02 +0200 Subject: [PATCH 03/11] Processing mode per-entry from mode argument or filetable --- R/msmsRead.R | 40 +++++++++++++++++++++++++++------------- man/msmsRead.Rd | 5 ++++- 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/R/msmsRead.R b/R/msmsRead.R index 8173f68..a901318 100644 --- a/R/msmsRead.R +++ b/R/msmsRead.R @@ -27,6 +27,9 @@ NULL #' just requires a CSV with two columns and the column header "mz", "int". #' @param mode \code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions #' ([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-). +#' For `readMethod == "mzR"`, a vector of `mode` entries is supported. The user +#' should check that they are either all positive or negative. If this isn't the case, +#' the recalibration will be incorrect. #' @param confirmMode Defaults to false (use most intense precursor). Value 1 uses #' the 2nd-most intense precursor for a chosen ion (and its data-dependent scans) #' , etc. @@ -44,18 +47,18 @@ NULL #' @author Erik Mueller, UFZ #' @export msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, - readMethod, mode, confirmMode = FALSE, useRtLimit = TRUE, + readMethod, mode = NULL, confirmMode = FALSE, useRtLimit = TRUE, Args = NULL, settings = getOption("RMassBank"), progressbar = "progressBarHook", MSe = FALSE, plots = FALSE){ .checkMbSettings() - ##Read the files and cpdids according to the definition - ##All cases are silently accepted, as long as they can be handled according to one definition - if(!any(mode %in% knownAdducts())) stop(paste("The ionization mode", mode, "is unknown.")) + if(is.null(filetable)){ ##If no filetable is supplied, filenames must be named explicitly if(is.null(files)) stop("Please supply the files") + if(is.null(mode)) + stop("Please supply the mode(s)") ##Assign the filenames to the workspace w@files <- unlist(files) @@ -81,7 +84,12 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, ) w@files <- tab[,"Files"] cpdids <- tab[,"ID"] + mode <- tab[,"mode"] } + + ##Read the files and cpdids according to the definition + ##All cases are silently accepted, as long as they can be handled according to one definition + if(!all(mode %in% knownAdducts())) stop(paste("The ionization mode", mode, "is unknown.")) ##If there's more cpdids than filenames or the other way around, then abort if(length(w@files) != length(cpdids)){ @@ -101,7 +109,7 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, # if(length(na.ids)){ # stop("The supplied compound ids ", paste(cpdids[na.ids], collapse=" "), " don't have a corresponding smiles entry. Maybe they are missing from the compound list") # } - + ##This should work if(readMethod == "minimal"){ ##Edit options @@ -115,23 +123,29 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, } if(readMethod == "mzR"){ + + # To do: check if we can use this verbatim in xcms method too + mode_ <- mode + if(length(mode) == 1) + mode_ <- rep(mode, length(w@files)) + if(length(mode) != length(w@files)) + stop("Supply either one mode or a vector for one mode per file") + ##Progressbar nLen <- length(w@files) nProg <- 0 pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen)) - count <- 1 - envir <- environment() - w@spectra <- as(lapply(w@files, function(fileName) { + w@spectra <- as(lapply(seq_along(w@files), function(i) { + fileName <- w@files[i] # Find compound ID - cpdID <- cpdids[count] - # Set counter up - envir$count <- envir$count + 1 - + cpdID <- cpdids[i] + + # Retrieve spectrum data spec <- findMsMsHR(fileName = fileName, - cpdID = cpdID, mode = mode, confirmMode = confirmMode, useRtLimit = useRtLimit, + cpdID = cpdID, mode = mode_[i], confirmMode = confirmMode, useRtLimit = useRtLimit, ppmFine = settings$findMsMsRawSettings$ppmFine, mzCoarse = settings$findMsMsRawSettings$mzCoarse, fillPrecursorScan = settings$findMsMsRawSettings$fillPrecursorScan, diff --git a/man/msmsRead.Rd b/man/msmsRead.Rd index 337031a..f287fda 100644 --- a/man/msmsRead.Rd +++ b/man/msmsRead.Rd @@ -43,7 +43,10 @@ so that e.g. a recalibration can be performed, and "peaklist" just requires a CSV with two columns and the column header "mz", "int".} \item{mode}{\code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions -([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-).} +([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-). +For `readMethod == "mzR"`, a vector of `mode` entries is supported. The user +should check that they are either all positive or negative. If this isn't the case, +the recalibration will be incorrect.} \item{confirmMode}{Defaults to false (use most intense precursor). Value 1 uses the 2nd-most intense precursor for a chosen ion (and its data-dependent scans) From 3b2a0c9e8edb64af2cda1a24b9ee119ab337f79a Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Tue, 3 Aug 2021 12:47:20 +0200 Subject: [PATCH 04/11] removed outdated .polarity --- R/RmbSpectrum2Update.R | 4 ++-- R/formulaCalculator.R | 9 --------- R/leCsvAccess.R | 9 +++++++++ R/leMsmsRaw.R | 12 ++++++------ 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/R/RmbSpectrum2Update.R b/R/RmbSpectrum2Update.R index 2cd7eda..17e4459 100644 --- a/R/RmbSpectrum2Update.R +++ b/R/RmbSpectrum2Update.R @@ -83,10 +83,10 @@ .updateObject.RmbSpectraSet.updatePolarity <- function(w) { - w@parent@polarity <- .polarity[[w@mode]] + w@parent@polarity <- getAdductPolarity(w@mode) for(n in seq_len(length(w@children))) { - w@children[[n]]@polarity <- .polarity[[w@mode]] + w@children[[n]]@polarity <- getAdductPolarity(w@mode) } w } diff --git a/R/formulaCalculator.R b/R/formulaCalculator.R index e45db84..4125175 100755 --- a/R/formulaCalculator.R +++ b/R/formulaCalculator.R @@ -390,12 +390,3 @@ split.formula.posneg <- function(f, as.formula = TRUE, as.list=FALSE) "pM" = "+", "mM" = "-", "pNH4" = "+") - -.polarity <- list( - "pH" = as.integer(1), - "pNa" = as.integer(1), - "mH" = as.integer(0), - "mFA" = as.integer(0), - "pM" = as.integer(1), - "mM" = as.integer(0), - "pNH4" = as.integer(1)) diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R index 1b00cfc..4f524e4 100755 --- a/R/leCsvAccess.R +++ b/R/leCsvAccess.R @@ -356,6 +356,13 @@ getMonoisotopicMass <- function(formula){ } return(monoisotopicMass) } + +getAdductPolarity <- function(mode) { + df <- getAdductInformation("") + charge <- df[df$mode == mode,charge] + ifelse(charge > 0, 1, 0) +} + getAdductInformation <- function(formula){ adductDf <- as.data.frame(rbind( @@ -487,6 +494,8 @@ getAdductInformation <- function(formula){ ), stringsAsFactors = F) adductDf$charge <- as.integer(adductDf$charge) + + if(any(any(duplicated(adductDf$mode)), any(duplicated(adductDf$adductString)))) stop("Invalid adduct table") return(adductDf) diff --git a/R/leMsmsRaw.R b/R/leMsmsRaw.R index 9ccc51d..17a9fe9 100644 --- a/R/leMsmsRaw.R +++ b/R/leMsmsRaw.R @@ -123,7 +123,7 @@ findMsMsHR <- function(fileName = NULL, msRaw = NULL, cpdID, mode="pH",confirmMo enforcePolarity <- FALSE if(enforcePolarity) - polarity <- .polarity[[mode]] + polarity <- getAdductPolarity(mode) else polarity <- NA # access data directly for finding the MS/MS data. This is done using @@ -167,10 +167,10 @@ findMsMsHR <- function(fileName = NULL, msRaw = NULL, cpdID, mode="pH",confirmMo # Overwrite the polarity with a value we generate, so it's consistent. # Some mzML files give only -1 as a result for polarity, which is useless for us - sp@parent@polarity <- .polarity[[sp@mode]] + sp@parent@polarity <- getAdductPolarity(sp@mode) for(n in seq_len(length(sp@children))) { - sp@children[[n]]@polarity <- .polarity[[sp@mode]] + sp@children[[n]]@polarity <- getAdductPolarity(sp@mode) } # If we had to open the file, we have to close it again @@ -493,7 +493,7 @@ findMsMsHRperxcms <- function(fileName, cpdID, mode="pH", findPeaksArgs = NULL, sp@name <- findName(cpdID[i]) sp@formula <- findFormula(cpdID[i]) sp@mode <- mode - sp@polarity <- .polarity[[sp@mode]] + sp@polarity <- getAdductPolarity(sp@mode) return(sp) }) return(P) @@ -992,7 +992,7 @@ findEIC <- function(msRaw, mz, limit = NULL, rtLimit = NA, headerCache = NULL, f if(!is.na(polarity)) { if(is.character(polarity)) - polarity <- .polarity[[polarity]] + polarity <- getAdductPolarity(polarity) headerMS1 <- headerMS1[headerMS1$polarity == polarity,] } @@ -1108,7 +1108,7 @@ toRMB <- function(msmsXCMSspecs = NA, cpdID = NA, mode="pH", MS1spec = NA){ precursorIntensity = ifelse(test = "into_parent" %in% colnames(spec), yes = spec[,"into_parent"], no = 0), precursorCharge = as.integer(1), collisionEnergy = 0, - polarity = .polarity[[mode]], + polarity = getAdductPolarity(mode), tic = 0, peaksCount = nrow(spec), rt = median(spec[,"rt"]), From bdf618d3ebf4f3c91b2ea59c79799d2d1d24ccf4 Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Tue, 3 Aug 2021 13:41:52 +0200 Subject: [PATCH 05/11] removed old adduct type definitions, now using adduct table --- R/buildRecord.R | 5 +++-- R/formulaCalculator.R | 25 ------------------------- R/leCsvAccess.R | 8 +++++++- R/parseMbRecord.R | 4 ++-- 4 files changed, 12 insertions(+), 30 deletions(-) diff --git a/R/buildRecord.R b/R/buildRecord.R index b940950..0ebe08d 100644 --- a/R/buildRecord.R +++ b/R/buildRecord.R @@ -153,7 +153,7 @@ getAnalyticalInfo <- function(cpd = NULL) ai <- list() # define positive or negative, based on processing mode. if(!is.null(cpd)) - mode <- .ionModes[[cpd@mode]] + mode <- getIonMode(cpd@mode) # again, these constants are read from the options: ai[['AC$INSTRUMENT']] <- getOption("RMassBank")$annotations$instrument @@ -257,9 +257,10 @@ setMethod("buildRecord", "RmbSpectrum2", function(o, ..., cpd = NULL, mbdata = l ms_fi <- list() if(!is.null(cpd)) { + adductInfo <- getAdductInformation("") ms_fi[['BASE_PEAK']] <- round(mz(cpd@parent)[which.max(intensity(cpd@parent))],4) ms_fi[['PRECURSOR_M/Z']] <- round(cpd@mz,4) - ms_fi[['PRECURSOR_TYPE']] <- .precursorTypes[cpd@mode] + ms_fi[['PRECURSOR_TYPE']] <- adductInfo[adductInfo$mode == cpd@mode, "adductString"] if(all(!is.na(spectrum@precursorIntensity), spectrum@precursorIntensity != 0, diff --git a/R/formulaCalculator.R b/R/formulaCalculator.R index 4125175..caf4f62 100755 --- a/R/formulaCalculator.R +++ b/R/formulaCalculator.R @@ -364,29 +364,4 @@ split.formula.posneg <- function(f, as.formula = TRUE, as.list=FALSE) return(list(pos=pos, neg=neg)) } -.precursorTypes <- list( - "pH" = "[M+H]+", - "pNa" = "[M+Na]+", - "mH" = "[M-H]-", - "mFA" = "[M+HCOO-]-", - "pM" = "[M]+", - "mM" = "[M]-", - "pNH4" = "[M+NH4]+") -.ionModes <- list( - "pH" = "POSITIVE", - "pNa" = "POSITIVE", - "mH" = "NEGATIVE", - "mFA" = "NEGATIVE", - "pM" = "POSITIVE", - "mM" = "NEGATIVE", - "pNH4" = "POSITIVE") - -.formulaTag <- list( - "pH" = "+", - "pNa" = "+", - "mH" = "-", - "mFA" = "-", - "pM" = "+", - "mM" = "-", - "pNH4" = "+") diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R index 5f66db6..35389fe 100755 --- a/R/leCsvAccess.R +++ b/R/leCsvAccess.R @@ -359,10 +359,16 @@ getMonoisotopicMass <- function(formula){ getAdductPolarity <- function(mode) { df <- getAdductInformation("") - charge <- df[df$mode == mode,charge] + charge <- df[df$mode == mode,"charge"] ifelse(charge > 0, 1, 0) } +getIonMode <- function(mode) { + df <- getAdductInformation("") + charge <- df[df$mode == mode,"charge"] + ifelse(charge > 0, "POSITIVE", "NEGATIVE") +} + getAdductInformation <- function(formula){ adductDf <- as.data.frame(rbind( diff --git a/R/parseMbRecord.R b/R/parseMbRecord.R index 95d0aa1..21d1bf8 100644 --- a/R/parseMbRecord.R +++ b/R/parseMbRecord.R @@ -255,8 +255,8 @@ parseMbRecords <- function(files) # Select one spectrum to get compound data from: sp <- sps[[1]] cpd@mz <- as.numeric(sp@info[["MS$FOCUSED_ION"]][['PRECURSOR_M/Z']]) - cpd@mode <- names(RMassBank:::.precursorTypes)[which(RMassBank:::.precursorTypes == - sp@info[["MS$FOCUSED_ION"]][['PRECURSOR_TYPE']])] + adductInfo <- getAdductInformation() + cpd@mode <- adductInfo[adductInfo$adductString == sp@info[["MS$FOCUSED_ION"]][['PRECURSOR_TYPE']], "mode"] cpd@name <- sp@info[["CH$NAME"]][[1]] cpd@formula <- sp@info[['CH$FORMULA']] cpd@smiles <- sp@info[['CH$SMILES']] From 7432337626b618e1d4cd9a405c382972d68cb4f1 Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Tue, 3 Aug 2021 14:35:14 +0200 Subject: [PATCH 06/11] Fixed charge to integer type --- R/leCsvAccess.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R index 35389fe..cbf3a26 100755 --- a/R/leCsvAccess.R +++ b/R/leCsvAccess.R @@ -360,7 +360,7 @@ getMonoisotopicMass <- function(formula){ getAdductPolarity <- function(mode) { df <- getAdductInformation("") charge <- df[df$mode == mode,"charge"] - ifelse(charge > 0, 1, 0) + ifelse(charge > 0, 1L, 0L) } getIonMode <- function(mode) { From 9b70a61e4c82ce3ca6549ea5c283dc404d9e1afb Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Tue, 3 Aug 2021 14:35:41 +0200 Subject: [PATCH 07/11] Removed unnecessary mode argument in workflow steps --- R/leMsMs.r | 66 +++++++++++++++++------------------ man/makeRecalibration.Rd | 3 -- man/msmsRead.Rd | 2 +- man/reanalyzeFailpeaks.Rd | 4 +-- man/recalibrate.addMS1data.Rd | 3 -- 5 files changed, 35 insertions(+), 43 deletions(-) diff --git a/R/leMsMs.r b/R/leMsMs.r index ec471e7..f67acc7 100755 --- a/R/leMsMs.r +++ b/R/leMsMs.r @@ -79,7 +79,12 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec progressbar = "progressBarHook", MSe = FALSE) { .checkMbSettings() - if(!any(mode %in% knownAdducts())) stop(paste("The ionization mode", mode, "is unknown.")) + + if(!is.na(mode)) + if(!all(mode %in% knownAdducts())) stop(paste("The ionization mode", mode, "is unknown.")) + if(is.na(mode) && (1 %in% steps) && is.null(filetable)) + stop("If step 1 (reading) is included, mode must be specified either as argument or in the filetable.") + if(!is.na(archivename)) w@archivename <- archivename @@ -146,7 +151,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec } pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen)) w@spectra <- as(lapply(w@spectra, function(spec) { - s <- analyzeMsMs(msmsPeaks = spec, mode=mode, detail=TRUE, run="preliminary", + s <- analyzeMsMs(msmsPeaks = spec, mode=spec@mode, detail=TRUE, run="preliminary", filterSettings = settings$filterSettings, spectraList = settings$spectraList, method = analyzeMethod) # Progress: @@ -195,7 +200,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec if(newRecalibration) { # note: makeRecalibration takes w as argument now, because it needs to get the MS1 spectra from @spectra - recal <- makeRecalibration(w, mode, + recal <- makeRecalibration(w, recalibrateBy = settings$recalibrateBy, recalibrateMS1 = settings$recalibrateMS1, recalibrator = settings$recalibrator, @@ -205,7 +210,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec } w@parent <- w w@aggregated <- data.frame() - spectra <- recalibrateSpectra(mode, w@spectra, w = w, + spectra <- recalibrateSpectra(w@spectra, w = w, recalibrateBy = settings$recalibrateBy, recalibrateMS1 = settings$recalibrateMS1) w@spectra <- spectra @@ -218,21 +223,21 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen)) w@spectra <- as(lapply(w@spectra, function(spec) { - #print(spec$id) - if(findLevel(spec@id,TRUE) == "unknown"){ - analyzeMethod <- "intensity" - } else { - analyzeMethod <- "formula" - } - s <- analyzeMsMs(msmsPeaks = spec, mode=mode, detail=TRUE, run="recalibrated", - filterSettings = settings$filterSettings, - spectraList = settings$spectraList, method = analyzeMethod) - # Progress: - nProg <<- nProg + 1 - pb <- do.call(progressbar, list(object=pb, value= nProg)) - - return(s) - }), "SimpleList") + #print(spec$id) + if(findLevel(spec@id,TRUE) == "unknown"){ + analyzeMethod <- "intensity" + } else { + analyzeMethod <- "formula" + } + s <- analyzeMsMs(msmsPeaks = spec, mode=spec@mode, detail=TRUE, run="recalibrated", + filterSettings = settings$filterSettings, + spectraList = settings$spectraList, method = analyzeMethod) + # Progress: + nProg <<- nProg + 1 + pb <- do.call(progressbar, list(object=pb, value= nProg)) + + return(s) + }), "SimpleList") ## for(f in w@files) ## w@spectra[[basename(as.character(f))]]@name <- basename(as.character(f)) suppressWarnings(do.call(progressbar, list(object=pb, close=TRUE))) @@ -268,7 +273,7 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec { message("msmsWorkflow: Step 7. Reanalyze fail peaks for N2 + O") w <- reanalyzeFailpeaks( - w, custom_additions="N2O", mode=mode, + w, custom_additions="N2O", filterSettings=settings$filterSettings, progressbar=progressbar) if(!is.na(archivename)) @@ -1406,8 +1411,6 @@ processProblematicPeaks <- function(w, archivename = NA) #' the \code{msmsWorkspace} which contains the recalibration curves (alternatively to specifying \code{rc, rc.ms1}). #' @param spectrum For \code{recalibrateSingleSpec}: #' a \code{MSnbase} \code{Spectrum}-derived object, commonly a \code{RmbSpectrum2} for MS2 or \code{Spectrum1} for MS1. -#' @param mode \code{"pH", "pNa", "pM", "mH", "mM", "mFA"} for different ions -#' ([M+H]+, [M+Na]+, [M]+, [M-H]-, [M]-, [M+FA]-). #' @param rawspec For \code{recalibrateSpectra}:an \code{RmbSpectraSetList} of \code{RmbSpectraSet} objects #' , as the \code{w@@spectra} slot from \code{msmsWorkspace} or any object returned by \code{\link{findMsMsHR}}. #' If empty, no spectra are recalibrated, but the recalibration curve is @@ -1436,7 +1439,7 @@ processProblematicPeaks <- function(w, archivename = NA) #' #' @author Michael Stravs, Eawag #' @export -makeRecalibration <- function(w, mode, +makeRecalibration <- function(w, recalibrateBy = getOption("RMassBank")$recalibrateBy, recalibrateMS1 = getOption("RMassBank")$recalibrateMS1, recalibrator = getOption("RMassBank")$recalibrator, @@ -1458,7 +1461,7 @@ makeRecalibration <- function(w, mode, if(nrow(rcdata) == 0) stop("No peaks matched to generate recalibration curve.") - ms1data <- recalibrate.addMS1data(w@spectra, mode, recalibrateMS1Window) + ms1data <- recalibrate.addMS1data(w@spectra, recalibrateMS1Window) ms1data <- ms1data[,c("mzFound", "dppm", "mzCalc")] if (recalibrateMS1 != "none") { @@ -1582,7 +1585,7 @@ plotRecalibration.direct <- function(rcdata, rc, rc.ms1, title, mzrange, #' @export -recalibrateSpectra <- function(mode, rawspec = NULL, rc = NULL, rc.ms1=NULL, w = NULL, +recalibrateSpectra <- function(rawspec = NULL, rc = NULL, rc.ms1=NULL, w = NULL, recalibrateBy = getOption("RMassBank")$recalibrateBy, recalibrateMS1 = getOption("RMassBank")$recalibrateMS1) { @@ -1740,7 +1743,6 @@ filterPeakSatellites <- function(peaks, filterSettings = getOption("RMassBank")$ #' filterSettings = getOption("RMassBank")$filterSettings) #' @param aggregated A peake aggregate table (\code{w@@aggregate}) (after processing electronic noise removal!) #' @param custom_additions The allowed additions, e.g. "N2O". -#' @param mode Processing mode (\code{"pH", "pNa", "mH"} etc.) #' @param mass (Usually recalibrated) m/z value of the peak. #' @param cpdID Compound ID of this spectrum. #' @param counter Current peak index (used exclusively for the progress @@ -1765,7 +1767,7 @@ filterPeakSatellites <- function(peaks, filterSettings = getOption("RMassBank")$ #' \dontrun{ #' reanalyzedRcSpecs <- reanalyzeFailpeaks(w@@aggregated, custom_additions="N2O", mode="pH") #' # A single peak: -#' reanalyzeFailpeak("N2O", 105.0447, 1234, 1, 1, "pH") +#' reanalyzeFailpeak("N2O", 105.0447, 1234, 1, 1) #' } #' #' @@ -1775,7 +1777,7 @@ filterPeakSatellites <- function(peaks, filterSettings = getOption("RMassBank")$ #' #' #' @export -reanalyzeFailpeaks <- function(w, custom_additions, mode, filterSettings = +reanalyzeFailpeaks <- function(w, custom_additions, filterSettings = getOption("RMassBank")$filterSettings, progressbar = "progressBarHook") { nProg <- 0 @@ -1800,7 +1802,7 @@ reanalyzeFailpeaks <- function(w, custom_additions, mode, filterSettings = fp <- fp[!duplicated(fp$mz),,drop=FALSE] peaks.rean <- lapply(fp$mz, reanalyzeFailpeak, custom_additions = custom_additions, cpdID = sp@id, - mode = mode, filterSettings = filterSettings) + mode = sp@mode, filterSettings = filterSettings) matched <- (unlist(lapply(peaks.rean, nrow))) > 0 df.rean <- do.call(rbind, peaks.rean[matched]) @@ -2140,8 +2142,6 @@ filterMultiplicity <- function(w, archivename=NA, mode="pH", recalcBest = TRUE, #' @usage recalibrate.addMS1data(spec,mode="pH", recalibrateMS1Window = #' getOption("RMassBank")$recalibrateMS1Window) #' @param spec A \code{msmsWorkspace} or \code{RmbSpectraSetList} containing spectra for which MS1 "peaks" should be "constructed". -#' @param mode \code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions -#' ([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-). #' @param recalibrateMS1Window Window width to look for MS1 peaks to recalibrate (in ppm). #' @return A dataframe with columns \code{mzFound, formula, mzCalc, dppm, dbe, int, #' dppmBest, formulaCount, good, cpdID, scan, parentScan, dppmRc}. However, @@ -2157,7 +2157,7 @@ filterMultiplicity <- function(w, archivename=NA, mode="pH", recalcBest = TRUE, #' } #' @author Michael Stravs, EAWAG #' @export -recalibrate.addMS1data <- function(spec,mode="pH", recalibrateMS1Window = +recalibrate.addMS1data <- function(spec, recalibrateMS1Window = getOption("RMassBank")$recalibrateMS1Window) { ## which_OK <- lapply(validPrecursors, function(pscan) @@ -2176,7 +2176,7 @@ recalibrate.addMS1data <- function(spec,mode="pH", recalibrateMS1Window = ms1peaks <- lapply(specFound, function(cpd){ if(cpd@formula == "") return(NULL) - mzL <- findMz.formula(cpd@formula,mode,recalibrateMS1Window,0) + mzL <- findMz.formula(cpd@formula,cpd@mode,recalibrateMS1Window,0) mzCalc <- mzL$mzCenter ms1 <- mz(cpd@parent) diff --git a/man/makeRecalibration.Rd b/man/makeRecalibration.Rd index 09b761d..a250271 100755 --- a/man/makeRecalibration.Rd +++ b/man/makeRecalibration.Rd @@ -24,9 +24,6 @@ makeRecalibration(w, mode, \item{w}{For \code{makeRecalibration}: to perform the recalibration with. For \code{recalibrateSpectra}: the \code{msmsWorkspace} which contains the recalibration curves (alternatively to specifying \code{rc, rc.ms1}).} -\item{mode}{\code{"pH", "pNa", "pM", "mH", "mM", "mFA"} for different ions -([M+H]+, [M+Na]+, [M]+, [M-H]-, [M]-, [M+FA]-).} - \item{recalibrateBy}{Whether recalibration should be done by ppm ("ppm") or by m/z ("mz").} \item{recalibrateMS1}{Whether MS1 spectra should be recalibrated separately ("separate"), diff --git a/man/msmsRead.Rd b/man/msmsRead.Rd index f287fda..99ddaf0 100644 --- a/man/msmsRead.Rd +++ b/man/msmsRead.Rd @@ -11,7 +11,7 @@ msmsRead( files = NULL, cpdids = NULL, readMethod, - mode, + mode = NULL, confirmMode = FALSE, useRtLimit = TRUE, Args = NULL, diff --git a/man/reanalyzeFailpeaks.Rd b/man/reanalyzeFailpeaks.Rd index df49d0a..9d3f99c 100755 --- a/man/reanalyzeFailpeaks.Rd +++ b/man/reanalyzeFailpeaks.Rd @@ -13,8 +13,6 @@ reanalyzeFailpeak(custom_additions, mass, cpdID, counter, pb = NULL, mode, \arguments{ \item{custom_additions}{The allowed additions, e.g. "N2O".} -\item{mode}{Processing mode (\code{"pH", "pNa", "mH"} etc.)} - \item{filterSettings}{Settings for filtering data. Refer to\code{\link{analyzeMsMs}} for settings.} \item{progressbar}{The progress bar callback to use. Only needed for specialized @@ -55,7 +53,7 @@ additional elements (e.g. "N2O"). \dontrun{ reanalyzedRcSpecs <- reanalyzeFailpeaks(w@aggregated, custom_additions="N2O", mode="pH") # A single peak: -reanalyzeFailpeak("N2O", 105.0447, 1234, 1, 1, "pH") +reanalyzeFailpeak("N2O", 105.0447, 1234, 1, 1) } diff --git a/man/recalibrate.addMS1data.Rd b/man/recalibrate.addMS1data.Rd index 229523d..07ff7f2 100755 --- a/man/recalibrate.addMS1data.Rd +++ b/man/recalibrate.addMS1data.Rd @@ -10,9 +10,6 @@ recalibrate.addMS1data(spec,mode="pH", recalibrateMS1Window = \arguments{ \item{spec}{A \code{msmsWorkspace} or \code{RmbSpectraSetList} containing spectra for which MS1 "peaks" should be "constructed".} -\item{mode}{\code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions -([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-).} - \item{recalibrateMS1Window}{Window width to look for MS1 peaks to recalibrate (in ppm).} } \value{ From 2defe7f2ceb787e3abe5ad71412ef7ca2791e9ba Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Tue, 3 Aug 2021 16:33:13 +0200 Subject: [PATCH 08/11] fixes for 0-length ch@ok --- R/leMsMs.r | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/leMsMs.r b/R/leMsMs.r index f67acc7..56c41b2 100755 --- a/R/leMsMs.r +++ b/R/leMsMs.r @@ -1789,7 +1789,7 @@ reanalyzeFailpeaks <- function(w, custom_additions, filterSettings = return(sp) children <- lapply(sp@children, function(ch) { - if(!ch@ok) + if(!isTRUE(ch@ok)) return(ch) peaks <- getData(ch) # get the peaks that have no matching formula, but are considered not noise etc. @@ -2104,7 +2104,7 @@ filterMultiplicity <- function(w, archivename=NA, mode="pH", recalcBest = TRUE, return(sp) children <- lapply(sp@children, function(ch) { - if(ch@ok == FALSE) + if(!isTRUE(ch@ok)) return(ch) # filterOK TRUE if multiplicity is sufficient ch <- addProperty(ch, "filterOK", "logical", NA) From e81095fcda24bf458751c3269f1114eea38d98d7 Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Fri, 6 Aug 2021 18:20:19 +0200 Subject: [PATCH 09/11] fix for some spectra with zero good peaks in aggregation step --- R/leMsMs.r | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/leMsMs.r b/R/leMsMs.r index 56c41b2..4edbde2 100755 --- a/R/leMsMs.r +++ b/R/leMsMs.r @@ -1139,6 +1139,10 @@ aggregateSpectra <- function(spec, addIncomplete=FALSE) table.c$rawOK <- NULL table.c$low <- NULL table.c$satellite <- NULL + if(!("formulaSource" %in% colnames(table.c))) + table.c$formulaSource <- character(nrow(table.c)) + + # add scan no table.c$scan <- rep(c@acquisitionNum, nrow(table.c)) return(table.c) From 3f0f954c8c474ec643fa1de266f45a8d134c48ed Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Mon, 9 Aug 2021 19:15:12 +0200 Subject: [PATCH 10/11] added mAc, fixed vignettes after mode changes --- DESCRIPTION | 3 ++- R/leCsvAccess.R | 1 + R/msmsRead.R | 2 +- vignettes/RMassBankNonstandard.Rmd | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f83b286..096a86a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -39,7 +39,8 @@ Suggests: xcms (>= 1.37.1), CAMERA, RUnit, - knitr + knitr, + rmarkdown Collate: 'alternateAnalyze.R' 'formulaCalculator.R' diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R index cbf3a26..edda092 100755 --- a/R/leCsvAccess.R +++ b/R/leCsvAccess.R @@ -494,6 +494,7 @@ getAdductInformation <- function(formula){ c(mode = "m3H_pM_p2Na", addition = add.formula(formula, "Na2H-3"), charge = -1, adductString = "[2M+2Na-3H]-"), c(mode = "m3H_pM", addition = add.formula(formula, "H-3"), charge = -1, adductString = "[2M-3H]-"), c(mode = "mH_p2M", addition = add.formula(formula, add.formula(formula, "H-1")), charge = -1, adductString = "[3M-H]-"), + c(mode = "mAc", addition = "C2O2H3", charge = -1, adductString = "[M+CH3COO]-"), ## ??? c(mode = "", addition = "", charge = 0, adductString = "[M]") diff --git a/R/msmsRead.R b/R/msmsRead.R index ed29543..f2eee58 100644 --- a/R/msmsRead.R +++ b/R/msmsRead.R @@ -128,7 +128,7 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, mode_ <- mode if(length(mode) == 1) mode_ <- rep(mode, length(w@files)) - if(length(mode) != length(w@files)) + if(length(mode_) != length(w@files)) stop("Supply either one mode or a vector for one mode per file") ##Progressbar diff --git a/vignettes/RMassBankNonstandard.Rmd b/vignettes/RMassBankNonstandard.Rmd index ff69069..a10ea37 100644 --- a/vignettes/RMassBankNonstandard.Rmd +++ b/vignettes/RMassBankNonstandard.Rmd @@ -65,7 +65,7 @@ w <- loadMsmsWorkspace(system.file("results/pH_narcotics_RF.RData", The recalibration curve: ```{r fig=TRUE} -recal <- makeRecalibration(w@parent, "pH", +recal <- makeRecalibration(w@parent, recalibrateBy = rmbo$recalibrateBy, recalibrateMS1 = rmbo$recalibrateMS1, recalibrator = list(MS1="recalibrate.loess",MS2="recalibrate.loess"), From 9ce6c237dc57f4b53e7d094cbbe059590111b441 Mon Sep 17 00:00:00 2001 From: Michele Stravs Date: Thu, 12 Aug 2021 17:29:55 +0200 Subject: [PATCH 11/11] fix for empty spectra in list.tsv step --- R/createMassBank.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/createMassBank.R b/R/createMassBank.R index b108432..eeaaff6 100755 --- a/R/createMassBank.R +++ b/R/createMassBank.R @@ -1730,7 +1730,10 @@ makeMollist <- function(compiled) # For every "compiled" entry (here, compiled is not one "compiled" entry but the total # list of all compiled spectra), extract the uppermost CH$NAME and the ID (from the # first spectrum.) Make the ID into 0000 format. - + + emptySpectra <- unlist(lapply(compiled, function(cpd) length(cpd@children) == 0)) + compiled <- compiled[!emptySpectra] + tsvlist <- t(sapply(compiled, function(entry) { name <- entry@children[[1]]@info[["CH$NAME"]][[1]]