SeqTech_LitReview

Jennifer Chang 10/2/2020

Sequencing Technology

HiSeq
MiSeq
NovaSeq
Nanopore
PacBio

library(tidyverse)
library(magrittr)

# Read one CSV export per sequencing-technology search, then normalise the
# column names of each table so that spaces and slashes become underscores.
# todo: loop this later
hiseq <- read_delim("csv-HiSeq-set.csv", delim = ",")
names(hiseq) <- gsub("[ /]", "_", names(hiseq))

miseq <- read_delim("csv-MiSeq-set.csv", delim = ",")
names(miseq) <- gsub("[ /]", "_", names(miseq))

novaseq <- read_delim("csv-NovaSeq-set.csv", delim = ",")
names(novaseq) <- gsub("[ /]", "_", names(novaseq))

# Nanopore has two separate export files.
nanopore1 <- read_delim("csv-nanopore-set.csv", delim = ",")
names(nanopore1) <- gsub("[ /]", "_", names(nanopore1))

nanopore2 <- read_delim("csv-nanopore-set2.csv", delim = ",")
names(nanopore2) <- gsub("[ /]", "_", names(nanopore2))

pacbio <- read_delim("csv-pacbio-set.csv", delim = ",")
names(pacbio) <- gsub("[ /]", "_", names(pacbio))

Merge Info into an Excel File

# Tag every record with the technology whose result set it came from.
# Both nanopore result sets share the same "Nanopore" flag.
hiseq$HiSeq <- TRUE
miseq$MiSeq <- TRUE
novaseq$NovaSeq <- TRUE
nanopore1$Nanopore <- TRUE
nanopore2$Nanopore <- TRUE
pacbio$PacBio <- TRUE

# Target column layout shared by all tables: the hiseq columns followed by one
# flag column per technology. names(hiseq) already contains "HiSeq" at this
# point (it was added just above), so unique() keeps that name from being
# listed twice and later being used as a duplicated column index.
new_columns <- unique(
  c(names(hiseq), "HiSeq", "MiSeq", "NovaSeq", "Nanopore", "PacBio")
)

# ===== Ensure columns exist
# Adds every name in `cname` that `data` does not already have as a new column
# filled with NA. Existing columns are left untouched.
#   data  : a data frame (or tibble)
#   cname : character vector of column names that must be present
# Returns `data`, possibly with extra NA columns appended.
fncols <- function(data, cname) {
  absent <- cname[is.na(match(cname, names(data)))]
  if (length(absent) > 0) {
    data[absent] <- NA
  }
  data
}

# Bring a table into a fixed column layout.
#   df      : a data frame (or tibble)
#   columns : character vector naming the wanted columns, in the wanted order
# Columns missing from `df` are first added as NA via fncols(); the result is
# then restricted to `columns`, in that order.
formatDf <- function(df, columns) {
  padded <- fncols(df, columns)
  dplyr::select(padded, all_of(columns))
}

# Give every table the same columns in the same order before stacking them.
hiseq <- formatDf(hiseq, new_columns)
miseq <- formatDf(miseq, new_columns)
novaseq <- formatDf(novaseq, new_columns)
nanopore1 <- formatDf(nanopore1, new_columns)
nanopore2 <- formatDf(nanopore2, new_columns)
pacbio <- formatDf(pacbio, new_columns)

# Collapse a vector into a single comma-separated string of its distinct
# values.
#   vc : an atomic vector (character, logical, numeric, ...); NAs are ignored
# Returns a length-one character vector. An empty or all-NA input gives "".
# If the joined string contains commas (several values, or values that
# themselves contain commas), it is split on "," again and the pieces are
# de-duplicated, so c("a,b", "b") becomes "a,b".
uniqMerge <- function(vc) {
  merged <- paste(unique(na.omit(vc)), collapse = ",")
  if (!grepl(",", merged, fixed = TRUE)) {
    return(merged)
  }
  # strsplit() drops an empty field after the last separator; appending one
  # extra "," makes it keep every field, including a trailing empty one.
  pieces <- strsplit(paste0(merged, ","), ",", fixed = TRUE)[[1]]
  paste(unique(pieces), collapse = ",")
}
# Print the column names of the formatted hiseq table; the `#>` lines below
# are the recorded console output.
names(hiseq)
#>  [1] "PMID"             "Title"            "Authors"          "Citation"        
#>  [5] "First_Author"     "Journal_Book"     "Publication_Year" "Create_Date"     
#>  [9] "PMCID"            "NIHMS_ID"         "DOI"              "HiSeq"           
#> [13] "MiSeq"            "NovaSeq"          "Nanopore"         "PacBio"
# Stack all technology tables, then reduce to one row per PMID. Every kept
# column is collapsed with uniqMerge(), so each output column is a string of
# the distinct values seen for that PMID. Publication_Year and Journal_Book
# are renamed to Year and Journal.
merged_df <- dplyr::summarize(
  dplyr::group_by(
    dplyr::bind_rows(hiseq, miseq, novaseq, nanopore1, nanopore2, pacbio),
    PMID
  ),
  PMCID = uniqMerge(PMCID),
  HiSeq = uniqMerge(HiSeq),
  MiSeq = uniqMerge(MiSeq),
  NovaSeq = uniqMerge(NovaSeq),
  Nanopore = uniqMerge(Nanopore),
  PacBio = uniqMerge(PacBio),
  Year = uniqMerge(Publication_Year),
  Title = uniqMerge(Title),
  Authors = uniqMerge(Authors),
  Journal = uniqMerge(Journal_Book),
  Citation = uniqMerge(Citation),
  DOI = uniqMerge(DOI),
  Create_Date = uniqMerge(Create_Date)
)
#> `summarise()` ungrouping output (override with `.groups` argument)

# Save the merged per-PMID table as an Excel workbook.
writexl::write_xlsx(x = merged_df, path = "SeqTech_new.xlsx")

# Assign each paper a single plotting group. The flag columns hold the string
# "TRUE" (not a logical) after the merge. case_when() takes the first
# condition that matches, so a paper flagged for several technologies is
# counted under the earliest one listed here.
temp <- dplyr::mutate(
  merged_df,
  groups = dplyr::case_when(
    HiSeq == "TRUE" ~ "HiSeq",
    MiSeq == "TRUE" ~ "MiSeq",
    NovaSeq == "TRUE" ~ "NovaSeq",
    Nanopore == "TRUE" ~ "Nanopore",
    PacBio == "TRUE" ~ "PacBio"
  )
)

# Stacked bar chart: number of papers per publication year, coloured by the
# technology group assigned above. Year labels are rotated to stay readable.
ggplot(data = temp, mapping = aes(x = Year, fill = groups)) +
  geom_bar() +
  labs(x = "Publication Year", y = "PubMed Papers", fill = "SeqTech") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

Heh, I think there’s something wrong with my nanopore results… there are way too many papers compared to the other technologies. I will restrict that search to “Nanopore DNA”.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

SeqTech_LitReview.md

SeqTech_LitReview.md

SeqTech_LitReview

Sequencing Technology

Merge Info an Excel File

Files

SeqTech_LitReview.md

Latest commit

History

SeqTech_LitReview.md

File metadata and controls

SeqTech_LitReview

Sequencing Technology

Merge Info an Excel File