Skip to content

Latest commit

 

History

History
258 lines (194 loc) · 8.75 KB

015b_plot_concentrations_pea.md

File metadata and controls

258 lines (194 loc) · 8.75 KB

015b_plot_concentrations_pea.R

xrydbh 2022-02-04

library(tidyverse)
library(RColorBrewer)
library(readxl)
library(writexl)

# Set path
# If script is running separately move up one step with setwd(".."). Only needed once (for first script run separately).



loaded_cyt_objs <- load("./RData/15_annoDb.cytokine.chemokine.RData")

##############################
# Format data for rank plot
########################


# read pea file
pea.raw.0.0 <- readxl::read_excel("./data/001_pea_raw.xlsx") 

missing_values_allowed_for_prot <- 0 #3 for with_at_least_5_vals (_in_high_or_low) or 
filter_missing_val_both_groups_or_either_or <- "either_or" #"either_or" # or "both"


# make longer

NPX.0.3 <- pea.raw.0.0 %>% 
  pivot_longer(cols=contains("_"),names_to="SC_Donor",values_to = "NPX") %>% 
  mutate(NPX=as.numeric(NPX)) %>% 
  mutate(LOD=as.numeric(LOD)) 

######################################
# Count numb of NAs in High and Low
# group by proteins
#################################3####
# Separate column SC_Donor into "High low Group" and "Unit 1-8"
NPX.0.3.5 <- NPX.0.3 %>%
  separate(SC_Donor,
           into = c("Group","Unit"),
           sep = "_")

# Get number of NPX below LOD (missing values) per gene and group
numb_NPX_vals_below_LOD.0.0 <- NPX.0.3.5 %>% 
  group_by(Gene.symbol,Group) %>% 
  summarise(NPX_below_LOD = sum(NPX<LOD)) %>% 
  pivot_wider(names_from = Group,values_from =NPX_below_LOD) %>% 
  dplyr::rename(Numb_NA_High=High,Numb_NA_Low=Low)
## `summarise()` has grouped output by 'Gene.symbol'. You can override using the
## `.groups` argument.
NPX.0.3.1 <- full_join(NPX.0.3,numb_NPX_vals_below_LOD.0.0 ,by="Gene.symbol")



# Replace UniprotID NA for Gene.symbol NT-proBNP and OlinkID  OID00131 and OID01214 by  P16860
# Also calculate average between OID00131 and OID01214

#what OlinkID have Corresponding UniprotID as NA
#NPX.0.0 %>% filter(is.na(UniProt )) %>% pluck("OlinkID") %>% unique()


# Get average for the two NT-proBNP Olink variants
NT_proBNP <- NPX.0.3.1  %>% 
  filter(Gene.symbol=="NT-proBNP") %>% 
  group_by(SC_Donor) %>% 
  mutate(NPX=(mean(NPX))) %>% # Calculate mean
  ungroup() %>% 
  filter(OlinkID=="OID00131") # keep only on of the two laternative OlinkIDs

# Replace duplicates with average
NPX.0.4 <- NPX.0.3.1  %>% 
  filter(Gene.symbol!="NT-proBNP") %>%
  bind_rows(NT_proBNP)

# Replace UniprotID NA for NT-proBNP with "P16860"
NPX.0.4$UniprotID[is.na(NPX.0.4$UniprotID)] <- "P16860"

# Remove duplicate UniprotIDs "Q8NEV9,Q14213" (IL27a/IL27b) with only one ID
NPX.0.4$UniprotID[NPX.0.4$UniprotID=="Q8NEV9,Q14213"] <- "Q8NEV9"
# Remove duplicate UniprotIDs "Q11128,P21217" (FUT3/FUT5) with only one ID
NPX.0.4$UniprotID[NPX.0.4$UniprotID=="Q11128,P21217"] <- "Q11128"


#NPX.0.4 %>% pivot_wider(names_from=SC_Donor,values_from = NPX)
####################

# summarize and arrange by NPX
pea.for.rankplot.0.0 <- NPX.0.4 %>% 
  group_by(UniprotID) %>% # group by gene symbol
  summarize(sum_npx=sum(NPX,na.rm = T)) %>% # summarize into sum per genes across samples
  filter(sum_npx!=0) %>% # remove sums that are zero
  arrange(desc(sum_npx)) 

pea.for.rankplot <- pea.for.rankplot.0.0 %>% # sort
  mutate(rank=seq(1:nrow(pea.for.rankplot.0.0))) # crate rank variable

pea.for.rankplot.gs <- pea.for.rankplot %>% inner_join(NPX.0.4,by=c("UniprotID"="UniprotID")) %>% 
  dplyr::select(Gene.symbol,sum_npx,rank) %>% 
  distinct()

###########################
# check overlap with Cytokines and chemokines
#######################

# Get cytokines as gs in ms 
cyts_in_pea.gs3 <- pea.for.rankplot.gs %>% 
  inner_join(annoDb.cytokine.chemokine,by=c("Gene.symbol"="Gene.symbol")) %>% 
  dplyr::select(-UniprotID) %>% 
  distinct() %>% 
  mutate(GO_term=replace(GO_term, GO_term=="chemokine receptor antagonist activity", "chemokine receptor binding"))
cyts_in_pea.gs3$GO_term %>% as.factor() %>% levels()
## [1] "chemokine activity"                                          
## [2] "chemokine receptor binding"                                  
## [3] "cytokine activity"                                           
## [4] "cytokine receptor activity"                                  
## [5] "cytokine receptor binding"                                   
## [6] "hematopoietic progenitor cell differentiation"               
## [7] "hematopoietic stem cell migration"                           
## [8] "hematopoietic stem cell migration to bone marrow"            
## [9] "positive regulation of hematopoietic stem cell proliferation"
# get number of ge terms matching in ms data
goTerminPEACounts <-cyts_in_pea.gs3 %>% 
  distinct %>% 
  group_by(GO_term) %>% 
  tally()

# join
cytokine_summary_table.pea.0.0 <- goTermCounts %>% 
  full_join(goTerminPEACounts,by=c("GO_term"="GO_term"))

# replace NA with 0
cytokine_summary_table.pea.0.0$n.y <- cytokine_summary_table.pea.0.0$n.y %>% 
  tidyr::replace_na(0)

# get percenttge
cytokine_summary_table.pea.0.1 <- cytokine_summary_table.pea.0.0 %>% 
  dplyr::rename(nInAnnoDB=n.x,nInMS=n.y) %>% 
  mutate(perc_in=(nInMS/nInAnnoDB)*100) %>% 
  mutate(perc_out=((nInAnnoDB-nInMS)/nInAnnoDB)*100)

# listGO_term gene members not in MS
GO_term_gene_members_not_in_pea <- annoDb.cytokine.chemokine %>% 
  dplyr::filter(!(Gene.symbol%in%cyts_in_pea.gs3$Gene.symbol)) %>% 
  dplyr::select(-UniprotID) %>% 
  distinct() %>% 
  dplyr::group_by(GO_term) %>% 
  dplyr::summarise(Gene.symbol = paste(Gene.symbol, collapse = ","))

# list  GO_term MS gene members
GO_term_pea_gene_members <- cyts_in_pea.gs3 %>% 
  dplyr::group_by(GO_term) %>% 
  dplyr::summarise(Gene.symbol = paste(Gene.symbol, collapse = ","))

cytokine_summary_table.pea.0.2  <- cytokine_summary_table.pea.0.1  %>% 
  full_join(GO_term_pea_gene_members,by=c("GO_term"="GO_term")) %>% 
  dplyr::rename(In_pea=Gene.symbol) %>% 
  full_join(GO_term_gene_members_not_in_pea,by=c("GO_term"="GO_term")) %>% 
  dplyr::rename(Not_in_pea=Gene.symbol) 

#########################
# plot
##########################

# plot with some names
rankplot.pea <- pea.for.rankplot.gs %>% 
  ggplot(aes(x=rank, y=log2(sum_npx))) + 
  geom_point(color="grey",alpha=1) +
  #geom_point(data=cyts_in_ms.gs,color="red") +
  ggrepel::geom_text_repel(aes(x = rank, 
                               y = log2(sum_npx),
                               color=GO_term, 
                               label = Gene.symbol,
                               fontface = "bold"),
                           data=cyts_in_pea.gs3,
                           max.overlaps=20,
                           nudge_x=90
  ) +  
  #scale_color_manual(values = mycolors)+
  #scale_color_brewer(palette="Paired")
  # scale_color_brewer(values = "Paired") + #    scale_color_manual(values = rainbow(13)) +  #
  
  #geom_text(aes(label=Gene.symbol,color=as.factor(GO_term)),data=cyts_in_ms.gs3) + #, check_overlap = TRUE, nudge_x = 70
  geom_text(aes(label=Gene.symbol),data=head(pea.for.rankplot.gs,n=10L), nudge_x = -90,check_overlap = TRUE,alpha=0.5)+ #, nudge_x = -90, position=position_jitter(width=1,height=1),
  geom_text(aes(label=Gene.symbol),data=tail(pea.for.rankplot.gs,n=10L),check_overlap = TRUE, nudge_x = -90,alpha=0.5)+
  ggtitle(paste("Plot of norm_ab sum and rank of ",nrow(pea.for.rankplot)," PEA proteins",sep="") )
ggtitle(paste("Demarkating ",nrow(pea.for.rankplot.gs)," MS proteins",sep="") )
## $title
## [1] "Demarkating 459 MS proteins"
## 
## attr(,"class")
## [1] "labels"
rankplot.pea
## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced

## Warning: Removed 8 rows containing missing values (geom_point).

## Warning: Removed 2 rows containing missing values (geom_text_repel).

## Warning: Removed 8 rows containing missing values (geom_text).

pdf(file = "./out_r/15_b_rankplot_concentrations_pea.pdf",   # The directory you want to save the file in
    width = 15, # The width of the plot in inches
    height = 15) # The height of the plot in inches
rankplot.pea
## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced

## Warning: Removed 8 rows containing missing values (geom_point).

## Warning: Removed 2 rows containing missing values (geom_text_repel).

## Warning: Removed 8 rows containing missing values (geom_text).
dev.off()
## quartz_off_screen 
##                 2
write_xlsx(cytokine_summary_table.pea.0.2,"./out_r/015b_cytokine_summary_table_pea.xlsx")