Skip to content

Commit

Permalink
Images and enrichr
Browse files Browse the repository at this point in the history
Includes ability to download 10 Reactome pathway figures. Supports enrichr results as input.
  • Loading branch information
bpickett authored Mar 2, 2023
1 parent 5b6bd58 commit cdf514b
Showing 1 changed file with 83 additions and 7 deletions.
90 changes: 83 additions & 7 deletions Pathways2Targets2.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,12 @@ args=(commandArgs(TRUE))
if(length(args)==0){
print("No arguments supplied.")
}
images <- TRUE #retrieve images from Reactome

#setwd("~/Desktop")
infile <- args[1]
#infile <- "CRC_edgeR.txt_entrez.tsv_2022-10-08_08-23-30_SPIA_Results.csv"
#infile <- "enrichr_results1.csv" #enrichr data
outfile <- paste0(infile,"-Treatments.tsv")
outfile1 <- paste0(infile,"-RankedTargets.tsv")
setwd("~/fsl_groups/fslg_PickettLabGroup/spia")
Expand All @@ -21,6 +24,7 @@ merged_drugs <- as.data.frame(NULL)
trac_vector <- as.vector(c(1,2,3,9,10,11,18,19,20,26,27,28))

#define weights
vlow <- 0.01
low <- 0.5
med <- 1
hi <- 2
Expand All @@ -42,6 +46,35 @@ humanPanther <- pathways("hsapiens", "panther")
#read in spia results table
#setwd("~/Downloads")
in_data <- read.csv(file = infile, header = TRUE, sep = ",")
enrichr <- FALSE

#modify input table if it was generated by enrichr
if(ncol(in_data)==9){#data is csv from enrichr analysis
#filter non-significant pathways
in_data <- subset(in_data,Adjusted.P.value < 0.05)
#add Reactome column
new_path_col <- as.vector(rep("Reactome",nrow(in_data)))
in_data <- cbind(in_data,new_path_col)
#manipulate data frame
in_data$Old.P.value <- NULL
in_data$Old.Adjusted.P.value <- NULL
Split <- strsplit(as.character(in_data$Overlap), "/", fixed = TRUE)
NDE <- sapply(Split, "[", 1)
pSize <- sapply(Split, "[", 2)
in_data <- cbind(in_data,NDE)
in_data <- cbind(in_data,pSize)
in_data$Overlap <- NULL
#change colnames for significance and pathway name
vec_enrichr_names <- as.vector(c("Name","pG","pGFWER","Odds.Ratio","Combined.Score","Genes","SourceDB","NDE","pSize"))
colnames(in_data) <- vec_enrichr_names
Split1 <- strsplit(as.character(in_data$Name), " R-HSA-", fixed = TRUE)
Reactome_path_name <- as.vector(sapply(Split1, "[", 1))
#Reactome_path_ID <- sapply(Split, "[", 2)
in_data$Name <- Reactome_path_name
enrichr <- TRUE
rm(Split1, Split, NDE, pSize)
}

sig_paths <- as.vector(in_data$Name)
sig_dbs <- as.vector(in_data$SourceDB)

Expand All @@ -68,7 +101,7 @@ for(i in 1:length(sig_dbs)){
}else if(sig_dbs[i] == "Reactome"){
#print("in Reactome loop")
if(length(humanReactome@entries[[sig_paths[i]]])==0){
print(paste0("Skipping...Pathway ",i,"of ", length(sig_dbs)," : ",sig_paths[i]," -- No Protein Data"))
print(paste0("Skipping...Pathway ",i," of ", length(sig_dbs)," : ",sig_paths[i]," -- No Protein Data"))
next()
}
db_entrezID <- as.vector(humanReactome@entries[[sig_paths[i]]]@protEdges[["src"]])
Expand All @@ -84,7 +117,7 @@ for(i in 1:length(sig_dbs)){
unique = TRUE)
}else if(sig_dbs[i] == "NCI"){
if(length(humanNCI@entries[[sig_paths[i]]])==0){
print(paste0("Skipping...Pathway ",i,"of ", length(sig_dbs)," : ",sig_paths[i]," -- No Protein Data"))
print(paste0("Skipping...Pathway ",i," of ", length(sig_dbs)," : ",sig_paths[i]," -- No Protein Data"))
next()
}
db_entrezID <- as.vector(humanNCI@entries[[sig_paths[i]]]@protEdges[["src"]])
Expand All @@ -100,7 +133,7 @@ for(i in 1:length(sig_dbs)){
unique = TRUE)
}else if(sig_dbs[i] == "Panther"){
if(length(humanPanther@entries[[sig_paths[i]]])==0){
print(paste0("Skipping...Pathway ",i,"of ", length(sig_dbs)," : ",sig_paths[i]," -- No Protein Data"))
print(paste0("Skipping...Pathway ",i," of ", length(sig_dbs)," : ",sig_paths[i]," -- No Protein Data"))
next()
}
db_entrezID <- as.vector(humanPanther@entries[[sig_paths[i]]]@protEdges[["src"]])
Expand All @@ -116,15 +149,15 @@ for(i in 1:length(sig_dbs)){
unique = TRUE)
}else if(sig_dbs[i] == "Biocarta"){
if(length(humanBioCarta@entries[[sig_paths[i]]])==0){
print(paste0("Skipping...Pathway ",i,"of ", length(sig_dbs)," : ",sig_paths[i]," -- No Protein Data"))
print(paste0("Skipping...Pathway ",i," of ", length(sig_dbs)," : ",sig_paths[i]," -- No Protein Data"))
next()
}
db_entrezID <- as.vector(humanBioCarta@entries[[sig_paths[i]]]@protEdges[["src"]])
db_entrezID <- c(db_entrezID,humanBioCarta@entries[[sig_paths[i]]]@protEdges[["dest"]])
db_entrezID <- db_entrezID[!duplicated(db_entrezID)]
}else{
print(paste0("Error: no valid pathway: ",sig_dbs[i]))
next;
next()
}
# entrez.genes <- db_entrezID
# mart <- useDataset("hsapiens_gene_ensembl", useMart("ensembl"))
Expand Down Expand Up @@ -181,7 +214,7 @@ for(i in 1:length(sig_dbs)){

#submit to openTargets.org
for(k in 1:length(ensembl_vector)){
#k <- 5
#k <- 2
target <- ensembl_vector[k]
#target <- "ENSG00000232810"
#target <- "ENSG00000197919"
Expand Down Expand Up @@ -247,7 +280,7 @@ for(i in 1:length(sig_dbs)){
}else if(length(target_data[["target"]][["subcellularLocations"]])>0){
subCellLoc <- as.vector(unlist(target_data[["target"]][["subcellularLocations"]][[1]]))
}
#make sure to only store unique records to
#make sure to only store unique records
if(length(unique_chembl) == 0){
unique_chembl <- inter_col[1]
# #if drug has no approved indications, then add an N/A for the record
Expand All @@ -256,6 +289,9 @@ for(i in 1:length(sig_dbs)){
# }
temp_row <- as.vector(c(target_data[["target"]][["id"]],target_data[["target"]][["approvedSymbol"]],target_data[["target"]][["approvedName"]],target_data[["target"]][["associatedDiseases"]][["count"]],tractability,sm_approved,sm_advancedClinical,sm_phase1,ab_approved,ab_advancedClinical,ab_phase1,pr_approved,pr_advancedClinical,pr_phase1,oc_approved,oc_advancedClinical,oc_phase1,subCellLoc,length(target_data[["target"]][["safetyLiabilities"]]),target_data[["target"]][["knownDrugs"]][["uniqueDrugs"]],inter_col,sig_dbs[i], sig_paths[i]))
temp_row <- as.data.frame(t(temp_row))
#if(is.numeric(temp_row[22])==FALSE){
# next()
#}
merged_drugs <- as.data.frame(rbind(merged_drugs,temp_row),stringsAsFactors = FALSE, drop = FALSE)
#write.table(df_init, file = outfile, row.names = FALSE, col.names=TRUE, sep = "\t", append = FALSE)

Expand Down Expand Up @@ -463,4 +499,44 @@ merged_drugs1 <- merged_drugs1[order(-merged_drugs1$Weighted_Score,
),]
write.table(merged_drugs1, file = outfile1, row.names = FALSE, col.names=TRUE, sep = "\t", append = FALSE)

if(images){
#retrieve images for 10 most frequent Reactome pathways
reactome_merged_drugs <- subset(merged_drugs,Pathway_DB=="Reactome")
reactome_all_paths <- as.vector(reactome_merged_drugs$Pathway_Name)
temp_path_counts <- as.data.frame(table(reactome_all_paths))
temp_path_counts <- temp_path_counts[order(-temp_path_counts$Freq),]
unique_reactome_paths <- as.vector(unique(reactome_all_paths))

if(length(unique_reactome_paths) >= 10){
unique_reactome_paths <- head(as.vector(temp_path_counts$reactome_all_paths),10)
}else{
unique_reactome_paths <- as.vector(temp_path_counts$reactome_all_paths)
}

#iterate through all unique pathways
for(c in 1:length(unique_reactome_paths)){
#c <- 1
# Set base URL of API endpoint
search_base_url <- "https://reactome.org/ContentService/search/query?query="
search_mid_url <- curlEscape(unique_reactome_paths[c])
search_end_url <- "&species=Homo%20sapiens&types=pathway&cluster=true&parserType=STD&Start%20row=0&rows=10&Force%20filters=false"
url_var <- paste0(search_base_url,search_mid_url,search_end_url)
getInfoInJson <- GET(url_var)
#GET('https://reactome.org/ContentService/search/query?query=Major%20pathway%20of%20rRNA%20processing%20in%20the%20nucleolus%20and%20cytosol&species=Homo%20sapiens&types=pathway&cluster=true&parserType=STD&Start%20row=0&rows=10&Force%20filters=false')#,
#parse response
res <- fromJSON(rawToChar(getInfoInJson$content))
if(length(res[["results"]][["entries"]][[1]][["stId"]][1])==1){
reactome_pathId <- res[["results"]][["entries"]][[1]][["stId"]][1]
print(paste0("Pathway ID found: ",reactome_pathId))
}else{
print("Pathway ID doesn't exist")
}
print("Retrieving pathway image...")
image_var <- paste0("https://reactome.org/ContentService/exporter/diagram/",reactome_pathId,".png")
file_dest <- getwd()
setwd("~/Downloads/")
system(paste0("wget ",image_var," -p -k --random-wait"))
}
}

print("Computing data...complete")

0 comments on commit cdf514b

Please sign in to comment.