-
Notifications
You must be signed in to change notification settings - Fork 1
/
Figure2B_MAG_score.Rmd
177 lines (128 loc) · 7.28 KB
/
Figure2B_MAG_score.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
---
title: "Calculate MAG (Magnitude association score) score"
author: "Neerja Katiyar"
date: "`r format(Sys.time(), '%d %B, %Y')`"
output: workflowr::wflow_html
editor_options:
chunk_output_type: console
---
# Prepare the environment
```{r setup, include=FALSE}
require(knitr)
knitr::opts_chunk$set(echo = TRUE)
opts_knit$set(root.dir = "/Users/katiyn/Dropbox (JAX)/MouseAging_clean/Mice_aging_NK_resubmission/code/") #set root dir!
```
```{r libraries, include=TRUE, echo=TRUE}
library(fgsea)
library(data.table)
library(ggplot2)
library(dplyr)
library(psych)
ens2gene <- function(genes){
genome <- annotables::grcm38
# ens to gene symbol mapping
mapping <-
base::subset(genome,
genome$ensgene %in% genes,
select = c('ensgene', 'symbol'))
m <- match(genes, mapping$ensgene)
sym.genes <- mapping$symbol[m]
names(sym.genes) <- genes
return(sym.genes)
}
```
```{r read_RNAseq_DE_files, include=TRUE, echo=TRUE}
file_mem_B6 <- read.csv("./data/MEMORY_Age18vs3_B6_RNAseq.csv", sep=",", header=T)
file_mem_B6_sorted <- file_mem_B6[order(file_mem_B6$logFC, decreasing=TRUE),]
file_mem_NZO <- read.csv("./data/MEMORY_Age18vs3_NZO_RNAseq.csv", sep=",", header=T)
file_mem_NZO_sorted <- file_mem_NZO[order(file_mem_NZO$logFC, decreasing=TRUE),]
file_naive_B6 <- read.csv("./data/NAIVE_Age18vs3_B6_RNAseq.csv", sep=",", header=T)
file_naive_B6_sorted <- file_naive_B6[order(file_naive_B6$logFC, decreasing=TRUE),]
file_naive_NZO <- read.csv("./data/NAIVE_Age18vs3_NZO_RNAseq.csv", sep=",", header=T)
file_naive_NZO_sorted <- file_naive_NZO[order(file_naive_NZO$logFC, decreasing=TRUE),]
file_pbl_B6 <- read.csv("./data/PBL_Age18vs3_B6_RNAseq.csv", sep=",", header=T)
file_pbl_B6_sorted <- file_pbl_B6[order(file_pbl_B6$logFC, decreasing=TRUE),]
file_pbl_NZO <- read.csv("./data/PBL_Age18vs3_NZO_RNAseq.csv", sep=",", header=T)
file_pbl_NZO_sorted <- file_pbl_NZO[order(file_pbl_NZO$logFC, decreasing=TRUE),]
file_spleen_B6 <- read.csv("./data/SPLEEN_Age18vs3_B6_RNAseq.csv", sep=",", header=T)
file_spleen_B6_sorted <- file_spleen_B6[order(file_spleen_B6$logFC, decreasing=TRUE),]
file_spleen_NZO <- read.csv("./data/SPLEEN_Age18vs3_NZO_RNAseq.csv", sep=",", header=T)
file_spleen_NZO_sorted <- file_spleen_NZO[order(file_spleen_NZO$logFC, decreasing=TRUE),]
```
```{r convert_geneID, include=TRUE, echo=TRUE}
file_mem_B6_sorted$gene <- ens2gene(file_mem_B6_sorted$Gene.Ensembl)
file_mem_NZO_sorted$gene <- ens2gene(file_mem_NZO_sorted$Gene.Ensembl)
file_naive_B6_sorted$gene <- ens2gene(file_naive_B6_sorted$Gene.Ensembl)
file_naive_NZO_sorted$gene <- ens2gene(file_naive_NZO_sorted$Gene.Ensembl)
file_pbl_B6_sorted$gene <- ens2gene(file_pbl_B6_sorted$Gene.Ensembl)
file_pbl_NZO_sorted$gene <- ens2gene(file_pbl_NZO_sorted$Gene.Ensembl)
file_spleen_B6_sorted$gene <- ens2gene(file_spleen_B6_sorted$Gene.Ensembl)
file_spleen_NZO_sorted$gene <- ens2gene(file_spleen_NZO_sorted$Gene.Ensembl)
```
```{r common_genes, include=TRUE, echo=TRUE}
common_genes <- Reduce(intersect, list(file_mem_B6_sorted$Gene.Ensembl,file_mem_NZO_sorted$Gene.Ensembl,file_naive_B6_sorted$Gene.Ensembl, file_naive_NZO_sorted$Gene.Ensembl, file_pbl_B6_sorted$Gene.Ensembl, file_pbl_NZO_sorted$Gene.Ensembl, file_spleen_B6_sorted$Gene.Ensembl, file_spleen_NZO_sorted$Gene.Ensembl))
file_mem_B6_subset_common <- subset(file_mem_B6_sorted, file_mem_B6_sorted$Gene.Ensembl %in% common_genes)
file_mem_NZO_subset_common <- subset(file_mem_NZO_sorted, file_mem_NZO_sorted$Gene.Ensembl %in% common_genes)
file_naive_B6_subset_common <- subset(file_naive_B6_sorted, file_naive_B6_sorted$Gene.Ensembl %in% common_genes)
file_naive_NZO_subset_common <- subset(file_naive_NZO_sorted, file_naive_NZO_sorted$Gene.Ensembl %in% common_genes)
file_pbl_B6_subset_common <- subset(file_pbl_B6_sorted, file_pbl_B6_sorted$Gene.Ensembl %in% common_genes)
file_pbl_NZO_subset_common <- subset(file_pbl_NZO_sorted, file_pbl_NZO_sorted$Gene.Ensembl %in% common_genes)
file_spleen_B6_subset_common <- subset(file_spleen_B6_sorted, file_spleen_B6_sorted$Gene.Ensembl %in% common_genes)
file_spleen_NZO_subset_common <- subset(file_spleen_NZO_sorted, file_spleen_NZO_sorted$Gene.Ensembl %in% common_genes)
```
```{r add_ranks, include=TRUE, echo=TRUE}
#Add a rank column
file_mem_B6_subset_common$rank <- rank(abs(file_mem_B6_subset_common$logFC))
rank_mem_B6 = file_mem_B6_subset_common %>% mutate(rank = order(logFC, decreasing = T))
write.table(rank_mem_B6, "./output/Rank_mem_B6.txt", sep="\t", quote=FALSE)
file_mem_NZO_subset_common$rank <- rank(abs(file_mem_NZO_subset_common$logFC))
rank_mem_NZO = file_mem_NZO_subset_common %>% mutate(rank = order(logFC, decreasing = T))
write.table(rank_mem_NZO, "./output/Rank_mem_NZO.txt", sep="\t", quote=FALSE)
file_naive_B6_subset_common$rank <- rank(abs(file_naive_B6_subset_common$logFC))
rank_naive_B6 = file_naive_B6_subset_common %>% mutate(rank = order(logFC, decreasing = T))
write.table(rank_naive_B6, "./output/Rank_naive_B6.txt", sep="\t", quote=FALSE)
file_naive_NZO_subset_common$rank <- rank(abs(file_naive_NZO_subset_common$logFC))
rank_naive_NZO = file_naive_NZO_subset_common %>% mutate(rank = order(logFC, decreasing = T))
write.table(rank_naive_NZO, "./output/Rank_naive_NZO.txt", sep="\t", quote=FALSE)
file_pbl_B6_subset_common$rank <- rank(abs(file_pbl_B6_subset_common$logFC))
rank_pbl_B6 = file_pbl_B6_subset_common %>% mutate(rank = order(logFC, decreasing = T))
write.table(rank_pbl_B6, "./output/Rank_pbl_B6.txt", sep="\t", quote=FALSE)
file_pbl_NZO_subset_common$rank <- rank(abs(file_pbl_NZO_subset_common$logFC))
rank_pbl_NZO = file_pbl_NZO_subset_common %>% mutate(rank = order(logFC, decreasing = T))
write.table(rank_pbl_NZO, "./output/Rank_pbl_NZO.txt", sep="\t", quote=FALSE)
file_spleen_B6_subset_common$rank <- rank(abs(file_spleen_B6_subset_common$logFC))
rank_spleen_B6 = file_spleen_B6_subset_common %>% mutate(rank = order(logFC, decreasing = T))
write.table(rank_spleen_B6, "./output/Rank_spleen_B6.txt", sep="\t", quote=FALSE)
file_spleen_NZO_subset_common$rank <- rank(abs(file_spleen_NZO_subset_common$logFC))
rank_spleen_NZO = file_spleen_NZO_subset_common %>% mutate(rank = order(logFC, decreasing = T))
write.table(rank_spleen_NZO, "./output/Rank_spleen_NZO.txt", sep="\t", quote=FALSE)
```
```{r MAG_score, include=TRUE, echo=TRUE}
n=1
gene_list <- list()
ranks_genes <- list()
tissues_B6 <- list(rank_mem_B6, rank_naive_B6, rank_pbl_B6, rank_spleen_B6)
tissues_NZO <- list(rank_mem_NZO, rank_naive_NZO, rank_pbl_NZO, rank_spleen_NZO)
for (i in common_genes) {
gene_ranks_final_score = 0
n =1
for (j in 1:length(tissues_B6)) {
df1 <- as.data.frame(tissues_B6[n])
df2 <- as.data.frame(tissues_NZO[n])
n = n+1
gene_tissue_B6 <- df1 %>% filter(Gene.Ensembl == i)
gene_tissue_NZO <- df2 %>% filter(Gene.Ensembl == i)
gene_ranks_new <- c((1/gene_tissue_B6$rank), (1/gene_tissue_NZO$rank))
gene_rank_score <- geometric.mean(gene_ranks_new)
gene_ranks_final_score = gene_ranks_final_score + gene_rank_score
}
gene_list[i] <- i
ranks_genes[i] <- gene_ranks_final_score
}
df_total <- as.data.frame(do.call(rbind, ranks_genes))
names(df_total) <- c("Score")
df_total$normalized_score <- df_total$Score/length(common_genes)
df_total$Ensembl_Gene <- rownames(df_total)
df_total$gene <- ens2gene(df_total$Ensembl_Gene)
write.table(df_total, "./output/MAG_tissues_all.txt", sep="\t", quote=FALSE, row.names=FALSE)
```