-
Notifications
You must be signed in to change notification settings - Fork 0
/
Seurat-Project.Rmd
103 lines (75 loc) · 2.99 KB
/
Seurat-Project.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
---
title: "Seurat Project"
author: "Alquama"
date: "10/21/2023"
output: html_document
---
```{r setup, include=FALSE}
```
### Load the Libraries ###
```{r}
library(tidyr)
library(Seurat)
library(hdf5r)
library(ggplot2)
```
```{r}
# Load the NSCLC dataset
nsclc.sparse.m <- Read10X_h5(filename = '../Bioinfo Projects/20k_NSCLC_DTC_3p_nextgem_Multiplex_count_raw_feature_bc_matrix.h5')
str(nsclc.sparse.m)
cts <- nsclc.sparse.m$`Gene Expression`
```
```{r}
# Initialize the Seurat object with the raw (non-normalized data).
nsclc.seurat.obj <- CreateSeuratObject(counts = cts, project = "NSCLC", min.cells = 3, min.features = 200)
str(nsclc.seurat.obj)
nsclc.seurat.obj
```
### Pre-processing work flow ##
```{r}
## QC and selection of cells ##
View([email protected])
## % Mitochondira Percentage ##
nsclc.seurat.obj[["percent.mt"]] <- PercentageFeatureSet(nsclc.seurat.obj, pattern = "^MT-")
View([email protected])
# Visualize QC metrics as a violin plot ##
VlnPlot(nsclc.seurat.obj, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
FeatureScatter(nsclc.seurat.obj, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") +
geom_smooth(method = 'lm')
VlnPlot(nsclc.seurat.obj, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
# Filtering ##
nsclc.seurat.obj <- subset(nsclc.seurat.obj, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 &
percent.mt < 5)
# Normalize data ###
nsclc.seurat.obj <- NormalizeData(nsclc.seurat.obj)
str(nsclc.seurat.obj)
### # 4. Identify highly variable features ##
nsclc.seurat.obj <- FindVariableFeatures(nsclc.seurat.obj, selection.method = "vst", nfeatures = 2000)
# Identify the 10 most highly variable genes
top10 <- head(VariableFeatures(nsclc.seurat.obj), 10)
# plot variable features with and without labels
plot1 <- VariableFeaturePlot(nsclc.seurat.obj)
LabelPoints(plot = plot1, points = top10, repel = TRUE)
# Scaling
all.genes <- rownames(nsclc.seurat.obj)
nsclc.seurat.obj <- ScaleData(nsclc.seurat.obj, features = all.genes)
str(nsclc.seurat.obj)
# Perform Linear dimensionality reduction
nsclc.seurat.obj <- RunPCA(nsclc.seurat.obj, features = VariableFeatures(object = nsclc.seurat.obj))
# visualize PCA results
print(nsclc.seurat.obj[["pca"]], dims = 1:5, nfeatures = 5)
DimHeatmap(nsclc.seurat.obj, dims = 1, cells = 500, balanced = TRUE)
# determine dimensionality of the data
ElbowPlot(nsclc.seurat.obj)
# 7. Clustering ##
nsclc.seurat.obj <- FindNeighbors(nsclc.seurat.obj, dims = 1:15)
# understanding resolution
nsclc.seurat.obj <- FindClusters(nsclc.seurat.obj, resolution = c(0.1,0.3, 0.5, 0.7, 1))
View([email protected])
DimPlot(nsclc.seurat.obj, group.by = "RNA_snn_res.0.5", label = TRUE)
# setting identity of clusters
Idents(nsclc.seurat.obj)
Idents(nsclc.seurat.obj) <- "RNA_snn_res.0.1"
Idents(nsclc.seurat.obj)
nsclc.seurat.obj <- RunUMAP(nsclc.seurat.obj, dims = 1:15)
```