-
Notifications
You must be signed in to change notification settings - Fork 1
/
lydia_grant_ZH_ONC.R
62 lines (59 loc) · 3.37 KB
/
lydia_grant_ZH_ONC.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
ONC_1d <- read.csv('C://Bioinf/Zhigang_ONC/GSE137398_ONCRGCs_1w_afterCrush_count_mat.csv')
rownames(ONC_1d) <- ONC_1d$X
ONC_1d <- ONC_1d[,-1]
ONC_1d <- CreateSeuratObject(counts = ONC_1d, min.cells = 3, min.features = 100)
s.genes <- cc.genes.updated.2019$s.genes
g2m.genes <- cc.genes.updated.2019$g2m.genes
convertHumanGeneList <- function(x){
require("biomaRt")
human = useEnsembl("ensembl", dataset = "hsapiens_gene_ensembl", mirror = "www", host = "dec2021.archive.ensembl.org")
mouse = useEnsembl("ensembl", dataset = "mmusculus_gene_ensembl", mirror = "www", host = "dec2021.archive.ensembl.org")
genesV2 = getLDS(attributes = c("hgnc_symbol"), filters = "hgnc_symbol", values = x , mart = human, attributesL = c("mgi_symbol"), martL = mouse, uniqueRows=T)
humanx <- unique(genesV2[, 2])
# Print the first 6 genes found to the screen
print(head(humanx))
return(humanx)
}
m.s.genes <- convertHumanGeneList(cc.genes.updated.2019$s.genes)
m.g2m.genes <- convertHumanGeneList(cc.genes.updated.2019$g2m.genes)
ProcessSeu <- function(Seurat){
Seurat <- NormalizeData(Seurat)
Seurat <- FindVariableFeatures(Seurat, selection.method = "vst", nfeatures = 2000)
Seurat <- ScaleData(Seurat, verbose = T, vars.to.regress = c('nCount_RNA', 'nFeature_RNA', 'percent.mt',"percent.rb","S.Score","G2M.Score"))
Seurat <- RunPCA(Seurat)
Seurat <- FindNeighbors(Seurat, dims = 1:30)
Seurat <- FindClusters(Seurat, resolution = 1)
Seurat <- RunUMAP(Seurat, dims = 1:30)
Seurat <- RunTSNE(Seurat, dims.use = 1:30 )
DimPlot(object = Seurat, reduction = "umap")
return (Seurat)
}
ONC_1d [["percent.rb"]] <- PercentageFeatureSet(ONC_1d , pattern = "^Rps|^Rpl|^Mrps|^Mrpl", assay = 'RNA')
ONC_1d <- CellCycleScoring(ONC_1d, s.features = m.s.genes, g2m.features = m.g2m.genes, set.ident = FALSE)
ONC_1d[["percent.mt"]] <- PercentageFeatureSet(ONC_1d, pattern = "^mt-")
VlnPlot(ONC_1d, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", 'percent.rb'), ncol = 4)
ONC_1d <- subset(ONC_1d, subset = nCount_RNA > 100 & nCount_RNA < 70000 & nFeature_RNA > 100 & nFeature_RNA < 9000 & percent.mt < 20 & percent.rb < 35)
ONC_1d <- ScaleData(ONC_1d, verbose = T, vars.to.regress = c('nCount_RNA', 'nFeature_RNA','percent.mt', "percent.rb","S.Score","G2M.Score"))
gc()
ONC_1d <- ProcessSeu(ONC_1d)
RDoublet <- function(tmp){
sweep.res.list <- paramSweep_v3(tmp, PCs = 1:30, sct = FALSE)
sweep.stats <- summarizeSweep(sweep.res.list, GT = FALSE)
bcmvn <- find.pK(sweep.stats)
pKopt <- as.numeric(as.character(bcmvn$pK[bcmvn$BCmetric == max(bcmvn$BCmetric)]))
pKopt <- pKopt[order(pKopt, decreasing = TRUE) ]
pKopt <- pKopt[1]
homotypic.prop <- modelHomotypic(tmp$seurat_clusters)
nExp_poi <- round(0.1*length(colnames(tmp))) ## Assuming 10% doublet formation rate
nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
tmp <- doubletFinder_v3(tmp, PCs = 1:30, pN = 0.25, pK = pKopt, nExp = nExp_poi, reuse.pANN = FALSE)
tmp <- doubletFinder_v3(tmp, PCs = 1:30, pN = 0.25, pK = pKopt, nExp = nExp_poi.adj, reuse.pANN = paste("pANN_0.25",pKopt,nExp_poi, sep="_"))
return (tmp)
}
ONC_1d <- RDoublet(ONC_1d)
ONC_1d <- subset(ONC_1d, cells = colnames(ONC_1d)[which(ONC_1d[[]][12] == 'Singlet')])
ONC_1d <- subset(ONC_1d, cells = colnames(ONC_1d)[which(ONC_1d[[]][13] == 'Singlet')])
gc()
ONC_1d <- ProcessSeu(ONC_1d)
ONC_1d$stage <- 'ONC 1 week ZH'
SaveH5Seurat(ONC_1d, 'C://Bioinf/Zhigang_ONC/ONC_1w_EK.h5Seurat')