-
Notifications
You must be signed in to change notification settings - Fork 13
QTL mapping on small chunks using snakemake
Marc Jan Bonder edited this page Oct 22, 2024
·
8 revisions
Information on snakemake
To run the pipeline on chunks you can use your feature annotation file to create chunking file for the snakemake pipeline.
Below is the R code to create such a chunking file. You can put the numbers of genes per chunk yourself (currently 50).
library(matrixStats)
library(readr)
#### gene annotation
gene_anno = read.delim("/vol/projects/wli/projects/genetics/sw/limix/data/anno.nochr.txt",as.is=T)
testCombinations = NULL
#
nGenes = 50
startPos = 0
endOffset = 1000000000
sink("/vol/projects/wli/projects/genetics/sw/limix/data/chunks.txt")
for(chr in unique(gene_anno$chromosome)){
#print(chr)
annotationRel = gene_anno[which(gene_anno$chromosome==chr),]
annotationRel = annotationRel[order(annotationRel$start,annotationRel$end),]
##First go through the list to fix genes to they all touch.
annotationRel$start[1] = startPos
for(i in 2:nrow(annotationRel)){
if(i == nrow(annotationRel)){
annotationRel$end[i] = annotationRel$end[i]+endOffset
}
#If "overlapping" than we don't need to do anything.
if((annotationRel$start[i]>annotationRel$end[i-1])){
#print(i)
distance = (annotationRel$start[i]-annotationRel$end[i-1])/2
annotationRel$start[i] = ceiling(annotationRel$start[i]-distance)
annotationRel$end[i-1] = floor(annotationRel$end[i-1]+distance)
}
}
chunks = seq(1, nrow(annotationRel),nGenes)
#Need to add the last one as a separate entry.
if(chunks[length(chunks)] < nrow(annotationRel)){
chunks = c(chunks,(nrow(annotationRel)+1))
}
for(i in 1:(length(chunks)-1)){
cat(paste(chr,":",annotationRel$start[chunks[i]],"-",annotationRel$end[(chunks[i+1]-1)],"\n",sep=""))
}
}
sink()