Skip to content

QTL mapping on small chunks using snakemake

Marc Jan Bonder edited this page Oct 22, 2024 · 8 revisions

Information on snakemake

Chunk file

To run the pipeline on chunks you can use your feature annotation file to create chunking file for the snakemake pipeline.

Below is the R code to create such a chunking file. You can put the numbers of genes per chunk yourself (currently 50).

library(matrixStats)
library(readr)
#### gene annotation
gene_anno = read.delim("/vol/projects/wli/projects/genetics/sw/limix/data/anno.nochr.txt",as.is=T)
testCombinations = NULL
#
nGenes = 50
startPos = 0
endOffset = 1000000000
sink("/vol/projects/wli/projects/genetics/sw/limix/data/chunks.txt")
for(chr in unique(gene_anno$chromosome)){
  #print(chr)
  annotationRel = gene_anno[which(gene_anno$chromosome==chr),]
  annotationRel = annotationRel[order(annotationRel$start,annotationRel$end),]
  ##First go through the list to fix genes to they all touch.
  annotationRel$start[1] = startPos
  for(i in 2:nrow(annotationRel)){
    if(i == nrow(annotationRel)){
      annotationRel$end[i] = annotationRel$end[i]+endOffset
    }
    #If "overlapping" than we don't need to do anything.
    if((annotationRel$start[i]>annotationRel$end[i-1])){
      #print(i)
      distance = (annotationRel$start[i]-annotationRel$end[i-1])/2
      annotationRel$start[i] = ceiling(annotationRel$start[i]-distance)
      annotationRel$end[i-1] = floor(annotationRel$end[i-1]+distance)
    }
  }
  chunks = seq(1, nrow(annotationRel),nGenes)
  #Need to add the last one as a separate entry.
  if(chunks[length(chunks)] < nrow(annotationRel)){
    chunks = c(chunks,(nrow(annotationRel)+1))
  }
  for(i in 1:(length(chunks)-1)){
    cat(paste(chr,":",annotationRel$start[chunks[i]],"-",annotationRel$end[(chunks[i+1]-1)],"\n",sep=""))
  }
}
sink()
Clone this wiki locally