Skip to content

Commit

Permalink
cleaning up, make more readable
Browse files Browse the repository at this point in the history
  • Loading branch information
ViktorHy committed Oct 4, 2019
1 parent e802303 commit 90757b4
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 41 deletions.
41 changes: 34 additions & 7 deletions bin/scoutloader.pl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/perl -w

use Backticks;
use strict;
my $directory = '/fs1/results/cron/scout';

Expand All @@ -8,8 +8,8 @@
while (my $file = readdir(DIR)) {

if ( $file =~ /\.yaml/) {
my $fullpath = $directory."/".$file;
scoutcommand($fullpath);

scoutcommand($directory,$file);
}

}
Expand All @@ -18,14 +18,41 @@


sub scoutcommand {
my $yaml_file = shift;
my ($directory, $file) = @_;

my $yaml_file = $directory."/".$file;
my $command = "ssh viktor\@cmdscout1.lund.skane.se 'scout load case $yaml_file'";
my $log = '/fs1/results/cron/scout/scout_upload.log';
my $errlog = '/fs1/results/cron/scout/scout_upload.errlog';
my $datestring = localtime();
open(LOG, '>>' , $log) or die $!;
print LOG "$datestring :: $yaml_file was loaded using: $command\n\n";
my $go = `$command`;


my $results = `$command`;
my $status = $results->success;

if ( $status ) {
print LOG "$datestring :: $yaml_file was loaded using: $command\n\n";
unlink $yaml_file;
}
else {
my $infile = 0;
print "HAPPENS\n";
open(ERRLOG, $errlog) or die $!;
while (<ERRLOG>) {
print $file,"\n";
if ($_ =~ /\/$file:/) {
$infile = 1;
}
}
close(ERRLOG);
open(ERRLOG, '>>' , $errlog) or die $!;
if ($infile == 0) {
print ERRLOG "$datestring :: $yaml_file: could not be loaded.\n";
}
close(ERRLOG);
}
close(LOG);
unlink $yaml_file;


}
134 changes: 100 additions & 34 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -236,19 +236,29 @@ process bqsr {
bam_neigh = commons.join(' -i ')

"""
sentieon driver -t ${task.cpus} -r $genome_file -i $bam_neigh $shard --algo QualCal -k $KNOWN1 -k $KNOWN2 ${shard_name}_${id}.bqsr.table
sentieon driver \\
-t ${task.cpus} \\
-r $genome_file \\
-i $bam_neigh $shard \\
--algo QualCal -k $KNOWN1 -k $KNOWN2 ${shard_name}_${id}.bqsr.table
"""
}

// Merge the bqrs shards
process merge_bqsr {
publishDir "${OUTDIR}/bam/wgs/bqsr_tables"

input:
set id, file(tables) from bqsr_table.groupTuple()

output:
set val(id), file("${id}_merged.bqsr.table") into bqsr_merged

"""
sentieon driver --passthru --algo QualCal --merge ${id}_merged.bqsr.table $tables
sentieon driver \\
--passthru \\
--algo QualCal \\
--merge ${id}_merged.bqsr.table $tables
"""
}
bqsr_merged
Expand Down Expand Up @@ -298,7 +308,13 @@ process bam_recal {
group = "bams"

"""
sentieon driver -t ${task.cpus} -r $genome_file -i $bam -q $table --algo QualCal -k $KNOWN1 -k $KNOWN2 ${id}_recal_post --algo ReadWriter ${id}_recal.bam
sentieon driver \\
-t ${task.cpus} \\
-r $genome_file \\
-i $bam \\
-q $table \\
--algo QualCal -k $KNOWN1 -k $KNOWN2 ${id}_recal_post \\
--algo ReadWriter ${id}_recal.bam
"""
}

Expand All @@ -319,19 +335,28 @@ merged_recal_dedup_bam.into{ mrdb1; mrdb2; mrdb3; }
// Do variant calling using DNAscope, sharded
process dnascope {
cpus 16

input:
set id, file(bams), file(bai), file(bqsr), val(shard_name), val(shard), val(one), val(two), val(three) from bam_shard_shard

output:
set id, file("${shard_name}_${id}.vcf"), file("${shard_name}_${id}.vcf.idx") into vcf_shard

script:
combo = [one, two, three]
combo = (combo - 0) //first dummy value
combo = (combo - (genomic_num_shards+1)) //last dummy value
commons = (combo.collect{ "${it}_${id}.bam" }) //add .bam to each shardie, remove all other bams
bam_neigh = commons.join(' -i ')
type = mode == "family" ? "--emit_mode GVCF" : ""

"""
/opt/sentieon-genomics-201711.05/bin/sentieon driver -t ${task.cpus} -r $genome_file -i $bam_neigh $shard -q $bqsr --algo DNAscope $type ${shard_name}_${id}.vcf
/opt/sentieon-genomics-201711.05/bin/sentieon driver \\
-t ${task.cpus} \\
-r $genome_file \\
-i $bam_neigh $shard \\
-q $bqsr \\
--algo DNAscope $type ${shard_name}_${id}.vcf
"""
}

Expand All @@ -349,7 +374,11 @@ process merge_vcf {
group = "vcfs"
vcfs_sorted = vcfs.sort(false) { a, b -> a.getBaseName().tokenize("_")[0] as Integer <=> b.getBaseName().tokenize("_")[0] as Integer } .join(' ')
"""
/opt/sentieon-genomics-201711.05/bin/sentieon driver -t ${task.cpus} --passthru --algo DNAscope --merge ${id}.dnascope.vcf $vcfs_sorted
/opt/sentieon-genomics-201711.05/bin/sentieon driver \\
-t ${task.cpus} \\
--passthru \\
--algo DNAscope \\
--merge ${id}.dnascope.vcf $vcfs_sorted
"""
}

Expand All @@ -372,15 +401,20 @@ process gvcf_combine {
// Om fler än en vcf, GVCF combine annars döp om och skickade vidare
if (mode == "family" ) {
ggvcfs = vcf.join(' -v ')

"""
sentieon driver -t ${task.cpus} -r $genome_file --algo GVCFtyper \\
-v $ggvcfs ${group}.combined.gvcf
sentieon driver \\
-t ${task.cpus} \\
-r $genome_file \\
--algo GVCFtyper \\
-v $ggvcfs ${group}.combined.gvcf
"""
}
// annars ensam vcf, skicka vidare
else {
ggvcf = vcf.join('')
gidx = idx.join('')

"""
mv ${ggvcf} ${group}.combined.gvcf
mv ${gidx} ${group}.combined.gvcf.idx
Expand All @@ -392,8 +426,10 @@ process gvcf_combine {
process create_ped {
input:
set group, id, sex, mother, father, phenotype, diagnosis from ped

output:
file("${group}.ped") into ped_ch

script:
if ( sex =~ /F/) {
sex = "2"
Expand All @@ -413,6 +449,7 @@ process create_ped {
if ( mother == "" ) {
mother = "0"
}

"""
echo "${group}\t${id}\t${father}\t${mother}\t${sex}\t${phenotype}" > ${group}.ped
"""
Expand All @@ -437,10 +474,15 @@ process madeline {
when:
mode == "family"

script:
"""
ped_parser -t ped $ped --to_madeline -o ${ped}.madeline
madeline2 -L "IndividualId" ${ped}.madeline -o ${ped}.madeline -x xml
ped_parser \\
-t ped $ped \\
--to_madeline \\
-o ${ped}.madeline
madeline2 \\
-L "IndividualId" ${ped}.madeline \\
-o ${ped}.madeline \\
-x xml
"""
}

Expand Down Expand Up @@ -469,9 +511,11 @@ process intersect {

process split_normalize {
cpus 16

input:
//set group, file(vcf) from vcf_temp
set group, file(vcf) from intersected_vcf

output:
set group, file("${group}.norm.DPAF.vcf") into split

Expand All @@ -487,68 +531,83 @@ process split_normalize {
process annotate_vep {
container = '/fs1/resources/containers/container_VEP.sif'
cpus 56

input:
set group, file(vcf) from split

output:
set group, file("${group}.vep.vcf") into vep

"""
vep \\
-i ${vcf} \\
-o ${group}.vep.vcf \\
--offline \\
--merged \\
--everything \\
--vcf \\
--no_stats \\
--fork ${task.cpus} \\
--force_overwrite \\
--plugin CADD,$CADD \\
--plugin LoFtool \\
--plugin MaxEntScan,$MAXENTSCAN,SWA,NCSS \\
--fasta $VEP_FASTA \\
--dir_cache $VEP_CACHE \\
--dir_plugins $VEP_CACHE/Plugins \\
--distance 200 \\
-cache \\
-custom $GNOMAD \\
-custom $GERP \\
-custom $PHYLOP \\
-custom $PHASTCONS
-i ${vcf} \\
-o ${group}.vep.vcf \\
--offline \\
--merged \\
--everything \\
--vcf \\
--no_stats \\
--fork ${task.cpus} \\
--force_overwrite \\
--plugin CADD,$CADD \\
--plugin LoFtool \\
--plugin MaxEntScan,$MAXENTSCAN,SWA,NCSS \\
--fasta $VEP_FASTA \\
--dir_cache $VEP_CACHE \\
--dir_plugins $VEP_CACHE/Plugins \\
--distance 200 \\
-cache \\
-custom $GNOMAD \\
-custom $GERP \\
-custom $PHYLOP \\
-custom $PHASTCONS
"""
}

// Annotating variants with SnpSift 2

process snp_sift {
cpus 16

input:
set group, file(vcf) from vep

output:
set group, file("${group}.clinvar.vcf") into snpsift

"""
SnpSift -Xmx60g annotate $CLINVAR -info CLNSIG,CLNACC,CLNREVSTAT $vcf > ${group}.clinvar.vcf
SnpSift -Xmx60g annotate $CLINVAR \\
-info CLNSIG,CLNACC,CLNREVSTAT $vcf > ${group}.clinvar.vcf
"""

}

// // Adding SweGen allele frequencies
process swegen_all {
cpus 16

input:
set group, file(vcf) from snpsift

output:
set group, file("${group}.swegen.vcf") into sweall

"""
SnpSift -Xmx60g annotate $SWEGEN -name swegen -info AF $vcf > ${group}.swegen.vcf
SnpSift -Xmx60g annotate $SWEGEN \\
-name swegen \\
-info AF $vcf > ${group}.swegen.vcf
"""
}
// Annotating variants with Genmod
process annotate_genmod {
cpus 16

input:
set group, file(vcf) from sweall

output:
set group, file("${group}.genmod.vcf") into genmod

"""
genmod annotate --spidex $SPIDEX --annotate_regions $vcf -o ${group}.genmod.vcf
"""
Expand All @@ -562,8 +621,10 @@ process inher_models {
input:
set group, file(vcf) from genmod
file(ped) from ped_inher

output:
set group, file("${group}.models.vcf") into inhermod

"""
genmod models $vcf -p ${task.cpus} -f $ped > ${group}.models.vcf
"""
Expand All @@ -575,14 +636,16 @@ process inher_models {
// Modifying CLNSIG field to allow it to be used by genmod score properly:
process modify_vcf {
cpus 16

input:
set group, file(vcf) from inhermod

output:
set group, file("${group}.mod.vcf") into mod_vcf

"""
/opt/bin/modify_vcf_nexomeflow.pl $vcf > ${group}.mod.vcf
"""
//lägg till cadd i info
}


Expand Down Expand Up @@ -693,11 +756,14 @@ vcf_done.into {
process peddy {
publishDir "${OUTDIR}/ped/wgs", mode: 'copy' , overwrite: 'true'
cpus 6

input:
file(ped) from ped_peddy
set group, file(vcf), file(idx) from vcf_done1

output:
set file("${group}.ped_check.csv"),file("${group}.background_pca.json"),file("${group}.peddy.ped"),file("${group}.html"), file("${group}.het_check.csv"), file("${group}.sex_check.csv"), file("${group}.vs.html") into peddy_files

"""
source activate peddy
python -m peddy -p ${task.cpus} $vcf $ped --prefix $group
Expand Down

0 comments on commit 90757b4

Please sign in to comment.