From 90757b48fa5a60933d35859392decf0f1b716c3f Mon Sep 17 00:00:00 2001
From: Viktor Henmyr <viktor_henmyr@hotmail.com>
Date: Fri, 4 Oct 2019 16:17:48 +0200
Subject: [PATCH] cleaning up, make more readable

---
 bin/scoutloader.pl |  41 +++++++++++---
 main.nf            | 134 +++++++++++++++++++++++++++++++++------------
 2 files changed, 134 insertions(+), 41 deletions(-)
diff --git a/bin/scoutloader.pl b/bin/scoutloader.pl
index 6d0fd421..e76ea94e 100755
--- a/bin/scoutloader.pl
+++ b/bin/scoutloader.pl
@@ -1,5 +1,5 @@
 #!/usr/bin/perl -w
-
+use Backticks;
 use strict;
 my $directory = '/fs1/results/cron/scout';
 
@@ -8,8 +8,8 @@
 while (my $file = readdir(DIR)) {
 
     if ( $file =~ /\.yaml/) {
-        my $fullpath = $directory."/".$file;
-        scoutcommand($fullpath);
+        
+        scoutcommand($directory,$file);
     }
 
 }
@@ -18,14 +18,41 @@
 
 
 sub scoutcommand {
-    my $yaml_file = shift;
+    my ($directory, $file) = @_;
+
+    my $yaml_file = $directory."/".$file;
     my $command = "ssh viktor\@cmdscout1.lund.skane.se 'scout load case $yaml_file'";
     my $log = '/fs1/results/cron/scout/scout_upload.log';
+    my $errlog = '/fs1/results/cron/scout/scout_upload.errlog';
     my $datestring = localtime();
     open(LOG, '>>' , $log) or die $!;
-    print LOG "$datestring :: $yaml_file was loaded using: $command\n\n";
-    my $go = `$command`;
+    
+    
+    my $results = `$command`;
+    my $status = $results->success;
+
+    if ( $status ) {
+        print LOG "$datestring :: $yaml_file was loaded using: $command\n\n";
+        unlink $yaml_file;
+    }
+    else {
+        my $infile = 0;
+        print "HAPPENS\n";
+        open(ERRLOG, $errlog) or die $!;
+        while (<ERRLOG>) {
+            print $file,"\n";
+            if ($_ =~ /\/$file:/) {
+                $infile = 1;
+            }
+        }
+        close(ERRLOG);
+        open(ERRLOG, '>>' , $errlog) or die $!;
+        if ($infile == 0) {
+            print ERRLOG "$datestring :: $yaml_file: could not be loaded.\n";
+        }
+        close(ERRLOG);
+    }
     close(LOG);
-    unlink $yaml_file;
+    
     
 }
\ No newline at end of file
diff --git a/main.nf b/main.nf
index 81fa159d..b5da8743 100644
--- a/main.nf
+++ b/main.nf
@@ -236,19 +236,29 @@ process bqsr {
     bam_neigh = commons.join(' -i ')
 
     """
-    sentieon driver -t ${task.cpus} -r $genome_file -i $bam_neigh $shard --algo QualCal -k $KNOWN1 -k $KNOWN2 ${shard_name}_${id}.bqsr.table
+    sentieon driver \\
+        -t ${task.cpus} \\
+        -r $genome_file \\
+        -i $bam_neigh $shard \\
+        --algo QualCal -k $KNOWN1 -k $KNOWN2 ${shard_name}_${id}.bqsr.table
     """
 }
 
 // Merge the bqrs shards
 process merge_bqsr {
     publishDir "${OUTDIR}/bam/wgs/bqsr_tables"
+
     input:
     set id, file(tables) from bqsr_table.groupTuple()
+
     output:
     set val(id), file("${id}_merged.bqsr.table") into bqsr_merged
+
     """
-    sentieon driver --passthru --algo QualCal --merge ${id}_merged.bqsr.table $tables
+    sentieon driver \\
+        --passthru \\
+        --algo QualCal \\
+        --merge ${id}_merged.bqsr.table $tables
     """
 }
 bqsr_merged
@@ -298,7 +308,13 @@ process bam_recal {
     group = "bams"
 
     """
-    sentieon driver -t ${task.cpus} -r $genome_file -i $bam -q $table --algo QualCal -k $KNOWN1 -k $KNOWN2 ${id}_recal_post --algo ReadWriter ${id}_recal.bam
+    sentieon driver \\
+        -t ${task.cpus} \\
+        -r $genome_file \\
+        -i $bam \\
+        -q $table \\
+        --algo QualCal -k $KNOWN1 -k $KNOWN2 ${id}_recal_post \\
+        --algo ReadWriter ${id}_recal.bam
     """
 }
 
@@ -319,10 +335,13 @@ merged_recal_dedup_bam.into{ mrdb1; mrdb2; mrdb3; }
 // Do variant calling using DNAscope, sharded
 process dnascope {
     cpus 16
+
     input:
     set id, file(bams), file(bai), file(bqsr), val(shard_name), val(shard), val(one), val(two), val(three) from bam_shard_shard
+
     output:
     set id, file("${shard_name}_${id}.vcf"), file("${shard_name}_${id}.vcf.idx") into vcf_shard
+
     script:
     combo = [one, two, three]
     combo = (combo - 0) //first dummy value
@@ -330,8 +349,14 @@ process dnascope {
     commons = (combo.collect{ "${it}_${id}.bam" })   //add .bam to each shardie, remove all other bams
     bam_neigh = commons.join(' -i ')
     type = mode == "family" ? "--emit_mode GVCF" : ""
+
     """
-    /opt/sentieon-genomics-201711.05/bin/sentieon driver -t ${task.cpus} -r $genome_file -i $bam_neigh $shard -q $bqsr --algo DNAscope $type ${shard_name}_${id}.vcf
+    /opt/sentieon-genomics-201711.05/bin/sentieon driver \\
+        -t ${task.cpus} \\
+        -r $genome_file \\
+        -i $bam_neigh $shard \\
+        -q $bqsr \\
+        --algo DNAscope $type ${shard_name}_${id}.vcf
     """
 }
 
@@ -349,7 +374,11 @@ process merge_vcf {
     group = "vcfs"
     vcfs_sorted = vcfs.sort(false) { a, b -> a.getBaseName().tokenize("_")[0] as Integer <=> b.getBaseName().tokenize("_")[0] as Integer } .join(' ')
     """
-    /opt/sentieon-genomics-201711.05/bin/sentieon driver -t ${task.cpus} --passthru --algo DNAscope --merge ${id}.dnascope.vcf $vcfs_sorted
+    /opt/sentieon-genomics-201711.05/bin/sentieon driver \\
+        -t ${task.cpus} \\
+        --passthru \\
+        --algo DNAscope \\
+        --merge ${id}.dnascope.vcf $vcfs_sorted
     """
 }
 
@@ -372,15 +401,20 @@ process gvcf_combine {
     // Om fler än en vcf, GVCF combine annars döp om och skickade vidare
     if (mode == "family" ) {
         ggvcfs = vcf.join(' -v ')
+
         """
-        sentieon driver -t ${task.cpus} -r $genome_file --algo GVCFtyper \\
-        -v $ggvcfs ${group}.combined.gvcf
+        sentieon driver \\
+            -t ${task.cpus} \\
+            -r $genome_file \\
+            --algo GVCFtyper \\
+            -v $ggvcfs ${group}.combined.gvcf
         """
     }
     // annars ensam vcf, skicka vidare
     else {
         ggvcf = vcf.join('')
         gidx = idx.join('')
+
         """
         mv ${ggvcf} ${group}.combined.gvcf
         mv ${gidx} ${group}.combined.gvcf.idx
@@ -392,8 +426,10 @@ process gvcf_combine {
 process create_ped {
     input:
     set group, id, sex, mother, father, phenotype, diagnosis from ped
+
     output:
     file("${group}.ped") into ped_ch
+
     script:
     if ( sex =~ /F/) {
         sex = "2"
@@ -413,6 +449,7 @@ process create_ped {
     if ( mother == "" ) {
         mother = "0"
     }
+
     """
     echo "${group}\t${id}\t${father}\t${mother}\t${sex}\t${phenotype}" > ${group}.ped
     """
@@ -437,10 +474,15 @@ process madeline {
     when:
     mode == "family"
 
-    script:
     """
-    ped_parser -t ped $ped --to_madeline -o ${ped}.madeline
-    madeline2 -L "IndividualId" ${ped}.madeline -o ${ped}.madeline -x xml
+    ped_parser \\
+        -t ped $ped \\
+        --to_madeline \\
+        -o ${ped}.madeline
+    madeline2 \\
+        -L "IndividualId" ${ped}.madeline \\
+        -o ${ped}.madeline \\
+        -x xml
     """
 }
 
@@ -469,9 +511,11 @@ process intersect {
 
 process split_normalize {
     cpus 16
+
     input:
     //set group, file(vcf) from vcf_temp
     set group, file(vcf) from intersected_vcf
+
     output:
     set group, file("${group}.norm.DPAF.vcf") into split
     
@@ -487,33 +531,36 @@ process split_normalize {
 process annotate_vep {
     container = '/fs1/resources/containers/container_VEP.sif'
     cpus 56
+
     input:
     set group, file(vcf) from split
+
     output:
     set group, file("${group}.vep.vcf") into vep
+
     """
     vep \\
-    -i ${vcf} \\
-    -o ${group}.vep.vcf \\
-    --offline \\
-    --merged \\
-    --everything \\
-    --vcf \\
-    --no_stats \\
-    --fork ${task.cpus} \\
-    --force_overwrite \\
-    --plugin CADD,$CADD \\
-    --plugin LoFtool \\
-    --plugin MaxEntScan,$MAXENTSCAN,SWA,NCSS \\
-    --fasta $VEP_FASTA \\
-    --dir_cache $VEP_CACHE \\
-    --dir_plugins $VEP_CACHE/Plugins \\
-    --distance 200 \\
-    -cache \\
-    -custom $GNOMAD \\
-    -custom $GERP \\
-    -custom $PHYLOP \\
-    -custom $PHASTCONS
+        -i ${vcf} \\
+        -o ${group}.vep.vcf \\
+        --offline \\
+        --merged \\
+        --everything \\
+        --vcf \\
+        --no_stats \\
+        --fork ${task.cpus} \\
+        --force_overwrite \\
+        --plugin CADD,$CADD \\
+        --plugin LoFtool \\
+        --plugin MaxEntScan,$MAXENTSCAN,SWA,NCSS \\
+        --fasta $VEP_FASTA \\
+        --dir_cache $VEP_CACHE \\
+        --dir_plugins $VEP_CACHE/Plugins \\
+        --distance 200 \\
+        -cache \\
+        -custom $GNOMAD \\
+        -custom $GERP \\
+        -custom $PHYLOP \\
+        -custom $PHASTCONS
     """
 }
 
@@ -521,12 +568,16 @@ process annotate_vep {
 
 process snp_sift {
     cpus 16
+
     input:
     set group, file(vcf) from vep
+
     output:
     set group, file("${group}.clinvar.vcf") into snpsift
+
     """
-    SnpSift -Xmx60g annotate $CLINVAR -info CLNSIG,CLNACC,CLNREVSTAT $vcf > ${group}.clinvar.vcf
+    SnpSift -Xmx60g annotate $CLINVAR \\
+        -info CLNSIG,CLNACC,CLNREVSTAT $vcf > ${group}.clinvar.vcf
     """
 
 }
@@ -534,21 +585,29 @@ process snp_sift {
 // // Adding SweGen allele frequencies
 process swegen_all {
     cpus 16
+
     input:
     set group, file(vcf) from snpsift
+
     output:
     set group, file("${group}.swegen.vcf") into sweall
+
     """
-    SnpSift -Xmx60g annotate $SWEGEN -name swegen -info AF $vcf > ${group}.swegen.vcf
+    SnpSift -Xmx60g annotate $SWEGEN \\
+        -name swegen \\
+        -info AF $vcf > ${group}.swegen.vcf
     """
 }
 // Annotating variants with Genmod
 process annotate_genmod {
     cpus 16
+
     input:
     set group, file(vcf) from sweall
+
     output:
     set group, file("${group}.genmod.vcf") into genmod
+    
     """
     genmod annotate --spidex $SPIDEX --annotate_regions $vcf -o ${group}.genmod.vcf
     """
@@ -562,8 +621,10 @@ process inher_models {
     input:
     set group, file(vcf) from genmod
     file(ped) from ped_inher
+
     output:
     set group, file("${group}.models.vcf") into inhermod
+
     """
     genmod models $vcf -p ${task.cpus} -f $ped > ${group}.models.vcf
     """
@@ -575,14 +636,16 @@ process inher_models {
 // Modifying CLNSIG field to allow it to be used by genmod score properly:
 process modify_vcf {
     cpus 16
+
     input:
     set group, file(vcf) from inhermod
+
     output:
     set group, file("${group}.mod.vcf") into mod_vcf
+
     """
     /opt/bin/modify_vcf_nexomeflow.pl $vcf > ${group}.mod.vcf
     """
-    //lägg till cadd i info
 } 
 
 
@@ -693,11 +756,14 @@ vcf_done.into {
 process peddy {
     publishDir "${OUTDIR}/ped/wgs", mode: 'copy' , overwrite: 'true'
     cpus 6
+
     input:
     file(ped) from ped_peddy
     set group, file(vcf), file(idx) from vcf_done1
+
     output:
     set file("${group}.ped_check.csv"),file("${group}.background_pca.json"),file("${group}.peddy.ped"),file("${group}.html"), file("${group}.het_check.csv"), file("${group}.sex_check.csv"), file("${group}.vs.html") into peddy_files
+    
     """
     source activate peddy
     python -m peddy -p ${task.cpus} $vcf $ped --prefix $group