From bfb7494dc7073e02912157c2d5a31c1cddb7c9f7 Mon Sep 17 00:00:00 2001
From: Keiran Raine
Date: Fri, 21 May 2021 12:51:47 +0100
Subject: [PATCH 01/17] add missing change log

---
 CHANGES.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/CHANGES.md b/CHANGES.md
index 3e9c762..d43d14d 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,5 +1,10 @@
 # CHANGES
 
+## 3.5.0
+
+* Update to core pindel algorithm to allow complex DI events to have longer inserted sequence than deleted
+  * Masking real events
+
 ## 3.4.1
 
 * Updated Dockerfile to use pcap-core 5.4.0 - htslib/samtools 1.11

From 4159902f4e5390bcf50af5bba92151ebfec4d99e Mon Sep 17 00:00:00 2001
From: tc20
Date: Wed, 10 Nov 2021 10:17:00 +0000
Subject: [PATCH 02/17] new FF019 rule and fragment filter flag tests, FF012 and FF015 incomplete testing

---
 .../FragmentFilterRules.pm | 45 +
 perl/t/vcfPindelFragmentFlagger.t | 698 ++++++++++++++++++
 2 files changed, 743 insertions(+)
 create mode 100644 perl/t/vcfPindelFragmentFlagger.t

diff --git a/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm b/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm
index 5b2fdec..3a8c5aa 100644
--- a/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm
+++ b/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm
@@ -87,6 +87,10 @@ my %RULE_DESCS = ('FF001' => { 'tag' =>'INFO/LEN',
                                'name' => 'FF018',
                                'desc' => 'Sufficient Depth: Pass if depth > 10',
                                'test' => \&flag_018},
+                  'FF019' => { 'tag' => 'INFO/LEN',
+                               'name' => 'FF019',
+                               'desc' => 'Fail when tumour supporting fragments < 3 or tumour fraction of supporting fragments < 0.05',
+                               'test' => \&flag_019},
 );
 
 our $previous_format_hash;
@@ -450,4 +454,45 @@ sub flag_018 {
   return $FAIL;
 }
 
+# FF019: fail calls with weak tumour support.
+# Calls use_prev($$RECORD[8]) (expected to populate $previous_format_hash) and
+# then reads the FC and FD fields of the genotype column $$RECORD[10].
+# Returns $FAIL when FC < 3, when FD is zero (the FC/FD fraction is undefined
+# and the division would otherwise die), or when FC/FD < 0.05;
+# returns $PASS otherwise. $MATCH, $CHROM, $POS and $VCF are not used.
+sub flag_019 {
+  my ($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF) = @_;
+  use_prev($$RECORD[8]);
+
+  my @tum_geno = split(':',$$RECORD[10]);
+  my $tum_fc = $tum_geno[$previous_format_hash->{'FC'}];
+  my $tum_fd = $tum_geno[$previous_format_hash->{'FD'}];
+
+  return $FAIL if $tum_fc < 3;
+  return $FAIL if $tum_fd == 0;
+  return $FAIL if ($tum_fc / $tum_fd) < 0.05;
+
+  return $PASS;
+}
+
+# sub flag_020 {
+#   my ($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF) = @_;
+#   use_prev($$RECORD[8]);
+
+#   my @nor_geno = split(':',$$RECORD[9]);
+#   my @tum_geno = split(':',$$RECORD[10]);
+#   if(($nor_geno[$previous_format_hash->{'FD'}] + $tum_geno[$previous_format_hash->{'FD'}] < 200 && (
+#     $nor_geno[$previous_format_hash->{'FC'}] <= 1 &&
+#     $nor_geno[$previous_format_hash->{'FD'}] &&
+#     $nor_geno[$previous_format_hash->{'FC'}] < ($tum_geno[$previous_format_hash->{'FC'}] * 0.1))
+#   ||
+#   (
+
+#   ))){
+#     return $FAIL;
+#   }
+
+#   return $PASS;
+# }
+
 1;
diff --git a/perl/t/vcfPindelFragmentFlagger.t b/perl/t/vcfPindelFragmentFlagger.t
new file mode 100644
index 0000000..f5c2811
--- /dev/null
+++ b/perl/t/vcfPindelFragmentFlagger.t
@@ -0,0 +1,698 @@
+####################################################
+# Copyright (c) 2014-2021 Genome Research Ltd.
+# Author: CASM/Cancer IT, cgphelp@sanger.ac.uk
+# See LICENCE for details
+####################################################
+
+use strict;
+use warnings;
+use Cwd 'abs_path';
+use Test::More;
+use Data::Dumper;
+use Const::Fast qw(const);
+
+const my @AVAILABLE_RULES => qw(FF001 FF002 FF003 FF004 FF005 FF006 FF007 FF008 FF009 FF010 FF012 FF015 FF016 FF017 FF018 FF019);
+
+my %rule_test_dispatch = ('FF001' => \&_test_FF001,
+                          'FF002' => \&_test_FF002,
+                          'FF003' => \&_test_FF003,
+                          'FF004' => \&_test_FF004,
+                          'FF005' => \&_test_FF005,
+                          'FF006' => \&_test_FF006,
+                          'FF007' => \&_test_FF007,
+                          'FF008' => \&_test_FF008,
+                          'FF009' => \&_test_FF009,
+                          'FF010' => \&_test_FF010,
+                          'FF012' => \&_test_FF012,
+                          'FF015' => \&_test_FF015,
+                          'FF016' => \&_test_FF016,
+                          'FF017' => \&_test_FF017,
+                          'FF018' => \&_test_FF018,
+                          'FF019' => \&_test_FF019,
+                          );
+
+use_ok('Sanger::CGP::PindelPostProcessing::VcfSoftFlagger');
+use_ok('Sanger::CGP::PindelPostProcessing::FragmentFilterRules');
+
+my @rules_found = 
Sanger::CGP::PindelPostProcessing::FragmentFilterRules::available_rules(); +is_deeply(\@rules_found, \@AVAILABLE_RULES, 'Expected set of rules are implemented'); +for my $flag(@AVAILABLE_RULES) { + $rule_test_dispatch{$flag}(Sanger::CGP::PindelPostProcessing::FragmentFilterRules->rule($flag)); +} + +done_testing(); + +sub _test_FF001{ + my ($filter_hash) = @_; + subtest "Test rule FF001" => sub { + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC 5 5'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC 6 4'; + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC 4 6'; + my $sub = $filter_hash->{test}; + + my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); + $CHROM = [split("\t",$test1)]->[0]; + $POS = [split("\t",$test1)]->[1]; + $FAIL = 1; + $PASS = 0; + #$VCF = $$opts{vcf}; + + is($filter_hash->{tag}, 'INFO/LEN',"_test_FF001 check the correct info tag has been set for the rule"); + + $RECORD = [split("\t",$test1)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF001 pWt == pMt"); + $RECORD = [split("\t",$test2)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF001 pWt > pMt"); + $RECORD = [split("\t",$test3)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF001 pWt < pMt"); + }; +} + + +sub _test_FF002{ + my ($filter_hash) = @_; + subtest "Test rule FF002" => sub { + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=4;SM=138;S1=10;S2=203.791;REP=18 FC 1 0'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=4;SM=138;S1=10;S2=203.791;REP=18 FC 0 0'; + + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=5;SM=138;S1=10;S2=203.791;REP=18 FC 1 0'; + my $test4 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=5;SM=138;S1=10;S2=203.791;REP=18 FC 0 0'; + + + my $sub = $filter_hash->{test}; + + my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); + $CHROM = [split("\t",$test1)]->[0]; + $POS = [split("\t",$test1)]->[1]; + $FAIL = 1; + $PASS = 0; + #$VCF = $$opts{vcf}; + + is($filter_hash->{tag}, 'INFO/LEN',"_test_FF002 check the correct info tag has been set for the rule"); + + $RECORD = [split("\t",$test1)]; + $MATCH = 4; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF002 length == 4 pWt == 1"); + $RECORD = [split("\t",$test2)]; + $MATCH = 4; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF002 length == 4 pWt == 0"); + + $RECORD = [split("\t",$test3)]; + $MATCH = 5; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF002 length == 5 pWt == 1"); + $RECORD = [split("\t",$test4)]; + $MATCH = 5; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF002 length == 5 pWt == 0"); + } +} + +sub _test_FF003{ + my ($filter_hash) = @_; + subtest "Test rule FF003" => sub { + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PU:NU 0:0 3:0'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PU:NU 0:0 0:3'; + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PU:NU 0:0 4:0'; + my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PU:NU 0:0 0:4'; + my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PU:NU 0:0 3:4'; + my $test6 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PU:NU 0:0 2:2'; + my $test7 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PU:NU 0:0 2:1'; + my $test8 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PU:NU 0:0 1:2'; + my $test9 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PU:NU 0:0 2:0'; + my $test10 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PU:NU 0:0 0:2'; + my $test11 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PU:NU 0:0 0:0'; + + + + my $sub = $filter_hash->{test}; + + my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); + $CHROM = [split("\t",$test1)]->[0]; + $POS = [split("\t",$test1)]->[1]; + $FAIL = 1; + $PASS = 0; + #$VCF = $$opts{vcf}; + + is($filter_hash->{tag}, 'INFO/LEN',"_test_FF003 check the correct info tag has been set for the rule"); + + $RECORD = [split("\t",$test1)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF003 pMtPos == 3 pMtNeg == 0"); + $RECORD = [split("\t",$test2)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF003 pMtPos == 0 pMtNeg == 3"); + $RECORD = [split("\t",$test3)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF003 pMtPos == 4 pMtNeg == 0"); + $RECORD = [split("\t",$test4)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF003 pMtPos == 0 pMtNeg == 4"); + + $RECORD = [split("\t",$test5)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF003 pMtPos == 3 pMtNeg == 4"); + $RECORD = [split("\t",$test6)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF003 pMtPos == 2 pMtNeg == 2"); + $RECORD = [split("\t",$test7)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF003 pMtPos == 2 pMtNeg == 1"); + $RECORD = [split("\t",$test8)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF003 pMtPos == 1 pMtNeg == 2"); + $RECORD = [split("\t",$test9)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF003 pMtPos == 2 
pMtNeg == 0"); + $RECORD = [split("\t",$test10)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF003 pMtPos == 0 pMtNeg == 2"); + $RECORD = [split("\t",$test11)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF003 pMtPos == 0 pMtNeg == 0"); + }; +} + + +sub _test_FF004{ + my ($filter_hash) = @_; + subtest "Test rule FF004" => sub { + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 9:0:0:0'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 10:0:0:0'; + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 199:0:0:0'; + my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 200:0:0:0'; + + my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 100:0:2:3'; + my $test6 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 100:0:2:2'; + + + my $test7 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 0:100:4:0'; + my $test8 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 100:0:8:0'; + my $test9 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 100:0:7:0'; + + + my $test10 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 100:0:0:4'; + my $test11 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 0:100:0:8'; + my $test12 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 0:100:0:7'; + + + my $sub = $filter_hash->{test}; + + my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); + $CHROM = [split("\t",$test1)]->[0]; + $POS = [split("\t",$test1)]->[1]; + $FAIL = 1; + $PASS = 0; + #$VCF = $$opts{vcf}; + + is($filter_hash->{tag}, 'INFO/LEN',"_test_FF004 check the correct info tag has been set for the rule"); + + $RECORD = [split("\t",$test1)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF004 "); + $RECORD = [split("\t",$test2)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF004 "); + $RECORD = [split("\t",$test3)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF004 "); + $RECORD = [split("\t",$test4)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF004 "); + + $RECORD = [split("\t",$test5)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF004 "); + $RECORD = [split("\t",$test6)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF004 "); + + $RECORD = [split("\t",$test7)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF004 "); + + $RECORD = [split("\t",$test8)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF004 "); + $RECORD = [split("\t",$test9)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF004 "); + + $RECORD = [split("\t",$test10)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF004 "); + + $RECORD = [split("\t",$test11)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF004 "); + $RECORD = [split("\t",$test12)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF004 "); + }; +} + +sub _test_FF005{ + my ($filter_hash) = @_; + subtest "Test rule FF005" => sub { + my $test1 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 199:0:1:0'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 0:199:1:0'; + + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 200:0:4:4'; + my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 0:200:4:4'; + my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 199:1:4:4'; + my $test6 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 1:199:4:4'; + + my $test7 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 200:0:1:1'; + my $test8 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 200:0:3:4'; + my $test9 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 200:0:4:4'; + my $test10 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 200:0:4:5'; + + my $test11 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 0:200:1:0'; + my $test12 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 200:0:7:0'; + my $test13 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 200:0:8:0'; + + my $test14 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 200:0:0:1'; + my $test15 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 0:200:0:7'; + my $test16 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 0:200:0:8'; + + my $sub = $filter_hash->{test}; + + my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); + $CHROM = [split("\t",$test1)]->[0]; + $POS = [split("\t",$test1)]->[1]; + $FAIL = 1; + $PASS = 0; + #$VCF = $$opts{vcf}; + + is($filter_hash->{tag}, 'INFO/LEN',"_test_FF005 check the correct info tag has been set for the rule"); + + $RECORD = [split("\t",$test1)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF005 depth < 200"); + $RECORD = [split("\t",$test2)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF005 depth < 200 with different pos/neg ratios"); + $RECORD = [split("\t",$test3)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF005 depth > 200"); + $RECORD = [split("\t",$test4)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF005 depth > 200 with different pos/neg ratios"); + $RECORD = [split("\t",$test5)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF005 depth > 200"); + $RECORD = [split("\t",$test6)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF005 depth > 200 with different pos/neg ratios"); + $RECORD = [split("\t",$test7)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF005 depth > 200 pMtPos == 1 pMtNeg == 1"); + $RECORD = [split("\t",$test8)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF005 depth > 200 pMtPos == 4 pMtNeg == 3"); + $RECORD = [split("\t",$test9)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF005 depth > 200 pMtPos == 4 pMtNeg == 4"); + $RECORD = [split("\t",$test10)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF005 depth > 200 pMtPos == 4 pMtNeg == 
5"); + $RECORD = [split("\t",$test11)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF005 depthNeg > 200 pMtPos == 1"); + $RECORD = [split("\t",$test12)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF005 depthPos > 200 pMtPos == 7"); + $RECORD = [split("\t",$test13)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF005 depthPos > 200 pMtPos == 8"); + $RECORD = [split("\t",$test14)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF005 depthPos > 200 pMtNeg == 1"); + $RECORD = [split("\t",$test15)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF005 depthNeg > 200 pMtNeg == 7"); + $RECORD = [split("\t",$test16)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF005 depthNeg > 200 pMtNeg == 8"); + }; +} + +sub _test_FF006{ + my ($filter_hash) = @_; + subtest "Test rule FF006" => sub { + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=4;SM=138;S1=10;S2=203.791;REP=9 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 0:0:0:0:0:0:0:0:0:0'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=4;SM=138;S1=10;S2=203.791;REP=10 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 0:0:0:0:0:0:0:0:0:0'; + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=4;SM=138;S1=10;S2=203.791;REP=0 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 0:0:0:0:0:0:0:0:0:0'; + my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=5;SM=138;S1=10;S2=203.791;REP=9 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 0:0:0:0:0:0:0:0:0:0'; + my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=5;SM=138;S1=10;S2=203.791;REP=10 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 0:0:0:0:0:0:0:0:0:0'; + my $test6 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=5;SM=138;S1=10;S2=203.791;REP=0 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 0:0:0:0:0:0:0:0:0:0'; + my $test7 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=4;SM=138;S1=10;S2=203.791;REP=11 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 0:0:0:0:0:0:0:0:0:0'; + + my $sub = $filter_hash->{test}; + + my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); + $CHROM = [split("\t",$test1)]->[0]; + $POS = [split("\t",$test1)]->[1]; + $FAIL = 1; + $PASS = 0; + #$VCF = $$opts{vcf}; + + is($filter_hash->{tag}, 'INFO/LEN',"_test_FF006 check the correct info tag has been set for the rule"); + + $RECORD = [split("\t",$test1)]; + $MATCH = 4; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF006 length == 4 rep == 9"); + $RECORD = [split("\t",$test2)]; + $MATCH = 4; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF006 length == 4 rep == 10"); + $RECORD = [split("\t",$test3)]; + $MATCH = 4; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF006 length == 4 rep == 0"); + $RECORD = [split("\t",$test7)]; + $MATCH = 4; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF006 length == 4 rep == 11"); + $RECORD = [split("\t",$test4)]; + $MATCH = 5; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF006 length == 5 rep == 9"); + $RECORD = [split("\t",$test5)]; + $MATCH = 5; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF006 length == 5 rep == 10"); + $RECORD = [split("\t",$test6)]; + $MATCH = 5; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF006 length == 5 rep == 0"); + }; +} + +sub _test_FF007{ + my ($filter_hash) = @_; + subtest "Test rule FF007" => sub { + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FD 0 5'; + my $test2 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FD 1 5'; + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FD 0 6'; + my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FD 1 6'; + my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FD 7 100'; + my $test6 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FD 8 100'; + my $test7 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FD 9 100'; + + my $sub = $filter_hash->{test}; + + my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); + $CHROM = [split("\t",$test1)]->[0]; + $POS = [split("\t",$test1)]->[1]; + $FAIL = 1; + $PASS = 0; + #$VCF = $$opts{vcf}; + + is($filter_hash->{tag}, 'INFO/LEN',"_test_FF007 check the correct info tag has been set for the rule"); + + $RECORD = [split("\t",$test1)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF007 rdMt < 6"); + $RECORD = [split("\t",$test2)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF007 rdMt < 6 and rdWt > 8pc"); + + $RECORD = [split("\t",$test3)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF007 rdMt == 6 and rdWt < 8pc"); + $RECORD = [split("\t",$test4)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF007 rdMt == 6 and rdWt > 8pc"); + + $RECORD = [split("\t",$test5)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF007 rdMt == 100 and rdWt == 7"); + $RECORD = [split("\t",$test6)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF007 rdMt == 100 and rdWt == 8"); + $RECORD = [split("\t",$test7)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF007 rdMt == 100 and rdWt > 8"); + }; +} + +sub _test_FF008{ + my ($filter_hash) = @_; + subtest "Test 
rule FF008" => sub { + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC 6 100'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC 5 100'; + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC 4 100'; + my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC 0 100'; + + my $sub = $filter_hash->{test}; + + my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); + $CHROM = [split("\t",$test1)]->[0]; + $POS = [split("\t",$test1)]->[1]; + $FAIL = 1; + $PASS = 0; + #$VCF = $$opts{vcf}; + + is($filter_hash->{tag}, 'INFO/REP',"_test_FF008 check the correct info tag has been set for the rule"); + + $RECORD = [split("\t",$test1)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF008 pWt > pMt5%"); + $RECORD = [split("\t",$test2)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF008 pWt == pMt5%"); + $RECORD = [split("\t",$test3)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF008 pWt < pMt5% "); + $RECORD = [split("\t",$test4)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF008 pWt == 0 "); + }; +} + +sub _test_FF009{ + #fail; #is coding filter +} + +sub _test_FF010{ + #fail; #normal panel filter +} + +sub _test_FF011{ + #fail; #microsatelite filter +} + +sub _test_FF012{ + my ($filter_hash) = @_; + subtest "Test rule FF0012" => sub { + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 2:0:0:0:9:0:0:0:0:0 2:0:0:0:9:0:0:0:0:0'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 2:0:0:0:0:9:0:0:0:0 2:0:0:0:0:9:0:0:0:0'; + my $test3 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 2:0:0:0:0:10:0:0:0:0 2:0:0:0:10:0:0:0:0:0'; + my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 2:0:0:0:10:0:0:0:0:0 2:0:0:0:0:10:0:0:0:0'; + my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 3:0:0:0:0:11:0:0:0:0 3:0:0:0:11:0:0:0:0:0'; + my $test6 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 2:0:0:0:0:10:0:0:0:0 2:0:0:0:9:0:0:0:0:0'; + my $test7 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 20:0:0:0:50:50:0:0:0:0 20:0:0:0:50:50:0:0:0:0'; + my $test8 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 10:10:0:0:50:50:0:0:0:0 10:10:0:0:50:50:0:0:0:0'; + my $test9 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 19:0:0:0:50:50:0:0:0:0 19:0:0:0:50:50:0:0:0:0'; + my $test10 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 18:1:0:0:50:50:0:0:0:0 18:1:0:0:50:50:0:0:0:0'; + my $test11 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 20:0:0:0:50:50:0:0:0:0 19:0:0:0:50:50:0:0:0:0'; + my $test12 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 19:0:0:0:50:50:0:0:0:0 20:0:0:0:50:50:0:0:0:0'; + my $test13 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 21:0:0:0:50:50:0:0:0:0 21:0:0:0:50:50:0:0:0:0'; + my $test14 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:20:0:50:50:0:0:0:0 0:0:20:0:50:50:0:0:0:0'; + my $test15 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:10:10:50:50:0:0:0:0 0:0:10:10:50:50:0:0:0:0'; + my $test16 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:19:0:50:50:0:0:0:0 0:0:19:0:50:50:0:0:0:0'; + my $test17 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:18:1:50:50:0:0:0:0 0:0:18:1:50:50:0:0:0:0'; + my $test18 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:20:0:50:50:0:0:0:0 0:0:19:0:50:50:0:0:0:0'; + my $test19 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:19:0:50:50:0:0:0:0 0:0:20:0:50:50:0:0:0:0'; + my $test20 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:21:0:50:50:0:0:0:0 0:0:21:0:50:50:0:0:0:0'; + + my $sub = $filter_hash->{test}; + + my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); + $MATCH = 1; + $CHROM = [split("\t",$test1)]->[0]; + $POS = [split("\t",$test1)]->[1]; + $FAIL = 1; + $PASS = 0; + #$VCF = $$opts{vcf}; + + is($filter_hash->{tag}, 'INFO/LEN',"_test_FF012 check the correct info tag has been set for the rule"); + + $RECORD = [split("\t",$test1)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF012 dWt == 9 dMt == 9"); + $RECORD = [split("\t",$test2)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF012 dWt == 10 dMt == 10"); + $RECORD = [split("\t",$test3)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF012 dWt > 10 dMt == 10"); + $RECORD = [split("\t",$test4)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF012 dWt > 10 dMt == 9"); + $RECORD = [split("\t",$test5)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF012 dWt == 100 pWt == 20"); + $RECORD = [split("\t",$test6)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF012 dWt == 100 pWt == 19"); + $RECORD = [split("\t",$test7)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF012 dWt == 100 pWt == 20 pMt == 19"); + $RECORD = [split("\t",$test8)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF012 dWt == 100 pWt == 19 pMt == 20"); + $RECORD = [split("\t",$test9)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF012 dWt == 100 pWt == 21 pMt == 21"); + $RECORD = [split("\t",$test10)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF012 dWt == 100 bWt == 20"); + $RECORD = [split("\t",$test11)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF012 dWt == 100 bWt == 19"); + $RECORD = 
[split("\t",$test12)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF012 dWt == 100 bWt == 20 bMt == 19"); + $RECORD = [split("\t",$test13)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF012 dWt == 100 bWt == 19 bMt == 20"); + $RECORD = [split("\t",$test14)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF012 dWt == 100 bWt == 21 bMt == 21"); + }; +} + +sub _test_FF015{ + my ($filter_hash) = @_; + subtest "Test rule FF015" => sub { + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 0:0:0:0:0:0:0:0:0:0'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:1:0 0:0:0:0:0:0:0:0:0:0'; + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:1 0:0:0:0:0:0:0:0:0:0'; + my $test4 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:1:1 0:0:0:0:0:0:0:0:0:0'; + + my $sub = $filter_hash->{test}; + + my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); + $CHROM = [split("\t",$test1)]->[0]; + $POS = [split("\t",$test1)]->[1]; + $FAIL = 1; + $PASS = 0; + #$VCF = $$opts{vcf}; + + is($filter_hash->{tag}, 'INFO/LEN',"_test_FF015 check the correct info tag has been set for the rule"); + $RECORD = [split("\t",$test1)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF0015 no wild type at all"); + $RECORD = [split("\t",$test2)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF0015 pWtPos == 1"); + $RECORD = [split("\t",$test3)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF0015 pWtNeg == 1"); + $RECORD = [split("\t",$test4)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF0015 pWtPos == 1 bWtPos == 1"); + }; +} + +sub _test_FF016{ + my ($filter_hash) = @_; + subtest "Test rule FF016" => sub { + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 2:3:1:0:0:0:0:0:0:0'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 5:0:1:0:0:0:0:0:0:0'; + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 0:5:1:0:0:0:0:0:0:0'; + my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 6:0:1:0:0:0:0:0:0:0'; + my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 4:0:0:0:0:0:0:0:0:0'; + my $test6 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 0:4:0:0:0:0:0:0:0:0'; + my $test7 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 5:1:1:0:0:0:0:0:0:0'; + my $test8 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 0:5:0:0:0:0:0:0:0:0'; + my $test9 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 4:1:0:0:0:0:0:0:0:0'; + my $test10 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 1:4:0:0:0:0:0:0:0:0'; + my $test11 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=2 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 0:5:0:0:0:0:0:0:0:0'; + my $test12 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=2 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 4:1:0:0:0:0:0:0:0:0'; + my $test13 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=2 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 1:4:0:0:0:0:0:0:0:0'; + + my $test14 = '22 16404839 . G GA . . PC=I;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=0 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 1:4:0:0:0:0:0:0:0:0'; + my $test15 = '22 16404839 . G GA . . 
PC=I;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 1:4:0:0:0:0:0:0:0:0'; + + + my $sub = $filter_hash->{test}; + + my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); + $CHROM = [split("\t",$test1)]->[0]; + $POS = [split("\t",$test1)]->[1]; + $FAIL = 1; + $PASS = 0; + #$VCF = $$opts{vcf}; + + is($filter_hash->{tag}, 'INFO/REP',"_test_FF016 check the correct info tag has been set for the rule"); + + $RECORD = [split("\t",$test1)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF016 pMt == 5"); + $RECORD = [split("\t",$test2)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF016 pMt == 5 with different pos/neg ratios"); + $RECORD = [split("\t",$test3)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF016 pMt == 5 with different pos/neg ratios"); + $RECORD = [split("\t",$test4)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF016 pMt == 6"); + $RECORD = [split("\t",$test5)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF016 pMt == 4"); + $RECORD = [split("\t",$test6)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF016 pMt == 4 with different pos/neg ratios"); + $RECORD = [split("\t",$test7)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF016 pMt == 5 bMt > 0"); + + $MATCH = 1; + $RECORD = [split("\t",$test8)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF016 pMt == 5 bMt > 0 rep == 1"); + $MATCH = 1; + $RECORD = [split("\t",$test9)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF016 pMt == 5 bMt > 0 rep == 1 pMtPos > 1 pMtPos == 0"); + $MATCH = 1; + $RECORD = [split("\t",$test10)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF016 pMt == 5 bMt > 0 rep == 1 pMtPos == 0 pMtNeg > 1"); + + $MATCH = 2; + $RECORD = [split("\t",$test11)]; + 
is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF016 pMt == 5 bMt > 0 rep == 2: pindel calls in only one direction"); + $MATCH = 2; + $RECORD = [split("\t",$test12)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF016 pMt == 5 bMt > 0 rep == 2: pindel calls in both directions"); + $MATCH = 2; + $RECORD = [split("\t",$test13)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF016 pMt == 5 bMt > 0 rep == 2: pindel calls in both direction"); + + $MATCH = 0; + $RECORD = [split("\t",$test14)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF016 pMt == 5 bMt > 0 rep == 0: insertion pindel calls in both directions"); + $MATCH = 1; + $RECORD = [split("\t",$test15)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF016 pMt == 5 bMt > 0 rep == 1: insertion pindel calls in both directions"); + }; +} + +sub _test_FF017{ + #fail; #simple repeat filter +} + +sub _test_FF018{ + my ($filter_hash) = @_; + subtest "Test rule FF018" => sub { + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:5:5:0:0 0:0:0:0:0:0:5:5:0:0'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:6:4:0:0 0:0:0:0:0:0:5:5:0:0'; + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:5:5:0:0 0:0:0:0:0:0:6:4:0:0'; + my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:5:4:0:0 0:0:0:0:0:0:5:5:0:0'; + my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:5:5:0:0 0:0:0:0:0:0:5:4:0:0'; + my $test6 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:5:6:0:0 0:0:0:0:0:0:5:5:0:0'; + my $test7 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:5:5:0:0 0:0:0:0:0:0:5:6:0:0'; + + my $sub = $filter_hash->{test}; + + my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); + $CHROM = [split("\t",$test1)]->[0]; + $POS = [split("\t",$test1)]->[1]; + $FAIL = 1; + $PASS = 0; + #$VCF = $$opts{vcf}; + + is($filter_hash->{tag}, 'INFO/LEN',"_test_FF018 check the correct info tag has been set for the rule"); + + $RECORD = [split("\t",$test1)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF014 pWt == 10 pMt == 10"); + $RECORD = [split("\t",$test2)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF014 pWt == 10 pMt == 10 with different pos/neg ratios"); + $RECORD = [split("\t",$test3)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF014 pWt == 10 pMt == 10 with different pos/neg ratios"); + $RECORD = [split("\t",$test4)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF014 pWt == 10 pMt == 9"); + $RECORD = [split("\t",$test5)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF014 pWt == 9 pMt == 10"); + $RECORD = [split("\t",$test6)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF014 pWt == 11 pMt == 10"); + $RECORD = [split("\t",$test7)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF014 pWt == 10 pMt == 11"); + }; +} + +sub _test_FF019{ + my ($filter_hash) = @_; + subtest "Test rule FF019" => sub { + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:5:5:0:0 5:20:0:0:0:0:0:0:5:5:0:0'; + my $test2 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:5:5:0:0 3:60:0:0:0:0:0:0:5:5:0:0'; + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:5:5:0:0 3:59:0:0:0:0:0:0:5:5:0:0'; + my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:5:5:0:0 1:20:0:0:0:0:0:0:5:5:0:0'; + my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:5:5:0:0 3:100:0:0:0:0:0:0:5:5:0:0'; + my $test6 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 3:59:0:0:0:0:0:0:5:5:0:0 3:60:0:0:0:0:0:0:5:5:0:0'; + + my $sub = $filter_hash->{test}; + + my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); + $CHROM = [split("\t",$test1)]->[0]; + $POS = [split("\t",$test1)]->[1]; + $FAIL = 1; + $PASS = 0; + #$VCF = $$opts{vcf}; + + is($filter_hash->{tag}, 'INFO/LEN',"_test_FF019 check the correct info tag has been set for the rule"); + + $RECORD = [split("\t",$test1)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF019 pWtFC == 0 pMtFC == 5 pMtFD == 20 pMtFC/pMtFD > 0.05"); + $RECORD = [split("\t",$test2)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF019 pWtFC == 0 pMtFC == 3 pMtFD == 60 pMtFC/pMtFD == 0.05"); + $RECORD = [split("\t",$test3)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF019 pWtFC == 0 pMtFC == 3 pMtFD == 59 pMtFC/pMtFD > 0.05"); + $RECORD = [split("\t",$test4)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF019 pWtFC == 0 pMtFC == 1 pMtFD == 20 pMtFC/pMtFD == 0.05"); + $RECORD = [split("\t",$test5)]; + 
is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF019 pWtFC == 0 pMtFC == 3 pMtFD == 61 pMtFC/pMtFC < 0.05"); + $RECORD = [split("\t",$test6)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF019 pWtFC / PWtFD < 0.05 pMtFC / pMtFD == 0.05"); + }; +} + +# sub _test_FF020{ +# my ($filter_hash) = @_; +# subtest "Test rule FF019" => sub { +# my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:5:5:0:0 1:20:0:0:0:0:0:0:5:5:0:0'; + +# my $sub = $filter_hash->{test}; + +# my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); +# $CHROM = [split("\t",$test1)]->[0]; +# $POS = [split("\t",$test1)]->[1]; +# $FAIL = 1; +# $PASS = 0; +# #$VCF = $$opts{vcf}; + +# is($filter_hash->{tag}, 'INFO/LEN',"_test_FF019 check the correct info tag has been set for the rule"); + +# $RECORD = [split("\t",$test1)]; +# is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF019 pWtFC == 0 pMtFC == 5 pMtFD == 20 pMtFC/pMtFD > 0.05"); +# }; +# } \ No newline at end of file From 2859dfc3e7b3554ae91c328938b41defbc20e3ca Mon Sep 17 00:00:00 2001 From: tc20 Date: Thu, 11 Nov 2021 10:33:38 +0000 Subject: [PATCH 03/17] FF020 added and all tests updated in vcfPindelFragmentFlagger.t --- .../FragmentFilterRules.pm | 58 +++-- perl/t/vcfPindelFragmentFlagger.t | 215 ++++++++++-------- 2 files changed, 159 insertions(+), 114 deletions(-) diff --git a/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm b/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm index 3a8c5aa..bcdd789 100644 --- a/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm +++ b/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm @@ -91,6 +91,10 @@ my %RULE_DESCS = ('FF001' => { 'tag' =>'INFO/LEN', 'name' => 'FF019', 'desc' => 'Fail when tumour supporting fragments < 3 or tumour fraction of supporting fragments < 0.05', 'test' => 
\&flag_019}, + 'FF020' => { 'tag' => 'INFO/LEN', + 'name' => 'FF020', + 'desc' => 'Allow some contamination in matched normal due to FFPR block acquired samples and allow for low level sequencing/PCR artefacts', + 'test' => \&flag_020}, ); our $previous_format_hash; @@ -467,24 +471,40 @@ sub flag_019 { return $PASS; } -# sub flag_020 { -# my ($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF) = @_; -# use_prev($$RECORD[8]); - -# my @nor_geno = split(':',$$RECORD[9]); -# my @tum_geno = split(':',$$RECORD[10]); -# if(($nor_geno[$previous_format_hash->{'FD'}] + $tum_geno[$previous_format_hash->{'FD'}] < 200 && ( -# $nor_geno[$previous_format_hash->{'FC'}] <= 1 && -# $nor_geno[$previous_format_hash->{'FD'}] && -# $nor_geno[$previous_format_hash->{'FC'}] < ($tum_geno[$previous_format_hash->{'FC'}] * 0.1)) -# || -# ( - -# ))){ -# return $FAIL; -# } - -# return $PASS; -# } +sub flag_020 { + my ($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF) = @_; + use_prev($$RECORD[8]); + + my @nor_geno = split(':',$$RECORD[9]); + my @tum_geno = split(':',$$RECORD[10]); + + my $fd_total = $nor_geno[$previous_format_hash->{'FD'}] + $tum_geno[$previous_format_hash->{'FD'}]; + my $fc_total = $nor_geno[$previous_format_hash->{'FC'}] + $tum_geno[$previous_format_hash->{'FC'}]; + + my $norfc_over_norfd = $nor_geno[$previous_format_hash->{'FC'}] / $nor_geno[$previous_format_hash->{'FD'}]; + my $tumfc_over_tumfd = $tum_geno[$previous_format_hash->{'FC'}] / $tum_geno[$previous_format_hash->{'FD'}]; + + if( + ($fd_total < 200 && ( + $nor_geno[$previous_format_hash->{'FC'}] <= 1 && + $nor_geno[$previous_format_hash->{'FD'}] >= 10 && + $nor_geno[$previous_format_hash->{'FC'}] < ($tum_geno[$previous_format_hash->{'FC'}] * 0.1) + )) || ( + ($nor_geno[$previous_format_hash->{'FC'}] == 1 || $nor_geno[$previous_format_hash->{'FC'}] == 2) && + $norfc_over_norfd <= 0.05 && + $tumfc_over_tumfd >= 0.2 + )){ + return $FAIL; + } + + if(($fd_total >= 200 && + $norfc_over_norfd > 0.02 && + $fc_total / $fd_total < 0.2 + 
)){ + return $FAIL; + } + + return $PASS; +} 1; diff --git a/perl/t/vcfPindelFragmentFlagger.t b/perl/t/vcfPindelFragmentFlagger.t index f5c2811..f595a9c 100644 --- a/perl/t/vcfPindelFragmentFlagger.t +++ b/perl/t/vcfPindelFragmentFlagger.t @@ -11,7 +11,7 @@ use Test::More; use Data::Dumper; use Const::Fast qw(const); -const my @AVAILABLE_RULES => qw(FF001 FF002 FF003 FF004 FF005 FF006 FF007 FF008 FF009 FF010 FF012 FF015 FF016 FF017 FF018 FF019); +const my @AVAILABLE_RULES => qw(FF001 FF002 FF003 FF004 FF005 FF006 FF007 FF008 FF009 FF010 FF012 FF015 FF016 FF017 FF018 FF019 FF020); my %rule_test_dispatch = ('FF001' => \&_test_FF001, 'FF002' => \&_test_FF002, @@ -29,6 +29,7 @@ my %rule_test_dispatch = ('FF001' => \&_test_FF001, 'FF017' => \&_test_FF017, 'FF018' => \&_test_FF018, 'FF019' => \&_test_FF019, + 'FF020' => \&_test_FF020, ); use_ok('Sanger::CGP::PindelPostProcessing::VcfSoftFlagger'); @@ -110,7 +111,7 @@ sub _test_FF003{ my ($filter_hash) = @_; subtest "Test rule FF003" => sub { my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PU:NU 0:0 3:0'; - my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PU:NU 0:0 0:3'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PU:NU 0:0 0:3'; my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PU:NU 0:0 4:0'; my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PU:NU 0:0 0:4'; my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PU:NU 0:0 3:4'; @@ -428,26 +429,17 @@ sub _test_FF011{ sub _test_FF012{ my ($filter_hash) = @_; subtest "Test rule FF0012" => sub { - my $test1 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 2:0:0:0:9:0:0:0:0:0 2:0:0:0:9:0:0:0:0:0'; - my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 2:0:0:0:0:9:0:0:0:0 2:0:0:0:0:9:0:0:0:0'; - my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 2:0:0:0:0:10:0:0:0:0 2:0:0:0:10:0:0:0:0:0'; - my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 2:0:0:0:10:0:0:0:0:0 2:0:0:0:0:10:0:0:0:0'; - my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 3:0:0:0:0:11:0:0:0:0 3:0:0:0:11:0:0:0:0:0'; - my $test6 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 2:0:0:0:0:10:0:0:0:0 2:0:0:0:9:0:0:0:0:0'; - my $test7 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 20:0:0:0:50:50:0:0:0:0 20:0:0:0:50:50:0:0:0:0'; - my $test8 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 10:10:0:0:50:50:0:0:0:0 10:10:0:0:50:50:0:0:0:0'; - my $test9 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 19:0:0:0:50:50:0:0:0:0 19:0:0:0:50:50:0:0:0:0'; - my $test10 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 18:1:0:0:50:50:0:0:0:0 18:1:0:0:50:50:0:0:0:0'; - my $test11 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 20:0:0:0:50:50:0:0:0:0 19:0:0:0:50:50:0:0:0:0'; - my $test12 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 19:0:0:0:50:50:0:0:0:0 20:0:0:0:50:50:0:0:0:0'; - my $test13 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 21:0:0:0:50:50:0:0:0:0 21:0:0:0:50:50:0:0:0:0'; - my $test14 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:20:0:50:50:0:0:0:0 0:0:20:0:50:50:0:0:0:0'; - my $test15 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:10:10:50:50:0:0:0:0 0:0:10:10:50:50:0:0:0:0'; - my $test16 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:19:0:50:50:0:0:0:0 0:0:19:0:50:50:0:0:0:0'; - my $test17 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:18:1:50:50:0:0:0:0 0:0:18:1:50:50:0:0:0:0'; - my $test18 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:20:0:50:50:0:0:0:0 0:0:19:0:50:50:0:0:0:0'; - my $test19 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:19:0:50:50:0:0:0:0 0:0:20:0:50:50:0:0:0:0'; - my $test20 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:21:0:50:50:0:0:0:0 0:0:21:0:50:50:0:0:0:0'; + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PD:ND 0:0:9:0 0:0:9:0'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PD:ND 0:0:0:9 0:0:0:9'; + my $test3 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PD:ND 0:0:10:0 0:0:10:0'; + my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PD:ND 0:0:0:10 0:0:0:10'; + my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PD:ND 0:0:9:0 0:0:10:0'; + my $test6 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PD:ND 0:0:10:0 0:0:9:0'; + my $test7 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PD:ND 2:10:10:0 2:10:10:0'; + my $test8 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PD:ND 1:10:10:0 1:10:10:0'; + my $test9 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PD:ND 3:10:10:0 3:10:10:0'; + my $test10 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PD:ND 2:10:10:0 1:10:10:0'; + my $test11 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PD:ND 1:10:10:0 2:10:10:0'; my $sub = $filter_hash->{test}; @@ -459,46 +451,38 @@ sub _test_FF012{ $PASS = 0; #$VCF = $$opts{vcf}; - is($filter_hash->{tag}, 'INFO/LEN',"_test_FF012 check the correct info tag has been set for the rule"); + is($filter_hash->{tag}, 'INFO/LEN',"_test_F012 check the correct info tag has been set for the rule"); $RECORD = [split("\t",$test1)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF012 dWt == 9 dMt == 9"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_F012 dWt == 9 dMt == 9"); $RECORD = [split("\t",$test2)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF012 dWt == 10 dMt == 10"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_F012 dWt == 9 dMt == 9 with different pos/neg ratios"); $RECORD = [split("\t",$test3)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF012 dWt > 10 dMt == 10"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_F012 dWt == 10 dMt == 10"); $RECORD = [split("\t",$test4)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF012 dWt > 10 dMt == 9"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_F012 dWt == 10 dMt == 10 with different pos/neg ratios"); $RECORD = [split("\t",$test5)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF012 dWt == 100 pWt == 20"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_F012 dWt == 9 dMt == 10"); $RECORD = [split("\t",$test6)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF012 dWt == 100 pWt == 19"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_F012 dWt == 10 dMt == 9"); $RECORD = [split("\t",$test7)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF012 dWt == 100 pWt == 20 pMt == 19"); + 
is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_F012 WtFC == 2 WtFD == 10 MtFC == 2 MtFD == 10"); $RECORD = [split("\t",$test8)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF012 dWt == 100 pWt == 19 pMt == 20"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_F012 WtFC == 1 WtFD == 10 MtFC == 1 MtFD == 10"); $RECORD = [split("\t",$test9)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF012 dWt == 100 pWt == 21 pMt == 21"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_F012 WtFC == 3 WtFD == 10 MtFC == 3 MtFD == 10"); $RECORD = [split("\t",$test10)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF012 dWt == 100 bWt == 20"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_F012 WtFC == 2 WtFD == 10 MtFC == 1 MtFD == 10"); $RECORD = [split("\t",$test11)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF012 dWt == 100 bWt == 19"); - $RECORD = [split("\t",$test12)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF012 dWt == 100 bWt == 20 bMt == 19"); - $RECORD = [split("\t",$test13)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF012 dWt == 100 bWt == 19 bMt == 20"); - $RECORD = [split("\t",$test14)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF012 dWt == 100 bWt == 21 bMt == 21"); - }; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_F012 WtFC == 1 WtFD == 10 MtFC == 2 MtFD == 10"); + } } sub _test_FF015{ my ($filter_hash) = @_; subtest "Test rule FF015" => sub { - my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 0:0:0:0:0:0:0:0:0:0'; - my $test2 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:1:0 0:0:0:0:0:0:0:0:0:0'; - my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:1 0:0:0:0:0:0:0:0:0:0'; - my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:1:1 0:0:0:0:0:0:0:0:0:0'; + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC 0 0'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC 1 0'; my $sub = $filter_hash->{test}; @@ -513,33 +497,29 @@ sub _test_FF015{ $RECORD = [split("\t",$test1)]; is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF0015 no wild type at all"); $RECORD = [split("\t",$test2)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF0015 pWtPos == 1"); - $RECORD = [split("\t",$test3)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF0015 pWtNeg == 1"); - $RECORD = [split("\t",$test4)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF0015 pWtPos == 1 bWtPos == 1"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF0015 pWtFC == 1"); }; } sub _test_FF016{ my ($filter_hash) = @_; subtest "Test rule FF016" => sub { - my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 2:3:1:0:0:0:0:0:0:0'; - my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 5:0:1:0:0:0:0:0:0:0'; - my $test3 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 0:5:1:0:0:0:0:0:0:0'; - my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 6:0:1:0:0:0:0:0:0:0'; - my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 4:0:0:0:0:0:0:0:0:0'; - my $test6 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 0:4:0:0:0:0:0:0:0:0'; - my $test7 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 5:1:1:0:0:0:0:0:0:0'; - my $test8 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 0:5:0:0:0:0:0:0:0:0'; - my $test9 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 4:1:0:0:0:0:0:0:0:0'; - my $test10 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 1:4:0:0:0:0:0:0:0:0'; - my $test11 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=2 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 0:5:0:0:0:0:0:0:0:0'; - my $test12 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=2 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 4:1:0:0:0:0:0:0:0:0'; - my $test13 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=2 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 1:4:0:0:0:0:0:0:0:0'; - - my $test14 = '22 16404839 . G GA . . 
PC=I;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=0 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 1:4:0:0:0:0:0:0:0:0'; - my $test15 = '22 16404839 . G GA . . PC=I;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 1:4:0:0:0:0:0:0:0:0'; + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB 0:0:0:0 2:3:1:0'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB 0:0:0:0 5:0:1:0'; + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB 0:0:0:0 0:5:1:0'; + my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB 0:0:0:0 6:0:1:0'; + my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB 0:0:0:0 4:0:0:0'; + my $test6 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB 0:0:0:0 0:4:0:0'; + my $test7 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB 0:0:0:0 5:1:1:0'; + my $test8 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB 0:0:0:0 0:5:0:0'; + my $test9 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB 0:0:0:0 4:1:0:0'; + my $test10 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB 0:0:0:0 1:4:0:0'; + my $test11 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=2 PP:NP:PB:NB 0:0:0:0 0:5:0:0'; + my $test12 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=2 PP:NP:PB:NB 0:0:0:0 4:1:0:0'; + my $test13 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=2 PP:NP:PB:NB 0:0:0:0 1:4:0:0'; + + my $test14 = '22 16404839 . G GA . . PC=I;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=0 PP:NP:PB:NB 0:0:0:0 1:4:0:0'; + my $test15 = '22 16404839 . G GA . . PC=I;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB 0:0:0:0 1:4:0:0'; my $sub = $filter_hash->{test}; @@ -604,13 +584,13 @@ sub _test_FF017{ sub _test_FF018{ my ($filter_hash) = @_; subtest "Test rule FF018" => sub { - my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:5:5:0:0 0:0:0:0:0:0:5:5:0:0'; - my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:6:4:0:0 0:0:0:0:0:0:5:5:0:0'; - my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:5:5:0:0 0:0:0:0:0:0:6:4:0:0'; - my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:5:4:0:0 0:0:0:0:0:0:5:5:0:0'; - my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:5:5:0:0 0:0:0:0:0:0:5:4:0:0'; - my $test6 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:5:6:0:0 0:0:0:0:0:0:5:5:0:0'; - my $test7 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:5:5:0:0 0:0:0:0:0:0:5:6:0:0'; + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR 5:5 5:5'; + my $test2 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR 6:4 5:5'; + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR 5:5 6:4'; + my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR 5:4 5:5'; + my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR 5:5 5:4'; + my $test6 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR 5:6 5:5'; + my $test7 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR 5:5 5:6'; my $sub = $filter_hash->{test}; @@ -643,12 +623,12 @@ sub _test_FF018{ sub _test_FF019{ my ($filter_hash) = @_; subtest "Test rule FF019" => sub { - my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:5:5:0:0 5:20:0:0:0:0:0:0:5:5:0:0'; - my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:5:5:0:0 3:60:0:0:0:0:0:0:5:5:0:0'; - my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:5:5:0:0 3:59:0:0:0:0:0:0:5:5:0:0'; - my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:5:5:0:0 1:20:0:0:0:0:0:0:5:5:0:0'; - my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:5:5:0:0 3:100:0:0:0:0:0:0:5:5:0:0'; - my $test6 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 3:59:0:0:0:0:0:0:5:5:0:0 3:60:0:0:0:0:0:0:5:5:0:0'; + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 0:0 5:20'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 0:0 3:60'; + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 0:0 3:59'; + my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 0:0 1:20'; + my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 0:0 3:100'; + my $test6 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 3:59 3:60'; my $sub = $filter_hash->{test}; @@ -676,23 +656,68 @@ sub _test_FF019{ }; } -# sub _test_FF020{ -# my ($filter_hash) = @_; -# subtest "Test rule FF019" => sub { -# my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:5:5:0:0 1:20:0:0:0:0:0:0:5:5:0:0'; +sub _test_FF020{ + my ($filter_hash) = @_; + subtest "Test rule FF020" => sub { + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 20:100 0:100'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 1:100 0:100'; + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 20:100 20:100'; + my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 1:10 20:99'; + my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 0:10 20:101'; + my $test6 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 1:11 20:111'; + my $test7 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 1:9 20:91'; + my $test8 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 2:40 20:100'; + my $test9 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 2:40 19:100'; + my $test10 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 2:41 20:100'; + my $test11 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 2:41 20:99'; + my $test12 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 2:40 20:99'; + my $test13 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 2:39 20:99'; + my $test14 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 1:11 9:111'; + my $test15 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 1:11 10:111'; + my $test16 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 3:10 10:100'; -# my $sub = $filter_hash->{test}; + my $sub = $filter_hash->{test}; -# my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); -# $CHROM = [split("\t",$test1)]->[0]; -# $POS = [split("\t",$test1)]->[1]; -# $FAIL = 1; -# $PASS = 0; -# #$VCF = $$opts{vcf}; + my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); + $CHROM = [split("\t",$test1)]->[0]; + $POS = [split("\t",$test1)]->[1]; + $FAIL = 1; + $PASS = 0; + #$VCF = $$opts{vcf}; -# is($filter_hash->{tag}, 'INFO/LEN',"_test_FF019 check the correct info tag has been set for the rule"); + is($filter_hash->{tag}, 'INFO/LEN',"_test_FF019 check the correct info tag has been set for the rule"); -# $RECORD = [split("\t",$test1)]; -# is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF019 pWtFC == 0 pMtFC == 5 pMtFD == 20 pMtFC/pMtFD > 0.05"); -# }; -# } \ No newline at end of file + $RECORD = [split("\t",$test1)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 totalFD == 200 WtFC / WtFD > 0.02 totalFC / totalFD < 0.2"); + $RECORD = [split("\t",$test2)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 totalFD == 200 WtFC / WtFD < 0.02 totalFC"); + $RECORD = [split("\t",$test3)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 totalFD == 200 WtFC / WtFD > 0.02 totalFC totalFC / totalFD > 0.2"); + $RECORD = [split("\t",$test4)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 totalFD < 200 WtFC == 1 WtFD == 10 WtFC < MtFC * 0.1);"); + $RECORD = [split("\t",$test5)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 totalFD < 200 WtFC == 0 WtFD == 10 WtFC < MtFC * 0.1);"); + $RECORD = [split("\t",$test6)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 totalFD < 200 WtFC == 1 WtFD == 11 WtFC < MtFC * 0.1);"); + $RECORD = [split("\t",$test7)]; + 
is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 totalFD < 200 WtFC == 1 WtFD == 9 WtFC / WtFD > 0.05);"); + $RECORD = [split("\t",$test8)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 totalFD < 200 WtFC == 2 WtFC / WtFD == 0.05 MtFC / MtFD == 0.2);"); + $RECORD = [split("\t",$test9)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 totalFD < 200 WtFC == 2 WtFC / WtFD == 0.05 MtFC / MtFD < 0.2);"); + $RECORD = [split("\t",$test10)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 totalFD < 200 WtFC == 2 WtFC / WtFD < 0.05 MtFC / MtFD == 0.2);"); + $RECORD = [split("\t",$test11)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 totalFD < 200 WtFC == 2 WtFC / WtFD < 0.05 MtFC / MtFD > 0.2);"); + $RECORD = [split("\t",$test12)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 totalFD < 200 WtFC == 2 WtFC / WtFD == 0.05 MtFC / MtFD > 0.2);"); + $RECORD = [split("\t",$test13)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 totalFD < 200 WtFC == 2 WtFC / WtFD > 0.05 MtFC / MtFD > 0.2);"); + $RECORD = [split("\t",$test14)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 totalFD < 200 WtFC == 1 WtFD == 11 WtFC > MtFC * 0.1);"); + $RECORD = [split("\t",$test15)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 totalFD < 200 WtFC == 1 WtFD == 11 WtFC == MtFC * 0.1);"); + $RECORD = [split("\t",$test16)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 totalFD < 200 WtFC == 3"); + }; +} \ No newline at end of file From 79c0378fa186e8e8479f6e5892be7540bac1d90f Mon Sep 17 00:00:00 2001 From: tc20 Date: Wed, 17 Nov 2021 10:31:33 +0000 Subject: [PATCH 04/17] caught divide by 0 situations in FF019 and FF020 --- .../FragmentFilterRules.pm | 24 +++++++++++++++---- 1 file changed, 20 
insertions(+), 4 deletions(-) diff --git a/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm b/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm index bcdd789..6386310 100644 --- a/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm +++ b/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm @@ -133,6 +133,18 @@ sub reuse_repeats_tabix { } } +sub division { + my ($numerator, $denominator) = @_; + if (( + $numerator > 0 + )){ + my $decimal = $numerator / $denominator + return $decimal; + }else{ + return 0; + } +} + sub flag_001 { my ($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF) = @_; use_prev($$RECORD[8]); @@ -462,9 +474,11 @@ sub flag_019 { my ($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF) = @_; use_prev($$RECORD[8]); + my $tumfc_over_tumfd = $division($tum_geno[$previous_format_hash->{'FC'}], $tum_geno[$previous_format_hash->{'FD'}]) + my @tum_geno = split(':',$$RECORD[10]); if(($tum_geno[$previous_format_hash->{'FC'}] < 3) || - ($tum_geno[$previous_format_hash->{'FC'}] / $tum_geno[$previous_format_hash->{'FD'}] < 0.05)){ + ($tumfc_over_tumfd < 0.05)){ return $FAIL; } @@ -480,9 +494,11 @@ sub flag_020 { my $fd_total = $nor_geno[$previous_format_hash->{'FD'}] + $tum_geno[$previous_format_hash->{'FD'}]; my $fc_total = $nor_geno[$previous_format_hash->{'FC'}] + $tum_geno[$previous_format_hash->{'FC'}]; + my $total_div = $division($fc_total, $fd_total); + + my $norfc_over_norfd = $division($nor_geno[$previous_format_hash->{'FC'}], $nor_geno[$previous_format_hash->{'FD'}]); - my $norfc_over_norfd = $nor_geno[$previous_format_hash->{'FC'}] / $nor_geno[$previous_format_hash->{'FD'}]; - my $tumfc_over_tumfd = $tum_geno[$previous_format_hash->{'FC'}] / $tum_geno[$previous_format_hash->{'FD'}]; + my $tumfc_over_tumfd = $division($tum_geno[$previous_format_hash->{'FC'}], $tum_geno[$previous_format_hash->{'FD'}]); if( ($fd_total < 200 && ( @@ -499,7 +515,7 @@ sub flag_020 { if(($fd_total >= 200 && $norfc_over_norfd > 0.02 && - 
$fc_total / $fd_total < 0.2 + $total_div < 0.2 )){ return $FAIL; } From 95f4522af826317cb01b225aa84eb98b23c0aa7f Mon Sep 17 00:00:00 2001 From: Thomas Clarke Date: Wed, 17 Nov 2021 15:18:36 +0000 Subject: [PATCH 05/17] fixed errors with divide, FF019 and FF020 subroutines --- .../FragmentFilterRules.pm | 34 ++++++++----------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm b/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm index 6386310..d9adafb 100644 --- a/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm +++ b/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm @@ -133,16 +133,13 @@ sub reuse_repeats_tabix { } } -sub division { - my ($numerator, $denominator) = @_; - if (( - $numerator > 0 - )){ - my $decimal = $numerator / $denominator - return $decimal; - }else{ - return 0; +sub divide { + my ($num,$den) = @_; + my $decimal = 0; + if ($num > 0){ + $decimal = $num / $den; } + $decimal; } sub flag_001 { @@ -474,9 +471,9 @@ sub flag_019 { my ($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF) = @_; use_prev($$RECORD[8]); - my $tumfc_over_tumfd = $division($tum_geno[$previous_format_hash->{'FC'}], $tum_geno[$previous_format_hash->{'FD'}]) - my @tum_geno = split(':',$$RECORD[10]); + my $tumfc_over_tumfd = &divide($tum_geno[$previous_format_hash->{'FC'}], $tum_geno[$previous_format_hash->{'FD'}]); + if(($tum_geno[$previous_format_hash->{'FC'}] < 3) || ($tumfc_over_tumfd < 0.05)){ return $FAIL; @@ -494,11 +491,11 @@ sub flag_020 { my $fd_total = $nor_geno[$previous_format_hash->{'FD'}] + $tum_geno[$previous_format_hash->{'FD'}]; my $fc_total = $nor_geno[$previous_format_hash->{'FC'}] + $tum_geno[$previous_format_hash->{'FC'}]; - my $total_div = $division($fc_total, $fd_total); + my $total_div = &divide($fc_total, $fd_total); - my $norfc_over_norfd = $division($nor_geno[$previous_format_hash->{'FC'}], $nor_geno[$previous_format_hash->{'FD'}]); + my $norfc_over_norfd
= &divide($nor_geno[$previous_format_hash->{'FC'}], $nor_geno[$previous_format_hash->{'FD'}]); - my $tumfc_over_tumfd = $division($tum_geno[$previous_format_hash->{'FC'}], $tum_geno[$previous_format_hash->{'FD'}]); + my $tumfc_over_tumfd = &divide($tum_geno[$previous_format_hash->{'FC'}], $tum_geno[$previous_format_hash->{'FD'}]); if( ($fd_total < 200 && ( @@ -506,17 +503,14 @@ sub flag_020 { $nor_geno[$previous_format_hash->{'FD'}] >= 10 && $nor_geno[$previous_format_hash->{'FC'}] < ($tum_geno[$previous_format_hash->{'FC'}] * 0.1) )) || ( - ($nor_geno[$previous_format_hash->{'FC'}] == 1 || $nor_geno[$previous_format_hash->{'FC'}] == 2) && $norfc_over_norfd <= 0.05 && - $tumfc_over_tumfd >= 0.2 + $tumfc_over_tumfd >= 0.2 && + ($nor_geno[$previous_format_hash->{'FC'}] == 1 || $nor_geno[$previous_format_hash->{'FC'}] == 2) )){ return $FAIL; } - if(($fd_total >= 200 && - $norfc_over_norfd > 0.02 && - $total_div < 0.2 - )){ + if($fd_total >= 200 && $norfc_over_norfd > 0.02 && $total_div < 0.2){ return $FAIL; } From d90947dacd741f661426edf1a5d49280d8cc6f35 Mon Sep 17 00:00:00 2001 From: Thomas Clarke Date: Fri, 19 Nov 2021 15:27:14 +0000 Subject: [PATCH 06/17] changes to FF019 and FF020 --- .../FragmentFilterRules.pm | 49 +++++++++---------- 1 file changed, 22 insertions(+), 27 deletions(-) diff --git a/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm b/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm index d9adafb..745a3b7 100644 --- a/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm +++ b/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm @@ -133,15 +133,6 @@ sub reuse_repeats_tabix { } } -sub divide { - my ($num,$den) = @_; - my $decimal = 0; - if ($num > 0){ - $decimal = $num / $den; - } - $decimal; -} - sub flag_001 { my ($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF) = @_; use_prev($$RECORD[8]); @@ -472,10 +463,12 @@ sub flag_019 { use_prev($$RECORD[8]); my @tum_geno = split(':',$$RECORD[10]); - my $tumfc_over_tumfd =
&divide($tum_geno[$previous_format_hash->{'FC'}], $tum_geno[$previous_format_hash->{'FD'}]); - if(($tum_geno[$previous_format_hash->{'FC'}] < 3) || - ($tumfc_over_tumfd < 0.05)){ + if($tum_geno[$previous_format_hash->{'FC'}] < 3){ + return $FAIL; + } + # previous test confirms FC/FD can't be 0, so no div0 check required + if ($tum_geno[$previous_format_hash->{'FC'} / $tum_geno[$previous_format_hash->{'FD'} < 0.05){ return $FAIL; } @@ -490,28 +483,30 @@ sub flag_020 { my @tum_geno = split(':',$$RECORD[10]); my $fd_total = $nor_geno[$previous_format_hash->{'FD'}] + $tum_geno[$previous_format_hash->{'FD'}]; - my $fc_total = $nor_geno[$previous_format_hash->{'FC'}] + $tum_geno[$previous_format_hash->{'FC'}]; - my $total_div = &divide($fc_total, $fd_total); - - my $norfc_over_norfd = &divide($nor_geno[$previous_format_hash->{'FC'}], $nor_geno[$previous_format_hash->{'FD'}]); - my $tumfc_over_tumfd = &divide($tum_geno[$previous_format_hash->{'FC'}], $tum_geno[$previous_format_hash->{'FD'}]); - - if( - ($fd_total < 200 && ( + if($fd_total < 200 && $nor_geno[$previous_format_hash->{'FC'}] <= 1 && $nor_geno[$previous_format_hash->{'FD'}] >= 10 && $nor_geno[$previous_format_hash->{'FC'}] < ($tum_geno[$previous_format_hash->{'FC'}] * 0.1) - )) || ( - $norfc_over_norfd <= 0.05 && - $tumfc_over_tumfd >= 0.2 && - ($nor_geno[$previous_format_hash->{'FC'}] == 1 || $nor_geno[$previous_format_hash->{'FC'}] == 2) - )){ + ){ return $FAIL; } - - if($fd_total >= 200 && $norfc_over_norfd > 0.02 && $total_div < 0.2){ + + my $tumfc_over_tumfd = $tum_geno[$previous_format_hash->{'FD'}] > 0 ? $tum_geno[$previous_format_hash->{'FC'}] / $tum_geno[$previous_format_hash->{'FD'}] : undef; + my $norfc_over_norfd = $nor_geno[$previous_format_hash->{'FD'}] > 0 ?
$nor_geno[$previous_format_hash->{'FC'}] / $nor_geno[$previous_format_hash->{'FD'}] : undef; + + if($fd_total < 200){ + if(($nor_geno[$previous_format_hash->{'FC'}] == 1 || $nor_geno[$previous_format_hash->{'FC'}] == 2) && + $norfc_over_norfd <= 0.05 && + $tumfc_over_tumfd >= 0.2 + ){ return $FAIL; + } + + }else{ + if($norfc_over_norfd > 0.02 && $tumfc_over_tumfd < 0.2){ + return $FAIL; + } } return $PASS; From 02df09e8cc2e51b0a145268deb036bdc236d08e6 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Fri, 19 Nov 2021 16:03:25 +0000 Subject: [PATCH 07/17] Link in precommit and updates to circleci --- .circleci/config.yml | 16 ++++ .licenserc.yaml | 93 +++++++++++++++++++++++ .pre-commit-config.yaml | 27 +++++++ LICENCE => LICENSE | 4 +- README.md | 159 +++++++++++++++++++++++++--------------- 5 files changed, 238 insertions(+), 61 deletions(-) create mode 100644 .licenserc.yaml create mode 100644 .pre-commit-config.yaml rename LICENCE => LICENSE (99%) diff --git a/.circleci/config.yml b/.circleci/config.yml index 6de20c2..b4aaa09 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -10,6 +10,16 @@ version: 2.1 jobs: + license_chk: + machine: + # need machine as want to mount a volume + image: ubuntu-2004:202107-02 + steps: + - checkout + - run: + name: Execute skywalking-eyes check of licenses + command: | + docker run --rm -v $(pwd):/github/workspace apache/skywalking-eyes header check build: environment: IMAGE_NAME: quay.io/wtsicgp/cgppindel @@ -55,7 +65,13 @@ workflows: version: 2.1 build_test: jobs: + - license_chk: + filters: + tags: + only: /.+/ - build: + requires: + - license_chk context: - dockerhub-casmservice - quayio-casmservice diff --git a/.licenserc.yaml b/.licenserc.yaml new file mode 100644 index 0000000..12af0b6 --- /dev/null +++ b/.licenserc.yaml @@ -0,0 +1,93 @@ +header: + license: + spdx-id: AGPL-3.0-or-later + copyright-owner: Genome Research Ltd + content: | + Copyright (c) 2014-2021 Genome Research Ltd + + Author: CASM/Cancer IT + 
+ This file is part of cgpPindel. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as + published by the Free Software Foundation, either version 3 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + + 1. The usage of a range of years within a copyright statement contained within + this distribution should be interpreted as being equivalent to a list of years + including the first and last year specified and all consecutive years between + them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- + 2009, 2011-2012’ should be interpreted as being identical to a statement that + reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright + statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being + identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, + 2009, 2010, 2011, 2012’. + + pattern: | + Copyright \(c\) [-0-9]+ .+ + + Author: .+ + + This file is part of .+ + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as + published by the Free Software Foundation, either version 3 of the + License, or \(at your option\) any later version. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + + 1. 
The usage of a range of years within a copyright statement contained within + this distribution should be interpreted as being equivalent to a list of years + including the first and last year specified and all consecutive years between + them. For example, a copyright statement that reads ‘Copyright \(c\) 2005, 2007- + 2009, 2011-2012’ should be interpreted as being identical to a statement that + reads ‘Copyright \(c\) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright + statement that reads ‘Copyright \(c\) 2005-2012’ should be interpreted as being + identical to a statement that reads ‘Copyright \(c\) 2005, 2006, 2007, 2008, + 2009, 2010, 2011, 2012’. + + paths: + - '**' + + paths-ignore: + - '.circleci' + - '.coveragerc' + - '.dockerignore' + - '.gitignore' + - '.pre-commit-config.yaml' + - 'CHANGES.md' + - 'Dockerfile' + - 'LICENSE' + - 'pyproject.toml' + - 'README.md' + - '**/*.yaml' + - 'tests/data/**/*' + - '**/*.egg-info/PKG-INFO' + - '.pytest_cache/' + - 'build/' + - 'MANIFEST.in' + - 'tests/htmlcov/' + - '.eggs/' + - 'c++/*.cpp' # not ours, distributed via agreement + - 'perl/t/' + - 'perl/rules/' + - 'perl/pm_to_blib' + - 'perl/blib/' + - 'perl/MANIFEST*' + - 'perl/MYMETA*' + - '.github/' + - 'INSTALL' + - 'Makefile' + + comment: on-failure diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..3f4f01d --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,27 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.0.1 + hooks: + - id: check-added-large-files + args: ['--maxkb=3000'] + - id: check-ast + - id: check-executables-have-shebangs + - id: check-shebang-scripts-are-executable + - id: check-merge-conflict + - id: check-toml + - id: check-yaml + - id: detect-aws-credentials + args: [--allow-missing-credentials] + - id: detect-private-key + - id: end-of-file-fixer + - id: name-tests-test + - id: requirements-txt-fixer + - id: trailing-whitespace +- repo: 
https://github.com/executablebooks/mdformat + rev: 0.7.6 + hooks: + - id: mdformat +- repo: https://github.com/hadolint/hadolint + rev: v2.4.1 + hooks: + - id: hadolint-docker diff --git a/LICENCE b/LICENSE similarity index 99% rename from LICENCE rename to LICENSE index dba13ed..636867c 100644 --- a/LICENCE +++ b/LICENSE @@ -1,7 +1,7 @@ GNU AFFERO GENERAL PUBLIC LICENSE Version 3, 19 November 2007 - Copyright (C) 2007 Free Software Foundation, Inc. + Copyright (c) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. @@ -630,7 +630,7 @@ state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. - Copyright (C) + Copyright (c) This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by diff --git a/README.md b/README.md index 129d63c..aec2ba9 100644 --- a/README.md +++ b/README.md @@ -2,41 +2,46 @@ cgpPindel contains the Cancer Genome Projects workflow for [Pindel][pindel-core]. -[![cancerit](https://circleci.com/gh/cancerit/cgpPindel.svg?style=svg)](https://circleci.com/gh/cancerit/cgpPindel) +| Master | Develop | +| --------------------------------------------- | ----------------------------------------------- | +| [![Master Badge][circle-master]][circle-base] | [![Develop Badge][circle-develop]][circle-base] | + +[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit) The is a lightly modified version of pindel v2.0 with CGP specific processing for: -* Input file generation -* Conversion from pindel text output to: - * tumour and normal BAM alignment files - * VCF - * Application of VCF filters. 
+- Input file generation +- Conversion from pindel text output to: + - tumour and normal BAM alignment files + - VCF + - Application of VCF filters. + +Details of execution and referencing can be found in the [wiki][cgppindel-wiki] Contents: -* [Docker, Singularity and Dockstore](#docker-singularity-and-dockstore) -* [Dependencies/Install](#dependenciesinstall) -* [Creating a release](#creating-a-release) - * [Preparation](#preparation) - * [Release process](#release-process) - * [Code changes](#code-changes) - * [Testing](#testing) - * [Regression CI](#regression-ci) - * [Public CI](#public-ci) - * [Cutting the release](#cutting-the-release) -* [LICENCE](#licence) +- [Docker, Singularity and Dockstore](#docker-singularity-and-dockstore) +- [Dependencies/Install](#dependenciesinstall) +- [Developers](#developers) + - [Updating licence headers](#updating-licence-headers) + - [Code changes](#code-changes) + - [Testing](#testing) + - [Regression CI](#regression-ci) + - [Public CI](#public-ci) + - [Cutting the release](#cutting-the-release) +- [LICENCE](#licence) ## Docker, Singularity and Dockstore There are pre-built images containing this codebase on quay.io. When pulling an image you must specify the version there is no `latest`. -* [cgpPindel quay.io][quay-repo]: Contained within this repository - * Smallest build required to use cgpPindel - * Not linked to Dockstore (yet) - * Updated most frequently -* [dockstore-cgpwxs][ds-cgpwxs-git]: Contains tools specific to WXS analysis. -* [dockstore-cgpwgs][ds-cgpwgs-git]: Contains additional tools for WGS analysis. +- [cgpPindel quay.io][quay-repo]: Contained within this repository + - Smallest build required to use cgpPindel + - Not linked to Dockstore (yet) + - Updated most frequently +- [dockstore-cgpwxs][ds-cgpwxs-git]: Contains tools specific to WXS analysis. +- [dockstore-cgpwgs][ds-cgpwgs-git]: Contains additional tools for WGS analysis. 
These were primarily designed for use with dockstore.org but can be used as normal containers. @@ -46,8 +51,8 @@ The docker images are known to work correctly after import into a singularity im When doing a native install please install the following first: -* [PCAP-core v2.0+][pcap-core-rel] -* [cgpVcf v2.0+][cgpvcf-rel] +- [PCAP-core v2.0+][pcap-core-rel] +- [cgpVcf v2.0+][cgpvcf-rel] Please see these for any child dependencies. @@ -63,52 +68,86 @@ are installed into the target area. Please be aware that this expects basic C compilation libraries and tools to be available. -## Creating a release +## Developers -### Preparation +Please use `pre-commit` on this project. You can install to `$HOME/bin` via: -* Commit/push all relevant changes. -* Pull a clean version of the repo and use this for the following steps. +```bash +curl https://pre-commit.com/install-local.py | python - +``` -### Release process +In you checkout please run: -This project is maintained using the [HubFlow][hubflow-docs] methodology. +```bash +pre-commit install +``` -#### Code changes +### Updating licence headers + +Please use [skywalking-eyes](https://github.com/apache/skywalking-eyes). + +Expected workflow: + +Expected workflow: + +```bash +# recent build, change to apache/skywalking-eyes:0.2.0 once released +export DOCKER_IMG=ghcr.io/apache/skywalking-eyes/license-eye +``` + +1. Check state before modifying `.licenserc.yaml`: + - `docker run -it --rm -v $(pwd):/github/workspace $DOCKER_IMG header check` + - You should get some 'valid' here, those without a header as 'invalid' +1. Modify `.licenserc.yaml` +1. Apply the changes: + - `docker run -it --rm -v $(pwd):/github/workspace $DOCKER_IMG header fix` +1. Add/commit changes + +This is executed in the CI pipeline. + +*DO NOT* edit the header in the files, please modify the date component of `content` in `.licenserc.yaml`. 
The only exception being: + +- `README.md` + +If you need to make more extensive changes to the license carefully test the pattern is functional. + +### Code changes + +This project is maintained using the [HubFlow][hubflow-docs] methodology. 1. Make appropriate changes -2. Update `perl/lib/Sanger/CGP/Pindel.pm` to the correct version (adding rc/beta to end if applicable). -3. Update `CHANGES.md` to show major items. -4. Commit the updated docs and updated module/version. -5. Push commits. +1. Update `perl/lib/Sanger/CGP/Pindel.pm` to the correct version (adding rc/beta to end if applicable). +1. Update `CHANGES.md` to show major items. +1. Commit the updated docs and updated module/version. +1. Push commits. -#### Testing +### Testing -##### Regression CI +#### Regression CI An internal CI system is used to validate each release using real, large scale datasets. -##### Public CI +#### Public CI Circleci is used to: -* Build Docker image (unit tests are part of build) -* Validate expected tools exist -* For tags only: push image to quay.io +- Build Docker image (unit tests are part of build) +- Validate expected tools exist +- For tags only: push image to quay.io CI only runs for: -* Branches with pull-requests -* Default branch (`dev`) -* Tags +- Branches with pull-requests +- Default branch (`dev`) +- Tags #### Cutting the release Internal regression CI processes must be completed prior to this. 1. Check state on [Circleci][circle-repo] -2. Generate the release (add notes to GitHub) -3. Confirm that image has been built on [quay.io][quay-builds] +1. Generate the release (add notes to GitHub) +1. 
Confirm that image has been pushed to [quay.io][quay-tags] ## LICENCE @@ -144,19 +183,21 @@ identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, ``` -[cgpvcf-rel]: https://github.com/cancerit/cgpVcf/releases -[pcap-core-rel]: https://github.com/cancerit/PCAP-core/releases -[ds-cgpwxs-git]: https://github.com/cancerit/dockstore-cgpwxs -[ds-cgpwgs-git]: https://github.com/cancerit/dockstore-cgpwgs -[pindel-core]: http://gmt.genome.wustl.edu/pindel/current -[hubflow-docs]: https://datasift.github.io/gitflow/ - -[circle-repo]: https://app.circleci.com/pipelines/github/cancerit/cgpPindel -[circle-badge-svg]: https://circleci.com/gh/cancerit/cgpPindel.svg?style=svg - -[circle-badge-link]: https://travis-ci.org/cancerit/cgpPindel.svg?branch=dev + + +[cgppindel-wiki]: https://github.com/cancerit/cgpPindel/wiki +[cgpvcf-rel]: https://github.com/cancerit/cgpVcf/releases +[circle-base]: https://circleci.com/gh/cancerit/cgpPindel.svg?style=shield +[circle-develop]: https://circleci.com/gh/cancerit/cgpPindel.svg?style=shield&branch=dev%3B +[circle-master]: https://circleci.com/gh/cancerit/cgpPindel.svg?style=shield&branch=master%3B +[circle-repo]: https://app.circleci.com/pipelines/github/cancerit/cgpPindel +[ds-cgpwgs-git]: https://github.com/cancerit/dockstore-cgpwgs +[ds-cgpwxs-git]: https://github.com/cancerit/dockstore-cgpwxs +[hubflow-docs]: https://datasift.github.io/gitflow/ +[pcap-core-rel]: https://github.com/cancerit/PCAP-core/releases +[pindel-core]: http://gmt.genome.wustl.edu/pindel/current [quay-repo]: https://quay.io/repository/wtsicgp/cgppindel -[quay-builds]: https://quay.io/repository/wtsicgp/cgppindel?tab=builds +[quay-tags]: https://quay.io/repository/wtsicgp/cgppindel?tab=tags From 9c38b5be54c50bb24b744fc9def645fdf544c5c1 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Fri, 19 Nov 2021 16:04:02 +0000 Subject: [PATCH 08/17] remove legacy script --- prerelease.sh | 79 --------------------------------------------------- 1 file changed, 
79 deletions(-) delete mode 100755 prerelease.sh diff --git a/prerelease.sh b/prerelease.sh deleted file mode 100755 index fb6967a..0000000 --- a/prerelease.sh +++ /dev/null @@ -1,79 +0,0 @@ -#!/bin/bash - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. -# -# Author: CASM/Cancer IT -# -# This file is part of cgpPindel. -# -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. -# -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## - -set -eu # exit on first error or undefined value in subtitution - -# get current directory -INIT_DIR=`pwd` - -rm -rf blib - -# get location of this file -MY_PATH="`dirname \"$0\"`" # relative -MY_PATH="`( cd \"$MY_PATH\" && pwd )`" # absolutized and normalized -if [ -z "$MY_PATH" ] ; then - # error; for some reason, the path is not accessible - # to the script (e.g. 
permissions re-evaled after suid) - echo Failed to determine location of script >2 - exit 1 # fail -fi -# change into the location of the script -cd $MY_PATH/perl - -echo '### Running perl tests ###' -rm -rf reports docs pm_to_blib blib -cover -delete -export HARNESS_PERL_SWITCHES=-MDevel::Cover=-db,reports,-select='^lib/*\.pm$',-ignore,'^t/' -rm -rf docs -mkdir -p docs/reports_text -prove -w -I lib t - -echo '### Generating test/pod coverage reports ###' -# removed 'condition' from coverage as '||' 'or' doesn't work properly -cover -coverage branch,subroutine,pod -report_c0 50 -report_c1 85 -report_c2 100 -report html_basic reports -silent |& grep -v '^Perltidy' | grep -v '^##' | grep -v '^1:' -# grep on last command to cleanup an oddity in perltidy -cover -coverage branch,subroutine,pod -report text reports -silent > docs/reports_text/coverage.txt -rm -rf reports/structure perl.reports/digests reports/cover.13 reports/runs -cp reports/coverage.html reports/index.html -mv reports docs/reports_html -unset HARNESS_PERL_SWITCHES - -echo '### Generating POD ###' -mkdir -p docs/pod_html -perl -MPod::Simple::HTMLBatch -e 'Pod::Simple::HTMLBatch::go' lib:bin docs/pod_html > /dev/null - -echo '### Archiving docs folder ###' -tar cz -C $MY_PATH/perl -f docs.tar.gz docs - -# generate manifest, and cleanup -echo '### Generating MANIFEST ###' -# delete incase any files are moved, the make target just adds stuff -rm -f MANIFEST -# cleanup things which could break the manifest -rm -rf install_tmp -perl Makefile.PL > /dev/null -make manifest >& /dev/null -rm -f Makefile MANIFEST.bak pm_to_blib - -# change back to original dir -cd $INIT_DIR From da293ade552ef47baac24d9a9ba67f5b17ab33d7 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Fri, 19 Nov 2021 16:20:57 +0000 Subject: [PATCH 09/17] sort out licenses and precommit failures --- .github/ISSUE_TEMPLATE/issue-bug-question.md | 11 ++- CHANGES.md | 92 ++++++++++--------- Dockerfile | 64 ++++++------- c++/filter_pindel_reads.cpp | 
1 - perl/Makefile.PL | 34 ++++--- perl/bin/FlagVcf.pl | 34 ++++--- perl/bin/pindel.pl | 34 ++++--- perl/bin/pindel_2_combined_vcf.pl | 34 ++++--- perl/bin/pindel_germ_bed.pl | 34 ++++--- perl/bin/pindel_input_gen.pl | 34 ++++--- perl/bin/pindel_merge_vcf_bam.pl | 34 ++++--- perl/bin/pindel_np_from_vcf.pl | 34 ++++--- perl/bin/pindel_np_remsample.pl | 29 ++++++ perl/bin/prep_np_release.pl | 29 ++++++ perl/lib/Sanger/CGP/Pindel.pm | 37 +++++--- perl/lib/Sanger/CGP/Pindel/Implement.pm | 37 +++++--- perl/lib/Sanger/CGP/Pindel/InputGen.pm | 37 +++++--- perl/lib/Sanger/CGP/Pindel/InputGen/Pair.pm | 37 +++++--- .../CGP/Pindel/InputGen/PairToPindel.pm | 38 ++++---- perl/lib/Sanger/CGP/Pindel/InputGen/Read.pm | 37 +++++--- .../Sanger/CGP/Pindel/InputGen/SamHeader.pm | 37 +++++--- .../Sanger/CGP/Pindel/OutputGen/BamUtil.pm | 37 +++++--- .../CGP/Pindel/OutputGen/CombinedRecord.pm | 37 +++++--- .../OutputGen/CombinedRecordGenerator.pm | 37 +++++--- .../CGP/Pindel/OutputGen/PindelRecord.pm | 37 +++++--- .../Pindel/OutputGen/PindelRecordParser.pm | 37 +++++--- .../CGP/Pindel/OutputGen/VcfConverter.pm | 37 +++++--- .../CGP/PindelPostProcessing/AbstractExe.pm | 36 +++++--- .../CGP/PindelPostProcessing/FilterRules.pm | 36 +++++--- .../FragmentFilterRules.pm | 44 +++++---- .../PindelPostProcessing/VcfSoftFlagger.pm | 36 +++++--- perl/t/inputGen.t | 1 - perl/t/inputGenRead.t | 1 - perl/t/vcfPindelFragmentFlagger.t | 4 +- setup.sh | 34 ++++--- 35 files changed, 713 insertions(+), 459 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/issue-bug-question.md b/.github/ISSUE_TEMPLATE/issue-bug-question.md index 91a1173..2e62079 100644 --- a/.github/ISSUE_TEMPLATE/issue-bug-question.md +++ b/.github/ISSUE_TEMPLATE/issue-bug-question.md @@ -1,16 +1,17 @@ ---- +______________________________________________________________________ + name: Issue/Bug/Question about: Includes checks title: '' labels: '' assignees: '' ---- +______________________________________________________________________ 
Please ensure that you have check the following before raising a new issue: -* [README](https://github.com/cancerit/cgpPindel/blob/dev/README.md) -* [Wiki](https://github.com/cancerit/cgpPindel/wiki) -* Existing issues (closed too) before raising a new item +- [README](https://github.com/cancerit/cgpPindel/blob/dev/README.md) +- [Wiki](https://github.com/cancerit/cgpPindel/wiki) +- Existing issues (closed too) before raising a new item Please provide as much information as possible. diff --git a/CHANGES.md b/CHANGES.md index d43d14d..dfea28a 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,107 +2,104 @@ ## 3.5.0 -* Update to core pindel algorithm to allow complex DI events to have longer inserted sequence than deleted - * Masking real events +- Update to core pindel algorithm to allow complex DI events to have longer inserted sequence than deleted + - Masking real events ## 3.4.1 -* Updated Dockerfile to use pcap-core 5.4.0 - htslib/samtools 1.11 +- Updated Dockerfile to use pcap-core 5.4.0 - htslib/samtools 1.11 ## 3.4.0 -* Updated Dockerfile to use pcap-core 5.2.2 -* Modified setup script to use build/*.sh +- Updated Dockerfile to use pcap-core 5.2.2 +- Modified setup script to use build/\*.sh ## 3.3.0 -* I/O hardening, see [milestone 3](https://github.com/cancerit/cgpPindel/milestone/3) +- I/O hardening, see [milestone 3](https://github.com/cancerit/cgpPindel/milestone/3) ## 3.2.2 -* Handle Input files that may have no reads at all, specifically an issue when generating a normal panel. +- Handle Input files that may have no reads at all, specifically an issue when generating a normal panel. 
## 3.2.1 -* Added Dockerfile and docker documentation +- Added Dockerfile and docker documentation ## 3.2.0 -* Tabix search for high depth/excluded regions now performed in memory using IntervalTrees - * Reduces runtime of input step by ~50% - * Improved disk access profile - * Zero impact on results +- Tabix search for high depth/excluded regions now performed in memory using IntervalTrees + - Reduces runtime of input step by ~50% + - Improved disk access profile + - Zero impact on results ## 3.1.2 -* 3.0.5 introduced species parsing bug causing single word species names to be invalid. +- 3.0.5 introduced species parsing bug causing single word species names to be invalid. ## 3.1.1 -* Fix regression - ability to cope with chromosomes with no events. +- Fix regression - ability to cope with chromosomes with no events. ## 3.1.0 -* Incorporates updated pindel which improves sensitivity -* Internally interpret QCFAIL to determine if whole pair fails +- Incorporates updated pindel which improves sensitivity +- Internally interpret QCFAIL to determine if whole pair fails ## 3.0.6 -* Fixed version tag +- Fixed version tag ## 3.0.5 -* Handles species names with spaces in it -* modified checks for species,assembly and checksum +- Handles species names with spaces in it +- modified checks for species,assembly and checksum ## 3.0.4 -* Output bug for pindel BAM/CRAM corrected. When more than 1 chr in output files had no reads. +- Output bug for pindel BAM/CRAM corrected. When more than 1 chr in output files had no reads. ## 3.0.3 -* Changes to how germline filter determined resulted in dummy germline bed file not being generated as previously. -* This release reinstates the old behaviour. +- Changes to how germline filter determined resulted in dummy germline bed file not being generated as previously. +- This release reinstates the old behaviour. 
## 3.0.2 -* Correct example rule files for *Fragment.lst files to use FFnnn filter types +- Correct example rule files for \*Fragment.lst files to use FFnnn filter types ## 3.0.1 -* Update tabix calls to directly use query_full (solves GRCh38 contig name issues). +- Update tabix calls to directly use query_full (solves GRCh38 contig name issues). ## 3.0.0 -* Germline bed file is now merged for adjacent regions (#31) -* More compressed intermediate files (#55) -* Change to `Const::Fast` where appropriate (#41) -* Removed TG VG from genotype. - * Readgroups are always variable, often 1 in data from last few years - * Not used by our filters. -* Supports BAM/CRAM inputs -* Output will be aligned with inputs - * bam vs cram - * bai vs csi -* Although ground work for csi input/output has been done `Bio::DB::HTS` doesn't support csi indexed input yet. - * Created our own fork at [`cancerit/Bio::DB::HTS`][cancerit-biodbhts] so that this could be enabled. - * You will need to install this manually or use one of our images for this functionallity. - * [dockstore-cgpwxs][ds-cgpwxs-git] - * [dockstore-cgpwxs][ds-cgpwgs-git] +- Germline bed file is now merged for adjacent regions (#31) +- More compressed intermediate files (#55) +- Change to `Const::Fast` where appropriate (#41) +- Removed TG VG from genotype. + - Readgroups are always variable, often 1 in data from last few years + - Not used by our filters. +- Supports BAM/CRAM inputs +- Output will be aligned with inputs + - bam vs cram + - bai vs csi +- Although ground work for csi input/output has been done `Bio::DB::HTS` doesn't support csi indexed input yet. + - Created our own fork at [`cancerit/Bio::DB::HTS`][cancerit-biodbhts] so that this could be enabled. + - You will need to install this manually or use one of our images for this functionallity. 
+ - [dockstore-cgpwxs][ds-cgpwxs-git] + - [dockstore-cgpwxs][ds-cgpwgs-git] -[cancerit-biodbhts]: https://github.com/cancerit/Bio-DB-HTS/releases/tag/v2.10-rc1 -[ds-cgpwxs-git]: https://github.com/cancerit/dockstore-cgpwxs -[ds-cgpwgs-git]: https://github.com/cancerit/dockstore-cgpwgs ## 2.2.5 -* Update tabix->query to tabix->query_full +- Update tabix->query to tabix->query_full ## 2.2.4 -* Force sorting of FILTER field to make records easier to diff. -* Fix sorting of final VCF to handle events with same start better when using comparison tools +- Force sorting of FILTER field to make records easier to diff. +- Fix sorting of final VCF to handle events with same start better when using comparison tools ## 2.2.3 @@ -180,3 +177,8 @@ Found 15321 SNPs common to both files. Found 0 SNPs only in main file. Found 0 SNPs only in second file. After +``` + +[cancerit-biodbhts]: https://github.com/cancerit/Bio-DB-HTS/releases/tag/v2.10-rc1 +[ds-cgpwgs-git]: https://github.com/cancerit/dockstore-cgpwgs +[ds-cgpwxs-git]: https://github.com/cancerit/dockstore-cgpwxs diff --git a/Dockerfile b/Dockerfile index 341fba9..2dee6db 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,35 +1,37 @@ FROM quay.io/wtsicgp/pcap-core:5.6.1 as builder +# hadolint ignore=DL3002 USER root # ALL tool versions used by opt-build.sh -ENV VER_CGPVCF="v2.2.1" -ENV VER_VCFTOOLS="0.1.16" +# need to keep in sync with setup.sh +ENV VER_CGPVCF="v2.2.1"\ + VER_VCFTOOLS="0.1.16" -RUN apt-get -yq update -RUN apt-get install -yq --no-install-recommends locales -RUN apt-get install -yq --no-install-recommends g++ -RUN apt-get install -yq --no-install-recommends make -RUN apt-get install -yq --no-install-recommends gcc -RUN apt-get install -yq --no-install-recommends pkg-config -RUN apt-get install -yq --no-install-recommends zlib1g-dev - -RUN locale-gen en_US.UTF-8 -RUN update-locale LANG=en_US.UTF-8 +# hadolint ignore=DL3008 +RUN apt-get -yq update \ +&& apt-get install -yq --no-install-recommends locales g++ make 
gcc pkg-config zlib1g-dev \ +&& locale-gen en_US.UTF-8 \ +&& update-locale LANG=en_US.UTF-8 ENV OPT /opt/wtsi-cgp -ENV PATH $OPT/bin:$OPT/biobambam2/bin:$PATH -ENV PERL5LIB $OPT/lib/perl5 -ENV LD_LIBRARY_PATH $OPT/lib -ENV LC_ALL en_US.UTF-8 -ENV LANG en_US.UTF-8 +ENV PATH=$OPT/bin:$OPT/biobambam2/bin:$PATH \ + PERL5LIB=$OPT/lib/perl5 \ + LD_LIBRARY_PATH=$OPT/lib \ + LC_ALL=en_US.UTF-8 \ + LANG=en_US.UTF-8 + +WORKDIR /tmp/build # build tools from other repos -ADD build/opt-build.sh build/ +COPY build/opt-build.sh build/ RUN bash build/opt-build.sh $OPT +COPY build/opt-build-local.sh build/ +COPY c++ c++ +COPY perl perl + # build the tools in this repo, separate to reduce build time on errors -COPY . . RUN bash build/opt-build-local.sh $OPT FROM ubuntu:20.04 @@ -38,8 +40,9 @@ LABEL maintainer="cgphelp@sanger.ac.uk" \ uk.ac.sanger.cgp="Cancer, Ageing and Somatic Mutation, Wellcome Trust Sanger Institute" \ description="cgpPindel docker" -RUN apt-get -yq update -RUN apt-get install -yq --no-install-recommends \ +# hadolint ignore=DL3008 +RUN apt-get -yq update \ +&& apt-get install -yq --no-install-recommends \ apt-transport-https \ locales \ curl \ @@ -59,19 +62,18 @@ google-perftools \ unattended-upgrades && \ unattended-upgrade -d -v && \ apt-get remove -yq unattended-upgrades && \ -apt-get autoremove -yq - -RUN locale-gen en_US.UTF-8 -RUN update-locale LANG=en_US.UTF-8 +apt-get autoremove -yq \ +&& rm -rf /var/lib/apt/lists/* \ +&& locale-gen en_US.UTF-8 \ +&& update-locale LANG=en_US.UTF-8 ENV OPT /opt/wtsi-cgp -ENV PATH $OPT/bin:$OPT/biobambam2/bin:$PATH -ENV PERL5LIB $OPT/lib/perl5 -ENV LD_LIBRARY_PATH $OPT/lib -ENV LC_ALL en_US.UTF-8 -ENV LANG en_US.UTF-8 +ENV PATH=$OPT/bin:$OPT/biobambam2/bin:$PATH \ + PERL5LIB=$OPT/lib/perl5 \ + LD_LIBRARY_PATH=$OPT/lib \ + LC_ALL=en_US.UTF-8 \ + LANG=en_US.UTF-8 -RUN mkdir -p $OPT COPY --from=builder $OPT $OPT ## USER CONFIGURATION diff --git a/c++/filter_pindel_reads.cpp b/c++/filter_pindel_reads.cpp index 690eba1..6d12aaa 
100644 --- a/c++/filter_pindel_reads.cpp +++ b/c++/filter_pindel_reads.cpp @@ -937,4 +937,3 @@ void CleanUniquePoints (vector & Input_UP) { Input_UP.clear(); Input_UP = TempUP; } - diff --git a/perl/Makefile.PL b/perl/Makefile.PL index 11411d0..aa083b4 100755 --- a/perl/Makefile.PL +++ b/perl/Makefile.PL @@ -1,25 +1,33 @@ #!/usr/bin/perl - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## +# along with this program. If not, see . +# +# 1. 
The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# use ExtUtils::MakeMaker; diff --git a/perl/bin/FlagVcf.pl b/perl/bin/FlagVcf.pl index e07c5fb..f4d3b2f 100755 --- a/perl/bin/FlagVcf.pl +++ b/perl/bin/FlagVcf.pl @@ -1,25 +1,33 @@ #!/usr/bin/perl - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. 
+# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# my $config_path; BEGIN { diff --git a/perl/bin/pindel.pl b/perl/bin/pindel.pl index 2a3bdc8..fe5f617 100755 --- a/perl/bin/pindel.pl +++ b/perl/bin/pindel.pl @@ -1,25 +1,33 @@ #!/usr/bin/perl - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. 
+# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# BEGIN { diff --git a/perl/bin/pindel_2_combined_vcf.pl b/perl/bin/pindel_2_combined_vcf.pl index fb64c6b..0e6a625 100755 --- a/perl/bin/pindel_2_combined_vcf.pl +++ b/perl/bin/pindel_2_combined_vcf.pl @@ -1,25 +1,33 @@ #!/usr/bin/perl - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. 
+# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. 
+# use strict; diff --git a/perl/bin/pindel_germ_bed.pl b/perl/bin/pindel_germ_bed.pl index 814f77b..eb4f771 100755 --- a/perl/bin/pindel_germ_bed.pl +++ b/perl/bin/pindel_germ_bed.pl @@ -1,25 +1,33 @@ #!/usr/bin/perl - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. 
For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# BEGIN { diff --git a/perl/bin/pindel_input_gen.pl b/perl/bin/pindel_input_gen.pl index 3fc82ca..d1c8781 100755 --- a/perl/bin/pindel_input_gen.pl +++ b/perl/bin/pindel_input_gen.pl @@ -1,25 +1,33 @@ #!/usr/bin/perl - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
-########## LICENCE ########## +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# BEGIN { diff --git a/perl/bin/pindel_merge_vcf_bam.pl b/perl/bin/pindel_merge_vcf_bam.pl index 2d755b0..e69ef40 100755 --- a/perl/bin/pindel_merge_vcf_bam.pl +++ b/perl/bin/pindel_merge_vcf_bam.pl @@ -1,25 +1,33 @@ #!/usr/bin/perl - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. 
+# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# BEGIN { use Cwd qw(abs_path); diff --git a/perl/bin/pindel_np_from_vcf.pl b/perl/bin/pindel_np_from_vcf.pl index 832258a..c777467 100755 --- a/perl/bin/pindel_np_from_vcf.pl +++ b/perl/bin/pindel_np_from_vcf.pl @@ -1,25 +1,33 @@ #!/usr/bin/perl - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. 
+# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# BEGIN { diff --git a/perl/bin/pindel_np_remsample.pl b/perl/bin/pindel_np_remsample.pl index feb39b6..f517944 100755 --- a/perl/bin/pindel_np_remsample.pl +++ b/perl/bin/pindel_np_remsample.pl @@ -1,4 +1,33 @@ #!/usr/bin/perl +# Copyright (c) 2014-2021 Genome Research Ltd +# +# Author: CASM/Cancer IT +# +# This file is part of cgpPindel. 
+# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# use strict; use warnings FATAL=>'all'; diff --git a/perl/bin/prep_np_release.pl b/perl/bin/prep_np_release.pl index 8342c6c..8ee44b9 100755 --- a/perl/bin/prep_np_release.pl +++ b/perl/bin/prep_np_release.pl @@ -1,4 +1,33 @@ #!/usr/bin/perl +# Copyright (c) 2014-2021 Genome Research Ltd +# +# Author: CASM/Cancer IT +# +# This file is part of cgpPindel. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# use strict; use warnings FATAL=>'all'; diff --git a/perl/lib/Sanger/CGP/Pindel.pm b/perl/lib/Sanger/CGP/Pindel.pm index 2ccc297..858d636 100644 --- a/perl/lib/Sanger/CGP/Pindel.pm +++ b/perl/lib/Sanger/CGP/Pindel.pm @@ -1,26 +1,33 @@ -package Sanger::CGP::Pindel; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version.
+# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -########## LICENCE ########## - +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’.
+# +package Sanger::CGP::Pindel; use strict; use Const::Fast qw(const); diff --git a/perl/lib/Sanger/CGP/Pindel/Implement.pm b/perl/lib/Sanger/CGP/Pindel/Implement.pm index d765ee5..cccd3c0 100644 --- a/perl/lib/Sanger/CGP/Pindel/Implement.pm +++ b/perl/lib/Sanger/CGP/Pindel/Implement.pm @@ -1,26 +1,33 @@ -package Sanger::CGP::Pindel::Implement; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -########## LICENCE ########## - +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# 1.
The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# +package Sanger::CGP::Pindel::Implement; use strict; use warnings FATAL => 'all'; diff --git a/perl/lib/Sanger/CGP/Pindel/InputGen.pm b/perl/lib/Sanger/CGP/Pindel/InputGen.pm index 1cb7b87..191b8f2 100644 --- a/perl/lib/Sanger/CGP/Pindel/InputGen.pm +++ b/perl/lib/Sanger/CGP/Pindel/InputGen.pm @@ -1,26 +1,33 @@ -package Sanger::CGP::Pindel::InputGen; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. 
+# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -########## LICENCE ########## - +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# +package Sanger::CGP::Pindel::InputGen; use strict; use English qw( -no_match_vars ); diff --git a/perl/lib/Sanger/CGP/Pindel/InputGen/Pair.pm b/perl/lib/Sanger/CGP/Pindel/InputGen/Pair.pm index 25746b8..cff68ca 100644 --- a/perl/lib/Sanger/CGP/Pindel/InputGen/Pair.pm +++ b/perl/lib/Sanger/CGP/Pindel/InputGen/Pair.pm @@ -1,26 +1,33 @@ -package Sanger::CGP::Pindel::InputGen::Pair; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version.
+# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -########## LICENCE ########## - +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’.
+# +package Sanger::CGP::Pindel::InputGen::Pair; use strict; use English qw( -no_match_vars ); diff --git a/perl/lib/Sanger/CGP/Pindel/InputGen/PairToPindel.pm b/perl/lib/Sanger/CGP/Pindel/InputGen/PairToPindel.pm index 06699e4..7c8a36d 100644 --- a/perl/lib/Sanger/CGP/Pindel/InputGen/PairToPindel.pm +++ b/perl/lib/Sanger/CGP/Pindel/InputGen/PairToPindel.pm @@ -1,26 +1,33 @@ -package Sanger::CGP::Pindel::InputGen::PairToPindel; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -########## LICENCE ########## - +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# 1.
The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# +package Sanger::CGP::Pindel::InputGen::PairToPindel; use strict; use English qw( -no_match_vars ); @@ -135,4 +142,3 @@ sub _self_anchored_to_pindel { 1; __DATA__ - diff --git a/perl/lib/Sanger/CGP/Pindel/InputGen/Read.pm b/perl/lib/Sanger/CGP/Pindel/InputGen/Read.pm index 89cc621..58e3d73 100644 --- a/perl/lib/Sanger/CGP/Pindel/InputGen/Read.pm +++ b/perl/lib/Sanger/CGP/Pindel/InputGen/Read.pm @@ -1,26 +1,33 @@ -package Sanger::CGP::Pindel::InputGen::Read; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. 
# -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -########## LICENCE ########## - +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# +package Sanger::CGP::Pindel::InputGen::Read; use strict; use English qw( -no_match_vars ); diff --git a/perl/lib/Sanger/CGP/Pindel/InputGen/SamHeader.pm b/perl/lib/Sanger/CGP/Pindel/InputGen/SamHeader.pm index 3048d8d..bfae6b7 100644 --- a/perl/lib/Sanger/CGP/Pindel/InputGen/SamHeader.pm +++ b/perl/lib/Sanger/CGP/Pindel/InputGen/SamHeader.pm @@ -1,26 +1,33 @@ -package Sanger::CGP::Pindel::InputGen::SamHeader; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel.
# -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -########## LICENCE ########## - +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’.
+# +package Sanger::CGP::Pindel::InputGen::SamHeader; use strict; use English qw( -no_match_vars ); diff --git a/perl/lib/Sanger/CGP/Pindel/OutputGen/BamUtil.pm b/perl/lib/Sanger/CGP/Pindel/OutputGen/BamUtil.pm index f20abdc..8584de3 100644 --- a/perl/lib/Sanger/CGP/Pindel/OutputGen/BamUtil.pm +++ b/perl/lib/Sanger/CGP/Pindel/OutputGen/BamUtil.pm @@ -1,26 +1,33 @@ -package Sanger::CGP::Pindel::OutputGen::BamUtil; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -########## LICENCE ########## - +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# 1.
The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# +package Sanger::CGP::Pindel::OutputGen::BamUtil; use Sanger::CGP::Pindel; use Sanger::CGP::Pindel::Implement; diff --git a/perl/lib/Sanger/CGP/Pindel/OutputGen/CombinedRecord.pm b/perl/lib/Sanger/CGP/Pindel/OutputGen/CombinedRecord.pm index e140a67..a72f4cd 100644 --- a/perl/lib/Sanger/CGP/Pindel/OutputGen/CombinedRecord.pm +++ b/perl/lib/Sanger/CGP/Pindel/OutputGen/CombinedRecord.pm @@ -1,26 +1,33 @@ -package Sanger::CGP::Pindel::OutputGen::CombinedRecord; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. 
# -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -########## LICENCE ########## - +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# +package Sanger::CGP::Pindel::OutputGen::CombinedRecord; use Sanger::CGP::Pindel; diff --git a/perl/lib/Sanger/CGP/Pindel/OutputGen/CombinedRecordGenerator.pm b/perl/lib/Sanger/CGP/Pindel/OutputGen/CombinedRecordGenerator.pm index 79dbb1f..55aa479 100644 --- a/perl/lib/Sanger/CGP/Pindel/OutputGen/CombinedRecordGenerator.pm +++ b/perl/lib/Sanger/CGP/Pindel/OutputGen/CombinedRecordGenerator.pm @@ -1,26 +1,33 @@ -package Sanger::CGP::Pindel::OutputGen::CombinedRecordGenerator; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd.
+# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -########## LICENCE ########## - +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them.
For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# +package Sanger::CGP::Pindel::OutputGen::CombinedRecordGenerator; use Sanger::CGP::Pindel; diff --git a/perl/lib/Sanger/CGP/Pindel/OutputGen/PindelRecord.pm b/perl/lib/Sanger/CGP/Pindel/OutputGen/PindelRecord.pm index 5000ed1..c085646 100644 --- a/perl/lib/Sanger/CGP/Pindel/OutputGen/PindelRecord.pm +++ b/perl/lib/Sanger/CGP/Pindel/OutputGen/PindelRecord.pm @@ -1,26 +1,33 @@ -package Sanger::CGP::Pindel::OutputGen::PindelRecord; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. 
+# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -########## LICENCE ########## - +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# +package Sanger::CGP::Pindel::OutputGen::PindelRecord; use Sanger::CGP::Pindel; diff --git a/perl/lib/Sanger/CGP/Pindel/OutputGen/PindelRecordParser.pm b/perl/lib/Sanger/CGP/Pindel/OutputGen/PindelRecordParser.pm index f3e9298..49c5dd3 100644 --- a/perl/lib/Sanger/CGP/Pindel/OutputGen/PindelRecordParser.pm +++ b/perl/lib/Sanger/CGP/Pindel/OutputGen/PindelRecordParser.pm @@ -1,26 +1,33 @@ -package Sanger::CGP::Pindel::OutputGen::PindelRecordParser; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel.
# -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -########## LICENCE ########## - +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’.
+# +package Sanger::CGP::Pindel::OutputGen::PindelRecordParser; use Sanger::CGP::Pindel; diff --git a/perl/lib/Sanger/CGP/Pindel/OutputGen/VcfConverter.pm b/perl/lib/Sanger/CGP/Pindel/OutputGen/VcfConverter.pm index cba97a4..4b545bc 100644 --- a/perl/lib/Sanger/CGP/Pindel/OutputGen/VcfConverter.pm +++ b/perl/lib/Sanger/CGP/Pindel/OutputGen/VcfConverter.pm @@ -1,26 +1,33 @@ -package Sanger::CGP::Pindel::OutputGen::VcfConverter; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -########## LICENCE ########## - +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# 1.
The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# +package Sanger::CGP::Pindel::OutputGen::VcfConverter; use Sanger::CGP::Pindel; diff --git a/perl/lib/Sanger/CGP/PindelPostProcessing/AbstractExe.pm b/perl/lib/Sanger/CGP/PindelPostProcessing/AbstractExe.pm index 64044d0..7a641ab 100644 --- a/perl/lib/Sanger/CGP/PindelPostProcessing/AbstractExe.pm +++ b/perl/lib/Sanger/CGP/PindelPostProcessing/AbstractExe.pm @@ -1,25 +1,33 @@ -package Sanger::CGP::PindelPostProcessing::AbstractExe; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. 
# -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# +package Sanger::CGP::PindelPostProcessing::AbstractExe; use FindBin; use Sanger::CGP::Pindel; diff --git a/perl/lib/Sanger/CGP/PindelPostProcessing/FilterRules.pm b/perl/lib/Sanger/CGP/PindelPostProcessing/FilterRules.pm index 13dbed9..42bd264 100644 --- a/perl/lib/Sanger/CGP/PindelPostProcessing/FilterRules.pm +++ b/perl/lib/Sanger/CGP/PindelPostProcessing/FilterRules.pm @@ -1,25 +1,33 @@ -package Sanger::CGP::PindelPostProcessing::FilterRules; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. 
+# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. 
+# +package Sanger::CGP::PindelPostProcessing::FilterRules; use strict; use Bio::DB::HTS::Tabix; diff --git a/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm b/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm index 745a3b7..beb6b7e 100644 --- a/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm +++ b/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm @@ -1,25 +1,33 @@ -package Sanger::CGP::PindelPostProcessing::FragmentFilterRules; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## +# along with this program. If not, see . +# +# 1. 
The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# +package Sanger::CGP::PindelPostProcessing::FragmentFilterRules; use strict; use Bio::DB::HTS::Tabix; @@ -485,13 +493,13 @@ sub flag_020 { my $fd_total = $nor_geno[$previous_format_hash->{'FD'}] + $tum_geno[$previous_format_hash->{'FD'}]; if($fd_total < 200 && - $nor_geno[$previous_format_hash->{'FC'}] <= 1 && - $nor_geno[$previous_format_hash->{'FD'}] >= 10 && + $nor_geno[$previous_format_hash->{'FC'}] <= 1 && + $nor_geno[$previous_format_hash->{'FD'}] >= 10 && $nor_geno[$previous_format_hash->{'FC'}] < ($tum_geno[$previous_format_hash->{'FC'}] * 0.1) ){ return $FAIL; } - + my $tumfc_over_tumfd = $tum_geno[$previous_format_hash->{'FD'}] > 0 ? $tum_geno[$previous_format_hash->{'FC'}] / $tum_geno[$previous_format_hash->{'FD'}] : undef; my $norfc_over_norfd = $nor_geno[$previous_format_hash->{'FD'}] > 0 ? 
$nor_geno[$previous_format_hash->{'FC'}] / $nor_geno[$previous_format_hash->{'FD'}] : undef; @@ -502,7 +510,7 @@ sub flag_020 { ){ return $FAIL; } - + }else{ if($norfc_over_norfd > 0.02 && $tumfc_over_tumfd < 0.2){ return $FAIL; diff --git a/perl/lib/Sanger/CGP/PindelPostProcessing/VcfSoftFlagger.pm b/perl/lib/Sanger/CGP/PindelPostProcessing/VcfSoftFlagger.pm index a5a111e..a6d47ab 100644 --- a/perl/lib/Sanger/CGP/PindelPostProcessing/VcfSoftFlagger.pm +++ b/perl/lib/Sanger/CGP/PindelPostProcessing/VcfSoftFlagger.pm @@ -1,25 +1,33 @@ -package Sanger::CGP::PindelPostProcessing::VcfSoftFlagger; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## +# along with this program. 
If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# +package Sanger::CGP::PindelPostProcessing::VcfSoftFlagger; use strict; use Carp; diff --git a/perl/t/inputGen.t b/perl/t/inputGen.t index 0c6123f..a540e9e 100644 --- a/perl/t/inputGen.t +++ b/perl/t/inputGen.t @@ -76,4 +76,3 @@ subtest 'reads_to_disk checks' => sub{ }; done_testing(); - diff --git a/perl/t/inputGenRead.t b/perl/t/inputGenRead.t index 357c7ac..62309e8 100644 --- a/perl/t/inputGenRead.t +++ b/perl/t/inputGenRead.t @@ -45,4 +45,3 @@ $obj = new_ok($MODULE, [\$cleaned, 2]); is($obj->frac_pbq_poor, $EXP_FRAC_PBQ, 'Check poor qual PBQ fraction'); done_testing(); - diff --git a/perl/t/vcfPindelFragmentFlagger.t b/perl/t/vcfPindelFragmentFlagger.t index f595a9c..f663524 100644 --- a/perl/t/vcfPindelFragmentFlagger.t +++ b/perl/t/vcfPindelFragmentFlagger.t @@ -369,7 +369,7 @@ sub _test_FF007{ is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF007 rdMt < 6"); $RECORD = [split("\t",$test2)]; is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF007 rdMt < 6 and rdWt > 8pc"); - + $RECORD = [split("\t",$test3)]; is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF007 rdMt == 6 and rdWt < 8pc"); $RECORD = [split("\t",$test4)]; @@ -720,4 +720,4 @@ sub _test_FF020{ $RECORD = [split("\t",$test16)]; 
is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 totalFD < 200 WtFC == 3"); }; -} \ No newline at end of file +} diff --git a/setup.sh b/setup.sh index 48c34d9..f744386 100755 --- a/setup.sh +++ b/setup.sh @@ -1,25 +1,33 @@ #!/bin/bash - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. 
For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. # ALL tool versions used by opt-build.sh # need to keep in sync with Dockerfile From a4b1e7d9ea0c65782dfe7c54ff25edd1e97666be Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Fri, 19 Nov 2021 16:22:52 +0000 Subject: [PATCH 10/17] minor readme correction [ci skip] --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index aec2ba9..1fb4456 100644 --- a/README.md +++ b/README.md @@ -88,8 +88,6 @@ Please use [skywalking-eyes](https://github.com/apache/skywalking-eyes). Expected workflow: -Expected workflow: - ```bash # recent build, change to apache/skywalking-eyes:0.2.0 once released export DOCKER_IMG=ghcr.io/apache/skywalking-eyes/license-eye From 80e43334c698cf53cdfb61e53d0526d95ec9da00 Mon Sep 17 00:00:00 2001 From: Thomas Clarke Date: Fri, 19 Nov 2021 16:32:02 +0000 Subject: [PATCH 11/17] fixed the missing brackets --- perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm b/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm index 745a3b7..49efcfd 100644 --- a/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm +++ b/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm @@ -468,7 +468,7 @@ sub flag_019 { return $FAIL; } # previous test confirms FC/FD can't be 0, so no div0 check required - if ($tum_geno[$previous_format_hash->{'FC'} / $tum_geno[$previous_format_hash->{'FD'} < 0.05){ + if ($tum_geno[$previous_format_hash->{'FC'}] / 
$tum_geno[$previous_format_hash->{'FD'}] < 0.05){ return $FAIL; } From ce672d13010d7453783a59e6791c17b612bdb009 Mon Sep 17 00:00:00 2001 From: Thomas Clarke Date: Wed, 1 Dec 2021 16:45:58 +0000 Subject: [PATCH 12/17] corrected logic in FF020 --- .../FragmentFilterRules.pm | 18 +++---- perl/t/vcfPindelFragmentFlagger.t | 52 ++++++++++++------- 2 files changed, 41 insertions(+), 29 deletions(-) diff --git a/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm b/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm index d05e252..a74026f 100644 --- a/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm +++ b/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm @@ -490,34 +490,34 @@ sub flag_020 { my @nor_geno = split(':',$$RECORD[9]); my @tum_geno = split(':',$$RECORD[10]); - my $fd_total = $nor_geno[$previous_format_hash->{'FD'}] + $tum_geno[$previous_format_hash->{'FD'}]; + my $nor_fd = $nor_geno[$previous_format_hash->{'FD'}]; - if($fd_total < 200 && + if($nor_fd < 200 && $nor_geno[$previous_format_hash->{'FC'}] <= 1 && $nor_geno[$previous_format_hash->{'FD'}] >= 10 && - $nor_geno[$previous_format_hash->{'FC'}] < ($tum_geno[$previous_format_hash->{'FC'}] * 0.1) + $nor_geno[$previous_format_hash->{'FC'}] <= ($tum_geno[$previous_format_hash->{'FC'}] * 0.1) ){ - return $FAIL; + return $PASS; } my $tumfc_over_tumfd = $tum_geno[$previous_format_hash->{'FD'}] > 0 ? $tum_geno[$previous_format_hash->{'FC'}] / $tum_geno[$previous_format_hash->{'FD'}] : undef; my $norfc_over_norfd = $nor_geno[$previous_format_hash->{'FD'}] > 0 ? 
$nor_geno[$previous_format_hash->{'FC'}] / $nor_geno[$previous_format_hash->{'FD'}] : undef; - if($fd_total < 200){ + if($nor_fd < 200){ if(($nor_geno[$previous_format_hash->{'FC'}] == 1 || $nor_geno[$previous_format_hash->{'FC'}] == 2) && $norfc_over_norfd <= 0.05 && $tumfc_over_tumfd >= 0.2 ){ - return $FAIL; + return $PASS; } }else{ - if($norfc_over_norfd > 0.02 && $tumfc_over_tumfd < 0.2){ - return $FAIL; + if($norfc_over_norfd <= 0.02 && $tumfc_over_tumfd >= 0.2){ + return $PASS; } } - return $PASS; + return $FAIL; } 1; diff --git a/perl/t/vcfPindelFragmentFlagger.t b/perl/t/vcfPindelFragmentFlagger.t index f663524..5ed82e4 100644 --- a/perl/t/vcfPindelFragmentFlagger.t +++ b/perl/t/vcfPindelFragmentFlagger.t @@ -659,9 +659,9 @@ sub _test_FF019{ sub _test_FF020{ my ($filter_hash) = @_; subtest "Test rule FF020" => sub { - my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 20:100 0:100'; - my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 1:100 0:100'; - my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 20:100 20:100'; + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 5:200 19:100'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 3:200 19:100'; + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 5:200 21:100'; my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 1:10 20:99'; my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 0:10 20:101'; my $test6 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 1:11 20:111'; @@ -673,8 +673,12 @@ sub _test_FF020{ my $test12 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 2:40 20:99'; my $test13 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 2:39 20:99'; my $test14 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 1:11 9:111'; - my $test15 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 1:11 10:111'; + my $test15 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 2:100 20:101'; my $test16 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 3:10 10:100'; + my $test17 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 0:0 0:0'; + my $test18 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 4:200 1:100'; + my $test19 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 3:200 20:100'; + my $test20 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 5:201 19:100'; my $sub = $filter_hash->{test}; @@ -688,36 +692,44 @@ sub _test_FF020{ is($filter_hash->{tag}, 'INFO/LEN',"_test_FF019 check the correct info tag has been set for the rule"); $RECORD = [split("\t",$test1)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 totalFD == 200 WtFC / WtFD > 0.02 totalFC / totalFD < 0.2"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 WtFD == 200 WtFC / WtFD > 0.02 tumFC / tumFD < 0.2"); $RECORD = [split("\t",$test2)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 totalFD == 200 WtFC / WtFD < 0.02 totalFC"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 WtFD == 200 WtFC / WtFD < 0.02 tumFC / tumFD < 0.2"); $RECORD = [split("\t",$test3)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 totalFD == 200 WtFC / WtFD > 0.02 totalFC totalFC / totalFD > 0.2"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 WtFD == 200 WtFC / WtFD > 0.02 tumFC / tumFD > 0.2"); $RECORD = [split("\t",$test4)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 totalFD < 200 WtFC == 1 WtFD == 10 WtFC < MtFC * 0.1);"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 WtFD < 200 WtFC == 1 WtFD == 10 WtFC < MtFC * 0.1);"); $RECORD = [split("\t",$test5)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 totalFD < 200 WtFC == 0 WtFD == 10 WtFC < MtFC * 0.1);"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 WtFD < 200 WtFC == 0 WtFD == 10 WtFC < MtFC * 0.1);"); $RECORD = [split("\t",$test6)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 totalFD < 200 WtFC == 1 WtFD == 11 WtFC < MtFC * 0.1);"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 
WtFD < 200 WtFC == 1 WtFD == 11 WtFC < MtFC * 0.1);"); $RECORD = [split("\t",$test7)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 totalFD < 200 WtFC == 1 WtFD == 9 WtFC / WtFD > 0.05);"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 WtFD < 200 WtFC == 1 WtFD == 9 WtFC / WtFD > 0.05);"); $RECORD = [split("\t",$test8)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 totalFD < 200 WtFC == 2 WtFC / WtFD == 0.05 MtFC / MtFD == 0.2);"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 WtFD < 200 WtFC == 2 WtFC / WtFD == 0.05 MtFC / MtFD == 0.2);"); $RECORD = [split("\t",$test9)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 totalFD < 200 WtFC == 2 WtFC / WtFD == 0.05 MtFC / MtFD < 0.2);"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 WtFD < 200 WtFC == 2 WtFC / WtFD == 0.05 MtFC / MtFD < 0.2);"); $RECORD = [split("\t",$test10)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 totalFD < 200 WtFC == 2 WtFC / WtFD < 0.05 MtFC / MtFD == 0.2);"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 WtFD < 200 WtFC == 2 WtFC / WtFD < 0.05 MtFC / MtFD == 0.2);"); $RECORD = [split("\t",$test11)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 totalFD < 200 WtFC == 2 WtFC / WtFD < 0.05 MtFC / MtFD > 0.2);"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 WtFD < 200 WtFC == 2 WtFC / WtFD < 0.05 MtFC / MtFD > 0.2);"); $RECORD = [split("\t",$test12)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 totalFD < 200 WtFC == 2 WtFC / WtFD == 0.05 MtFC / MtFD > 0.2);"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 WtFD < 200 WtFC == 2 WtFC / WtFD == 0.05 MtFC / MtFD > 0.2);"); $RECORD = [split("\t",$test13)]; - 
is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 totalFD < 200 WtFC == 2 WtFC / WtFD > 0.05 MtFC / MtFD > 0.2);"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 WtFD < 200 WtFC == 2 WtFC / WtFD > 0.05 MtFC / MtFD > 0.2);"); $RECORD = [split("\t",$test14)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 totalFD < 200 WtFC == 1 WtFD == 11 WtFC > MtFC * 0.1);"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 WtFD < 200 WtFC == 1 WtFD == 11 WtFC > MtFC * 0.1 MtFC / MtFD < 0.2);"); $RECORD = [split("\t",$test15)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 totalFD < 200 WtFC == 1 WtFD == 11 WtFC == MtFC * 0.1);"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 WtFD < 200 WtFC == 1 WtFD == 100 WtFC == MtFC * 0.1 MtFC / MtFD < 0.2);"); $RECORD = [split("\t",$test16)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 totalFD < 200 WtFC == 3"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 WtFD < 200 WtFC == 3"); + $RECORD = [split("\t",$test17)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 divid by 0"); + $RECORD = [split("\t",$test18)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 WtFD == 200 WtFC / WtFD == 0.02 tumFC / tumFD < 0.2"); + $RECORD = [split("\t",$test19)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 WtFD == 200 WtFC / WtFD < 0.02 tumFC / tumFD == 0.2"); + $RECORD = [split("\t",$test20)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 WtFD > 200 WtFC / WtFD > 0.02 tumFC / tumFD < 0.2"); }; } From 3871983ce71ca2d7470f8681731cedae1e236a01 Mon Sep 17 00:00:00 2001 From: Thomas Clarke Date: Tue, 7 Dec 2021 10:10:39 +0000 Subject: [PATCH 13/17] tweak to FF020 to make FD>=200 less strict --- 
.../FragmentFilterRules.pm | 2 +- perl/t/vcfPindelFragmentFlagger.t | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm b/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm index a74026f..f2ee1ae 100644 --- a/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm +++ b/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm @@ -512,7 +512,7 @@ sub flag_020 { } }else{ - if($norfc_over_norfd <= 0.02 && $tumfc_over_tumfd >= 0.2){ + if($norfc_over_norfd <= 0.02 && $tumfc_over_tumfd >= 0.1){ return $PASS; } } diff --git a/perl/t/vcfPindelFragmentFlagger.t b/perl/t/vcfPindelFragmentFlagger.t index 5ed82e4..72c4dcc 100644 --- a/perl/t/vcfPindelFragmentFlagger.t +++ b/perl/t/vcfPindelFragmentFlagger.t @@ -659,9 +659,9 @@ sub _test_FF019{ sub _test_FF020{ my ($filter_hash) = @_; subtest "Test rule FF020" => sub { - my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 5:200 19:100'; - my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 3:200 19:100'; - my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 5:200 21:100'; + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 5:200 9:100'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 3:200 9:100'; + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 5:200 11:100'; my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 1:10 20:99'; my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 0:10 20:101'; my $test6 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 1:11 20:111'; @@ -676,9 +676,9 @@ sub _test_FF020{ my $test15 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 2:100 20:101'; my $test16 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 3:10 10:100'; my $test17 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 0:0 0:0'; - my $test18 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 4:200 1:100'; - my $test19 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 3:200 20:100'; - my $test20 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 5:201 19:100'; + my $test18 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 4:201 1:100'; + my $test19 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 3:201 10:100'; + my $test20 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 5:201 9:100'; my $sub = $filter_hash->{test}; @@ -726,10 +726,10 @@ sub _test_FF020{ $RECORD = [split("\t",$test17)]; is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 divid by 0"); $RECORD = [split("\t",$test18)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 WtFD == 200 WtFC / WtFD == 0.02 tumFC / tumFD < 0.2"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 WtFD > 200 WtFC / WtFD == 0.02 tumFC / tumFD < 0.1"); $RECORD = [split("\t",$test19)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 WtFD == 200 WtFC / WtFD < 0.02 tumFC / tumFD == 0.2"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 WtFD > 200 WtFC / WtFD < 0.02 tumFC / tumFD == 0.1"); $RECORD = [split("\t",$test20)]; - is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 WtFD > 200 WtFC / WtFD > 0.02 tumFC / tumFD < 0.2"); + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 WtFD > 200 WtFC / WtFD > 0.02 tumFC / tumFD < 0.1"); }; } From 9bf2f6b389ad5298d4c7b586437f1db5415c1c67 Mon Sep 17 00:00:00 2001 From: Thomas Clarke Date: Wed, 8 Dec 2021 11:42:03 +0000 Subject: [PATCH 14/17] updated ruleset for dermatlas --- perl/rules/dermatlasRulesFragment.lst | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 perl/rules/dermatlasRulesFragment.lst diff --git a/perl/rules/dermatlasRulesFragment.lst b/perl/rules/dermatlasRulesFragment.lst new file mode 100644 index 0000000..cf50e63 --- /dev/null +++ b/perl/rules/dermatlasRulesFragment.lst @@ -0,0 +1,10 @@ +Sanger::CGP::PindelPostProcessing::FragmentFilterRules +FF001 +FF002 +FF003 +FF004 +FF005 +FF006 +FF007 +FF019 +FF020 From b8b0c9fd5e36b3d4f26257260524cb6ad21b6c6f Mon Sep 17 00:00:00 2001 From: Thomas Clarke Date: Fri, 10 Dec 2021 15:05:53 +0000 Subject: [PATCH 15/17] rename ruleset file 
to pulldownFfpeRulesFragment.lst --- .../{dermatlasRulesFragment.lst => pulldownFfpeRulesFragment.lst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename perl/rules/{dermatlasRulesFragment.lst => pulldownFfpeRulesFragment.lst} (100%) diff --git a/perl/rules/dermatlasRulesFragment.lst b/perl/rules/pulldownFfpeRulesFragment.lst similarity index 100% rename from perl/rules/dermatlasRulesFragment.lst rename to perl/rules/pulldownFfpeRulesFragment.lst From 68ed4b61b191b2e2c37fcda9443498a9c1025c91 Mon Sep 17 00:00:00 2001 From: Thomas Clarke Date: Mon, 13 Dec 2021 14:23:03 +0000 Subject: [PATCH 16/17] details for new release --- CHANGES.md | 3 +++ perl/lib/Sanger/CGP/Pindel.pm | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index dfea28a..6d4fa71 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,8 @@ # CHANGES +## 3.6.0 +- Addition of `FF019` and `FF020` flags, new flag rule set `pulldownFfpeRulesFragment.lst` including FF019 and FF020 made + ## 3.5.0 - Update to core pindel algorithm to allow complex DI events to have longer inserted sequence than deleted diff --git a/perl/lib/Sanger/CGP/Pindel.pm b/perl/lib/Sanger/CGP/Pindel.pm index 858d636..af28e50 100644 --- a/perl/lib/Sanger/CGP/Pindel.pm +++ b/perl/lib/Sanger/CGP/Pindel.pm @@ -33,7 +33,7 @@ use strict; use Const::Fast qw(const); use base 'Exporter'; -our $VERSION = '3.5.0'; +our $VERSION = '3.6.0'; our @EXPORT = qw($VERSION); 1; From b6c66032e493595e2be9237f099b48d7c7a5fb3f Mon Sep 17 00:00:00 2001 From: Thomas Clarke Date: Mon, 13 Dec 2021 14:25:08 +0000 Subject: [PATCH 17/17] CHANGES.md formatting --- CHANGES.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 6d4fa71..bd42643 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,7 +1,8 @@ # CHANGES ## 3.6.0 -- Addition of `FF019` and `FF020` flags, new flag rule set `pulldownFfpeRulesFragment.lst` including FF019 and FF020 made +- Addition of `FF019` and `FF020` 
flags +- New flag rule set `pulldownFfpeRulesFragment.lst`, which includes FF019 and FF020 ## 3.5.0