Skip to content

Commit

Permalink
426 (#464)
Browse files Browse the repository at this point in the history
* fix misspelled script name in doc

* add string_sep_to_hash subroutine

* fix #426 add --value_insensitive param and possibility to use a list as input of --value param
  • Loading branch information
Juke34 authored Jun 3, 2024
1 parent a10755d commit b3ac370
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 53 deletions.
130 changes: 83 additions & 47 deletions bin/agat_sp_filter_feature_by_attribute_value.pl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
my $primaryTag=undef;
my $opt_output= undef;
my $opt_value = undef;
my $opt_value_insensitive = undef;
my $opt_attribute = undef;
my $opt_test = "=";
my $opt_gff = undef;
Expand All @@ -25,12 +26,13 @@
my @copyARGV=@ARGV;
if ( !GetOptions( 'f|ref|reffile|gff=s' => \$opt_gff,
'value=s' => \$opt_value,
'value_insensitive!' => \$opt_value_insensitive,
"p|type|l=s" => \$primaryTag,
'a|attribute=s' => \$opt_attribute,
't|test=s' => \$opt_test,
'o|output=s' => \$opt_output,
'v|verbose!' => \$opt_verbose,
'c|config=s' => \$config,
'c|config=s' => \$config,
'h|help!' => \$opt_help ) )
{
pod2usage( { -message => 'Failed to parse command line',
Expand Down Expand Up @@ -60,11 +62,6 @@
# Validate the comparison operator given with -t/--test.
# "!" (not equal) must be accepted here: it is listed in the error message and
# in the documentation, and it is handled by the comparison code further down.
if($opt_test ne "<" and $opt_test ne ">" and $opt_test ne "<=" and $opt_test ne ">=" and $opt_test ne "=" and $opt_test ne "!"){
print "The test to apply is Wrong: $opt_test.\nWe want something among this list: <,>,<=,>=,! or =.";exit;
}
if( ! looks_like_number($opt_value) ){
if($opt_test eq "="){$opt_test="eq";}
elsif($opt_test eq "!"){$opt_test="ne";}
else{ print "This test $opt_test is not possible with string value.";exit; }
}

###############
# Manage Output
Expand Down Expand Up @@ -110,10 +107,32 @@
}
}

# Split the comma-separated --value argument into a hash whose keys are the
# individual values to test against.
my $value_hash = string_sep_to_hash({ separator => ",", string => $opt_value });

# Ordering tests (<, >, <=, >=) are refused as soon as one of the given values
# is not a number; only "=" and "!" are allowed with string values.
my $string_test_allowed = ($opt_test eq "=" or $opt_test eq "!");
foreach my $given (keys %{$value_hash}){
  next if looks_like_number($given);
  next if $string_test_allowed;
  print "This test $opt_test is not possible with string value.\n";
  exit;
}

use Data::Dumper;
# Debug aid only: show the parsed value set when --verbose is active, and send
# it to STDERR so it is never mixed with what the script writes on STDOUT.
print STDERR Dumper($value_hash) if $opt_verbose;

# start with some interesting information
my $stringPrint = strftime "%m/%d/%Y at %Hh%Mm%Ss", localtime;
$stringPrint .= "\nusage: $0 @copyARGV\n";
$stringPrint .= "We will discard $print_feature_string that have the attribute $opt_attribute with the value $opt_test $opt_value.\n";
$stringPrint .= "We will discard $print_feature_string that have the attribute $opt_attribute with the value $opt_test $opt_value";
# Finish the report sentence with the case-handling mode. The leading space
# keeps the suffix from being glued to the value printed just before it.
if ($opt_value_insensitive){
$stringPrint .= " (case insensitive).\n";
}else{
$stringPrint .= " (case sensitive).\n";
}

if ($opt_output){
print $ostreamReport $stringPrint;
Expand Down Expand Up @@ -145,7 +164,7 @@
foreach my $feature_l1 ( @{$hash_sortBySeq->{$seqid}{$tag_l1}} ){
my $id_l1 = lc($feature_l1->_tag_value('ID'));

$removeit = check_feature($feature_l1, 'level1', \@ptagList, $opt_attribute, $opt_test, $opt_value);
$removeit = check_feature($feature_l1, 'level1');
# we can remove feature L1 now because we are looping over $hash_sortBySeq not $hash_omniscient
if ($removeit){
my $cases = remove_l1_and_relatives($hash_omniscient, $feature_l1, $fhout_discarded);
Expand All @@ -167,7 +186,7 @@
my @list_fl2 = @{$hash_omniscient->{'level2'}{$tag_l2}{$id_l1}};
foreach my $feature_l2 ( @list_fl2 ) {

$removeit = check_feature($feature_l2,'level2', \@ptagList, $opt_attribute, $opt_test, $opt_value);
$removeit = check_feature($feature_l2,'level2');
if ($removeit){
push @list_l2_to_remove, [$feature_l2, $tag_l1, $id_l1, $fhout_discarded];
next;
Expand All @@ -182,7 +201,7 @@
my @list_fl3 = @{$hash_omniscient->{'level3'}{$tag_l3}{$id_l2}};
foreach my $feature_l3 ( @list_fl3 ) {

$removeit = check_feature($feature_l3, 'level3', \@ptagList, $opt_attribute, $opt_test, $opt_value);
$removeit = check_feature($feature_l3, 'level3');
if ($removeit){
push @list_l3_to_remove, [$feature_l3, $tag_l1, $id_l1, $tag_l2, $id_l2, $fhout_discarded];
}
Expand Down Expand Up @@ -239,61 +258,71 @@
##

sub check_feature{
my ($feature, $level, $ptagList, $opt_attribute, $opt_test, $opt_value)=@_;
my ($feature, $level)=@_;

my $removeit=undef;
my $primary_tag=$feature->primary_tag;

# check primary tag (feature type) to handle
foreach my $ptag (@$ptagList){
foreach my $ptag (@ptagList){

if($ptag eq "all"){
$removeit = should_we_remove_feature($feature, $opt_attribute, $opt_test, $opt_value);
$removeit = should_we_remove_feature($feature);
}
elsif(lc($ptag) eq $level){
$removeit = should_we_remove_feature($feature, $opt_attribute, $opt_test, $opt_value);
$removeit = should_we_remove_feature($feature);
}
elsif(lc($ptag) eq lc($primary_tag) ){
$removeit = should_we_remove_feature($feature, $opt_attribute, $opt_test, $opt_value);
$removeit = should_we_remove_feature($feature);
}
}
return $removeit;
}

sub should_we_remove_feature{
my ($feature, $opt_attribute, $opt_test, $opt_value)=@_;
my ($feature)=@_;

if ($feature->has_tag($opt_attribute)){

# get list of values for the attribute
my @values = $feature->get_tag_values($opt_attribute);
my @file_values = $feature->get_tag_values($opt_attribute);

# if we found among the values one pass the test we return 1
foreach my $value (@values){

if ($opt_test eq "eq"){
if ($value eq $opt_value){return 1; }
}
elsif ($opt_test eq "ne"){
if ($value ne $opt_value){return 1; }
}
elsif ($opt_test eq "="){
if ($value == $opt_value){return 1; }
}
elsif ($opt_test eq "!"){
if ($value != $opt_value){return 1; }
}
elsif ($opt_test eq ">"){
if ($value > $opt_value){return 1; }
}
elsif ($opt_test eq "<"){
if ($value < $opt_value){return 1; }
}
elsif ($opt_test eq "<="){
if ($value <= $opt_value){return 1; }
}
elsif ($opt_test eq ">="){
if ($value >= $opt_value){return 1; }
foreach my $file_value (@file_values){

foreach my $given_value (keys %{$value_hash}){
# Deal with insensitive for template
if ($opt_value_insensitive){
$given_value = lc($given_value);
$file_value = lc($file_value);
}
# for string values replace = by eq and ! by ne and avoid other type of test
if ( ! looks_like_number ($given_value) or ! looks_like_number ($file_value)){
if ($opt_test eq "="){
if ($file_value eq $given_value){return 1; }
}
elsif ($opt_test eq "!"){
if ($file_value ne $given_value){return 1; }
}
}
elsif ($opt_test eq "="){
if ($file_value == $given_value){return 1; }
}
elsif ($opt_test eq "!"){
if ($file_value != $given_value){return 1; }
}
elsif ($opt_test eq ">"){
if ($file_value > $given_value){return 1; }
}
elsif ($opt_test eq "<"){
if ($file_value < $given_value){return 1; }
}
elsif ($opt_test eq "<="){
if ($file_value <= $given_value){return 1; }
}
elsif ($opt_test eq ">="){
if ($file_value >= $given_value){return 1; }
}
}
}
}
Expand All @@ -304,7 +333,7 @@ sub should_we_remove_feature{
=head1 NAME
agat_sp_select_feature_by_attribute_value.pl
agat_sp_filter_feature_by_attribute_value.pl
=head1 DESCRIPTION
Expand All @@ -317,8 +346,8 @@ =head1 DESCRIPTION
=head1 SYNOPSIS
agat_sp_select_feature_by_attribute_value.pl --gff infile.gff --value 1 -t "=" [ --output outfile ]
agat_sp_select_feature_by_attribute_value.pl --help
agat_sp_filter_feature_by_attribute_value.pl --gff infile.gff --value 1 -t "=" [ --output outfile ]
agat_sp_filter_feature_by_attribute_value.pl --help
=head1 OPTIONS
Expand All @@ -343,10 +372,17 @@ =head1 OPTIONS
=item B<--value>
Value to check in the attribute
Value(s) to check in the attribute. Case sensitive. A list of values must be comma-separated.
=item B<--value_insensitive>
Boolean. Deactivated by default. When activated, the values provided by the --value parameter are handled case-insensitively.
=item B<-t> or B<--test>
Test to apply (> < = >= <=). default value "=". If you use one of these two character >, <, please don't forget to quote you parameter liket that "<=". Else your terminal will complain.
Test to apply (> < = ! >= <=). default value "=".
If you use one of these two characters (> or <), please don't forget to quote the
parameter, e.g. "<=", otherwise your terminal will complain.
Only = and ! tests can be used to compare string values.
=item B<-o> or B<--output>
Expand Down
20 changes: 15 additions & 5 deletions docs/tools/agat_sp_filter_feature_by_attribute_value.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# agat\_sp\_select\_feature\_by\_attribute\_value.pl
# agat\_sp\_filter\_feature\_by\_attribute\_value.pl

## DESCRIPTION

Expand All @@ -12,8 +12,8 @@ removing all children of a feature will automatically remove this feature too.
## SYNOPSIS

```
agat_sp_select_feature_by_attribute_value.pl --gff infile.gff --value 1 -t "=" [ --output outfile ]
agat_sp_select_feature_by_attribute_value.pl --help
agat_sp_filter_feature_by_attribute_value.pl --gff infile.gff --value 1 -t "=" [ --output outfile ]
agat_sp_filter_feature_by_attribute_value.pl --help
```

## OPTIONS
Expand All @@ -37,10 +37,20 @@ agat_sp_select_feature_by_attribute_value.pl --help

- **--value**

Value to check in the attribute
Value(s) to check in the attribute. Case sensitive. A list of values must be comma-separated.

- **--value\_insensitive**

Boolean. Deactivated by default. When activated, the values provided by the --value parameter are handled case-insensitively.


- **-t** or **--test**
Test to apply (> < = >= <=). default value "=". If you use one of these two character >, <, please don't forget to quote you parameter liket that "<=". Else your terminal will complain.

Test to apply (> < = ! >= <=). default value "=".
If you use one of these two characters (> or <), please don't forget to quote the
parameter, e.g. "<=", otherwise your terminal will complain.
Only = and ! tests can be used to compare string values.

- **-o** or **--output**

Output GFF file. If no output file is specified, the output will be
Expand Down
25 changes: 24 additions & 1 deletion lib/AGAT/Utilities.pm
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ use Exporter;

our @ISA = qw(Exporter);
our @EXPORT = qw(exists_keys exists_undef_value get_proper_codon_table surround_text
sizedPrint activate_warning_limit print_time dual_print file_text_line print_wrap_text);
sizedPrint activate_warning_limit print_time dual_print file_text_line print_wrap_text
string_sep_to_hash);

sub import {
AGAT::Utilities->export_to_level(1, @_); # to be able to load the EXPORT functions when direct call; (normal case)
Expand Down Expand Up @@ -310,4 +311,26 @@ sub dual_print{
}
}

# @Purpose: split a delimited string into a set-like hash of its fields
# @input: 1 => hash ref with the keys
#           string    (mandatory) the string to split
#           separator (optional)  literal separator, a single space by default
# @output 1 => hash ref; each distinct field is a key and its value is the
#              number of times that field occurs in the string
# @Remark: the separator is matched literally, so characters that are special
#          in a regex (e.g. "|" or ".") can be used safely. The process exits
#          when the argument is not a hash ref or when the string is missing.
sub string_sep_to_hash {
	my $sub_name = (caller(0))[3];
	# -------------- INPUT --------------
	my ($args) = @_;
	# Check we receive a hash as ref
	if(ref($args) ne 'HASH'){ warn "Hash Arguments expected for $sub_name. Please check the call.\n";exit; }
	# Fill the parameters
	my ($string, $separator);
	if( defined($args->{string})) {$string = $args->{string};} else{ warn "String parameter mandatory to use $sub_name!\n"; exit; }
	if( defined($args->{separator})) {$separator = $args->{separator};} else{ $separator = " ";}

	# \Q...\E escapes the separator so it is never interpreted as a pattern
	my %hash_result;
	foreach my $value ( split(/\Q$separator\E/, $string) ){
		$hash_result{$value}++;
	}
	return \%hash_result;
}

1;

0 comments on commit b3ac370

Please sign in to comment.