From 8e5515d42337eb534ab8aac851b7f4f3b4bea488 Mon Sep 17 00:00:00 2001 From: Li Date: Sun, 11 Dec 2022 21:16:37 -0500 Subject: [PATCH] Make the minimum number of matches required for a valid read-contig overlap an option --- README.md | 1 + main.cpp | 12 ++++++++++++ run-trust4 | 8 +++++++- 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1afba76..7010d35 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,7 @@ TRUST4 is also available form [Bioconda](https://anaconda.org/bioconda/trust4). --UMI STRING: if -b, bam field for UMI; if -1 -2/-u, file containing UMIs (default: not used) --umiRange INT INT CHAR: start, end(-1 for lenght-1), strand in a UMI is the true UMI (default: 0 -1 +) --barcode-level STRING: barcode is for cell or molecule (default: cell) + --minHitLen INT: the minimal hit length for a valid overlap (default: auto) --mateIdSuffixLen INT: the suffix length in read id for mate. (default: not used) --skipMateExtension: do not extend assemblies with mate information, useful for SMART-seq (default: not used) --abnormalUnmapFlag: the flag in BAM for the unmapped read-pair is nonconcordant (default: not set) diff --git a/main.cpp b/main.cpp index 72c6771..c999ad7 100644 --- a/main.cpp +++ b/main.cpp @@ -26,6 +26,7 @@ char usage[] = "./trust4 [OPTIONS]:\n" "\t-t INT: number of threads (default: 1)\n" "\t-c STRING: the path to the kmer count file\n" "\t-k INT: the starting k-mer size for indexing contigs (default: 9)\n" + "\t--minHitLen INT: the minimal hit length for a valid overlap (default: auto)\n" "\t--skipMateExtension: skip the step of extension assemblies with mate-pair information\n" ///"\t--noV: do not assemble the full length V gene (default: not used)\n" "\t--trimLevel INT: 0: no trim; 1: trim low quality; 2: trim unmatched (default: 1)\n" @@ -50,6 +51,7 @@ static struct option long_options[] = { { "keepNoBarcode", no_argument, 0, 10003 }, { "UMI", required_argument, 0, 10004}, { "skipMateExtension", no_argument, 0, 10005}, + { "minHitLen", required_argument, 0, 10006}, { (char *)0, 0, 0, 0} } ; @@ -227,6 +229,7 @@ int main( int argc, char *argv[] ) int maxReadLen = -1 ; int firstReadLen = -1 ; int trimLevel = 1 ; + int minHitLen = -1 ; bool hasMate = false ; bool hasBarcode = false ; bool hasUmi = false ; @@ -306,6 +309,10 @@ int main( int argc, char *argv[] ) { skipMateExtension = true ; } + else if ( c == 10006 ) // minHitLen + { + minHitLen = atoi( optarg ) ; + } else { fprintf( stderr, "%s", usage ) ; @@ -1075,6 +1082,11 @@ int main( int argc, char *argv[] ) else seqSet.SetHitLenRequired( 17 ) ; } + + if (minHitLen != -1) + { + seqSet.SetHitLenRequired( minHitLen ) ; + } if ( firstReadLen > 200 || trimLevel > 1 ) changeKmerLengthThreshold /= 2 ; diff --git a/run-trust4 b/run-trust4 index e662188..b2e38de 100755 --- a/run-trust4 +++ b/run-trust4 @@ -7,7 +7,7 @@ use Cwd 'cwd' ; use Cwd 'abs_path' ; use File::Basename ; -die "TRUST4 v1.0.8-r425 usage: ./run-trust4 [OPTIONS]:\n". +die "TRUST4 v1.0.8-r426 usage: ./run-trust4 [OPTIONS]:\n". "Required:\n". #"\t[Input]:\n". "\t-b STRING: path to bam file\n". @@ -29,6 +29,7 @@ die "TRUST4 v1.0.8-r425 usage: ./run-trust4 [OPTIONS]:\n". "\t--UMI STRING: if -b, bam field for UMI; if -1 -2/-u, file containing UMIs (default: not used)\n". "\t--umiRange INT INT CHAR: start, end(-1 for lenght-1), strand in a UMI is the true UMI (default: 0 -1 +)\n". "\t--barcodeLevel STRING: barcode is for cell or molecule (default: cell)". + "\t--minHitLen INT: the minimal hit length for a valid overlap (default: auto)\n". "\t--mateIdSuffixLen INT: the suffix length in read id for mate. (default: not used)\n". "\t--skipMateExtension: do not extend assemblies with mate information, useful for SMART-seq (default: not used)\n". "\t--abnormalUnmapFlag: the flag in BAM for the unmapped read-pair is nonconcordant (default: not set)\n". @@ -238,6 +239,11 @@ for ( $i = 0 ; $i < @ARGV ; ++$i ) $i += 3 ; } + elsif ( $ARGV[$i] eq "--minHitLen" ) + { + $mainArgs .= " ".$ARGV[$i]." ".$ARGV[$i + 1] ; + ++$i ; + } elsif ( $ARGV[$i] eq "--outputReadAssignment" ) { $outputReadAssignment = 1 ;