From 14c7c453a2ae3fda94f54a3714350c7496583235 Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Sat, 3 Feb 2024 15:45:32 -0600 Subject: [PATCH] add option for filenames with param values --- partition-before-pggb | 25 ++++++++++++++++++++----- pggb | 25 ++++++++++++++++++++----- 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/partition-before-pggb b/partition-before-pggb index 31b13f1..a6142f1 100755 --- a/partition-before-pggb +++ b/partition-before-pggb @@ -27,7 +27,7 @@ exclude_delim="#" # seqwish's default values MIN_MATCH_LENGTH=23 SPARSE_FACTOR=0 -TRANSCLOSE_BATCH=10000000 +TRANSCLOSE_BATCH=10M # seqwish's parameters min_match_length=$MIN_MATCH_LENGTH @@ -80,6 +80,7 @@ threads=$THREADS poa_threads=0 keep_intermediate_files=false compress=false +names_with_params=false show_version=false show_help=false @@ -118,7 +119,7 @@ fi # read the options cmd=$0" "$@ -TEMP=`getopt -o i:o:D:a:p:c:g:s:l:K:F:k:x:f:B:Xn:j:P:O:Me:t:T:vhASY:G:Q:d:I:R:NbrmZzV: --long input-fasta:,output-dir:,temp-dir:,input-paf:,map-pct-id:,n-mappings:,hg-filter-ani-diff:,segment-length:,block-length-min:,mash-kmer:,mash-kmer-thres:,min-match-length:,sparse-map:,sparse-factor:,transclose-batch:,skip-normalization,n-haplotypes:,path-jump-max:,subpath-min:,edge-jump-max:,threads:,poa-threads:,skip-viz,do-layout,help,no-merge-segments,stats,exclude-delim:,poa-length-target:,poa-params:,poa-padding:,run-abpoa,global-poa,write-maf,consensus-spec:,consensus-prefix:,pad-max-depth:,block-id-min:,block-ratio-min:,no-splits,resume,keep-temp-files,multiqc,compress,vcf-spec:,version -n 'pggb' -- "$@"` +TEMP=`getopt -o i:o:D:a:p:c:g:s:l:K:F:k:x:f:B:Xn:j:P:O:Me:t:T:vhASY:G:Q:d:I:R:NbrmZzV: --long input-fasta:,output-dir:,temp-dir:,input-paf:,map-pct-id:,n-mappings:,hg-filter-ani-diff:,segment-length:,block-length-min:,mash-kmer:,mash-kmer-thres:,min-match-length:,sparse-map:,sparse-factor:,transclose-batch:,skip-normalization,n-haplotypes:,path-jump-max:,subpath-min:,edge-jump-max:,threads:,poa-threads:,skip-viz,do-layout,help,no-merge-segments,stats,exclude-delim:,poa-length-target:,poa-params:,poa-padding:,run-abpoa,global-poa,write-maf,consensus-spec:,consensus-prefix:,pad-max-depth:,block-id-min:,block-ratio-min:,no-splits,resume,keep-temp-files,multiqc,compress,names-with-params,vcf-spec:,version -n 'pggb' -- "$@"` eval set -- "$TEMP" # extract options and their arguments into variables. @@ -163,6 +164,7 @@ while true ; do -T|--poa-threads) poa_threads=$2 ; shift 2 ;; -A|--keep-temp-files) keep_intermediate_files=true ; shift ;; -Z|--compress) compress=true ; shift ;; + --names-with-params) names_with_params=true ; shift ;; --version) show_version=true ; shift ;; -h|--help) show_help=true ; shift ;; --) shift ; break ;; @@ -249,6 +251,7 @@ if [ "$show_help" == true ]; then echo " -A, --keep-temp-files keep intermediate graphs" echo " -Z, --compress compress alignment (.paf), graph (.gfa, .og), and MSA (.maf) outputs with pigz," echo " and variant (.vcf) outputs with bgzip" + echo " --names-with-params put parameter values in filenames, instead of hashes" echo " --version display the version of pggb" echo " -h, --help this text" echo @@ -347,14 +350,22 @@ else fi if [[ "$input_paf" == false ]]; then - prefix_paf="$input_fasta".$(echo "$paf_spec" | sha256sum | head -c 7) + if [[ "$names_with_params" == true ]]; then + prefix_paf="$input_fasta"."$paf_spec" + else + prefix_paf="$input_fasta".$(echo "$paf_spec" | sha256sum | head -c 7) + fi else prefix_paf="$input_paf" fi # Graph induction -prefix_seqwish="$prefix_paf".$(echo k$min_match_length-f$sparse_factor-B$transclose_batch | sha256sum | head -c 7) +if [[ "$names_with_params" == true ]]; then + prefix_seqwish="$prefix_paf".k$min_match_length-f$sparse_factor-B$transclose_batch +else + prefix_seqwish="$prefix_paf".$(echo k$min_match_length-f$sparse_factor-B$transclose_batch | sha256sum | head -c 7) +fi # poa param suggestions from minimap2 # - asm5, --poa-params 1,19,39,3,81,1, ~0.1 divergence @@ -379,7 +390,11 @@ else fi block_id_min=$(echo "scale=4; $map_pct_id / 100.0" | bc) -prefix_smoothed="$prefix_seqwish".$(echo h$n_haps-G$target_poa_length-j$max_path_jump-e$max_edge_jump-d$pad_max_depth-I$block_id_min-R$block_ratio_min-p$poa_params-O$poa_padding | sha256sum | head -c 7).smooth +if [[ "$names_with_params" == true ]]; then + prefix_smoothed="$prefix_seqwish".h$n_haps-G$target_poa_length-j$max_path_jump-e$max_edge_jump-d$pad_max_depth-I$block_id_min-R$block_ratio_min-p$poa_params-O$poa_padding.smooth +else + prefix_smoothed="$prefix_seqwish".$(echo h$n_haps-G$target_poa_length-j$max_path_jump-e$max_edge_jump-d$pad_max_depth-I$block_id_min-R$block_ratio_min-p$poa_params-O$poa_padding | sha256sum | head -c 7).smooth +fi prefix_smoothed_output="$prefix_smoothed" diff --git a/pggb b/pggb index 1ff8306..3c293bb 100755 --- a/pggb +++ b/pggb @@ -27,7 +27,7 @@ exclude_delim="#" # seqwish's default values MIN_MATCH_LENGTH=23 SPARSE_FACTOR=0 -TRANSCLOSE_BATCH=10000000 +TRANSCLOSE_BATCH=10M # seqwish's parameters min_match_length=$MIN_MATCH_LENGTH @@ -80,6 +80,7 @@ threads=$THREADS poa_threads=0 keep_intermediate_files=false compress=false +names_with_params=false show_version=false show_help=false @@ -118,7 +119,7 @@ fi # read the options cmd=$0" "$@ -TEMP=`getopt -o i:o:D:a:p:c:g:s:l:K:F:k:x:f:B:Xn:j:P:O:Me:t:T:vhASY:G:Q:d:I:R:NbrmZzV: --long input-fasta:,output-dir:,temp-dir:,input-paf:,map-pct-id:,n-mappings:,hg-filter-ani-diff:,segment-length:,block-length-min:,mash-kmer:,mash-kmer-thres:,min-match-length:,sparse-map:,sparse-factor:,transclose-batch:,skip-normalization,n-haplotypes:,path-jump-max:,subpath-min:,edge-jump-max:,threads:,poa-threads:,skip-viz,do-layout,help,no-merge-segments,stats,exclude-delim:,poa-length-target:,poa-params:,poa-padding:,run-abpoa,global-poa,write-maf,consensus-spec:,consensus-prefix:,pad-max-depth:,block-id-min:,block-ratio-min:,no-splits,resume,keep-temp-files,multiqc,compress,vcf-spec:,version -n 'pggb' -- "$@"` +TEMP=`getopt -o i:o:D:a:p:c:g:s:l:K:F:k:x:f:B:Xn:j:P:O:Me:t:T:vhASY:G:Q:d:I:R:NbrmZzV: --long input-fasta:,output-dir:,temp-dir:,input-paf:,map-pct-id:,n-mappings:,hg-filter-ani-diff:,segment-length:,block-length-min:,mash-kmer:,mash-kmer-thres:,min-match-length:,sparse-map:,sparse-factor:,transclose-batch:,skip-normalization,n-haplotypes:,path-jump-max:,subpath-min:,edge-jump-max:,threads:,poa-threads:,skip-viz,do-layout,help,no-merge-segments,stats,exclude-delim:,poa-length-target:,poa-params:,poa-padding:,run-abpoa,global-poa,write-maf,consensus-spec:,consensus-prefix:,pad-max-depth:,block-id-min:,block-ratio-min:,no-splits,resume,keep-temp-files,multiqc,compress,names-with-params,vcf-spec:,version -n 'pggb' -- "$@"` eval set -- "$TEMP" # extract options and their arguments into variables. @@ -163,6 +164,7 @@ while true ; do -T|--poa-threads) poa_threads=$2 ; shift 2 ;; -A|--keep-temp-files) keep_intermediate_files=true ; shift ;; -Z|--compress) compress=true ; shift ;; + --names-with-params) names_with_params=true ; shift ;; --version) show_version=true ; shift ;; -h|--help) show_help=true ; shift ;; --) shift ; break ;; @@ -249,6 +251,7 @@ if [ "$show_help" == true ]; then echo " -A, --keep-temp-files keep intermediate graphs" echo " -Z, --compress compress alignment (.paf), graph (.gfa, .og), and MSA (.maf) outputs with pigz," echo " and variant (.vcf) outputs with bgzip" + echo " --names-with-params put parameter values in filenames, instead of hashes" echo " --version display the version of pggb" echo " -h, --help this text" echo @@ -347,14 +350,22 @@ else fi if [[ "$input_paf" == false ]]; then - prefix_paf="$input_fasta".$(echo "$paf_spec" | sha256sum | head -c 7) + if [[ "$names_with_params" == true ]]; then + prefix_paf="$input_fasta"."$paf_spec" + else + prefix_paf="$input_fasta".$(echo "$paf_spec" | sha256sum | head -c 7) + fi else prefix_paf="$input_paf" fi # Graph induction -prefix_seqwish="$prefix_paf".$(echo k$min_match_length-f$sparse_factor-B$transclose_batch | sha256sum | head -c 7) +if [[ "$names_with_params" == true ]]; then + prefix_seqwish="$prefix_paf".k$min_match_length-f$sparse_factor-B$transclose_batch +else + prefix_seqwish="$prefix_paf".$(echo k$min_match_length-f$sparse_factor-B$transclose_batch | sha256sum | head -c 7) +fi # poa param suggestions from minimap2 # - asm5, --poa-params 1,19,39,3,81,1, ~0.1 divergence @@ -379,7 +390,11 @@ else fi block_id_min=$(echo "scale=4; $map_pct_id / 100.0" | bc) -prefix_smoothed="$prefix_seqwish".$(echo h$n_haps-G$target_poa_length-j$max_path_jump-e$max_edge_jump-d$pad_max_depth-I$block_id_min-R$block_ratio_min-p$poa_params-O$poa_padding | sha256sum | head -c 7).smooth +if [[ "$names_with_params" == true ]]; then + prefix_smoothed="$prefix_seqwish".h$n_haps-G$target_poa_length-j$max_path_jump-e$max_edge_jump-d$pad_max_depth-I$block_id_min-R$block_ratio_min-p$poa_params-O$poa_padding.smooth +else + prefix_smoothed="$prefix_seqwish".$(echo h$n_haps-G$target_poa_length-j$max_path_jump-e$max_edge_jump-d$pad_max_depth-I$block_id_min-R$block_ratio_min-p$poa_params-O$poa_padding | sha256sum | head -c 7).smooth +fi prefix_smoothed_output="$prefix_smoothed"