Skip to content

Commit

Permalink
Merge pull request #372 from pangenome/naming
Browse files (browse the repository at this point in the history)
add option for filenames with param values
  • Loading branch information
AndreaGuarracino authored Feb 3, 2024
2 parents 34de9a0 + 14c7c45 commit 6ffe7f9
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 10 deletions.
25 changes: 20 additions & 5 deletions partition-before-pggb
Original file line number | Diff line number | Diff line change
Expand Up @@ -27,7 +27,7 @@ exclude_delim="#"
# seqwish's default values
MIN_MATCH_LENGTH=23
SPARSE_FACTOR=0
TRANSCLOSE_BATCH=10000000
TRANSCLOSE_BATCH=10M

# seqwish's parameters
min_match_length=$MIN_MATCH_LENGTH
Expand Down Expand Up @@ -80,6 +80,7 @@ threads=$THREADS
poa_threads=0
keep_intermediate_files=false
compress=false
names_with_params=false
show_version=false
show_help=false

Expand Down Expand Up @@ -118,7 +119,7 @@ fi

# read the options
cmd=$0" "$@
TEMP=`getopt -o i:o:D:a:p:c:g:s:l:K:F:k:x:f:B:Xn:j:P:O:Me:t:T:vhASY:G:Q:d:I:R:NbrmZzV: --long input-fasta:,output-dir:,temp-dir:,input-paf:,map-pct-id:,n-mappings:,hg-filter-ani-diff:,segment-length:,block-length-min:,mash-kmer:,mash-kmer-thres:,min-match-length:,sparse-map:,sparse-factor:,transclose-batch:,skip-normalization,n-haplotypes:,path-jump-max:,subpath-min:,edge-jump-max:,threads:,poa-threads:,skip-viz,do-layout,help,no-merge-segments,stats,exclude-delim:,poa-length-target:,poa-params:,poa-padding:,run-abpoa,global-poa,write-maf,consensus-spec:,consensus-prefix:,pad-max-depth:,block-id-min:,block-ratio-min:,no-splits,resume,keep-temp-files,multiqc,compress,vcf-spec:,version -n 'pggb' -- "$@"`
TEMP=`getopt -o i:o:D:a:p:c:g:s:l:K:F:k:x:f:B:Xn:j:P:O:Me:t:T:vhASY:G:Q:d:I:R:NbrmZzV: --long input-fasta:,output-dir:,temp-dir:,input-paf:,map-pct-id:,n-mappings:,hg-filter-ani-diff:,segment-length:,block-length-min:,mash-kmer:,mash-kmer-thres:,min-match-length:,sparse-map:,sparse-factor:,transclose-batch:,skip-normalization,n-haplotypes:,path-jump-max:,subpath-min:,edge-jump-max:,threads:,poa-threads:,skip-viz,do-layout,help,no-merge-segments,stats,exclude-delim:,poa-length-target:,poa-params:,poa-padding:,run-abpoa,global-poa,write-maf,consensus-spec:,consensus-prefix:,pad-max-depth:,block-id-min:,block-ratio-min:,no-splits,resume,keep-temp-files,multiqc,compress,names-with-params,vcf-spec:,version -n 'pggb' -- "$@"`
eval set -- "$TEMP"

# extract options and their arguments into variables.
Expand Down Expand Up @@ -163,6 +164,7 @@ while true ; do
-T|--poa-threads) poa_threads=$2 ; shift 2 ;;
-A|--keep-temp-files) keep_intermediate_files=true ; shift ;;
-Z|--compress) compress=true ; shift ;;
--names-with-params) names_with_params=true ; shift ;;
--version) show_version=true ; shift ;;
-h|--help) show_help=true ; shift ;;
--) shift ; break ;;
Expand Down Expand Up @@ -249,6 +251,7 @@ if [ "$show_help" == true ]; then
echo " -A, --keep-temp-files keep intermediate graphs"
echo " -Z, --compress compress alignment (.paf), graph (.gfa, .og), and MSA (.maf) outputs with pigz,"
echo " and variant (.vcf) outputs with bgzip"
echo " --names-with-params put parameter values in filenames, instead of hashes"
echo " --version display the version of pggb"
echo " -h, --help this text"
echo
Expand Down Expand Up @@ -347,14 +350,22 @@ else
fi

if [[ "$input_paf" == false ]]; then
prefix_paf="$input_fasta".$(echo "$paf_spec" | sha256sum | head -c 7)
if [[ "$names_with_params" == true ]]; then
prefix_paf="$input_fasta"."$paf_spec"
else
prefix_paf="$input_fasta".$(echo "$paf_spec" | sha256sum | head -c 7)
fi
else
prefix_paf="$input_paf"
fi


# Graph induction
prefix_seqwish="$prefix_paf".$(echo k$min_match_length-f$sparse_factor-B$transclose_batch | sha256sum | head -c 7)
if [[ "$names_with_params" == true ]]; then
prefix_seqwish="$prefix_paf".k$min_match_length-f$sparse_factor-B$transclose_batch
else
prefix_seqwish="$prefix_paf".$(echo k$min_match_length-f$sparse_factor-B$transclose_batch | sha256sum | head -c 7)
fi

# poa param suggestions from minimap2
# - asm5, --poa-params 1,19,39,3,81,1, ~0.1 divergence
Expand All @@ -379,7 +390,11 @@ else
fi

block_id_min=$(echo "scale=4; $map_pct_id / 100.0" | bc)
prefix_smoothed="$prefix_seqwish".$(echo h$n_haps-G$target_poa_length-j$max_path_jump-e$max_edge_jump-d$pad_max_depth-I$block_id_min-R$block_ratio_min-p$poa_params-O$poa_padding | sha256sum | head -c 7).smooth
if [[ "$names_with_params" == true ]]; then
prefix_smoothed="$prefix_seqwish".h$n_haps-G$target_poa_length-j$max_path_jump-e$max_edge_jump-d$pad_max_depth-I$block_id_min-R$block_ratio_min-p$poa_params-O$poa_padding.smooth
else
prefix_smoothed="$prefix_seqwish".$(echo h$n_haps-G$target_poa_length-j$max_path_jump-e$max_edge_jump-d$pad_max_depth-I$block_id_min-R$block_ratio_min-p$poa_params-O$poa_padding | sha256sum | head -c 7).smooth
fi
prefix_smoothed_output="$prefix_smoothed"


Expand Down
25 changes: 20 additions & 5 deletions pggb
Original file line number | Diff line number | Diff line change
Expand Up @@ -27,7 +27,7 @@ exclude_delim="#"
# seqwish's default values
MIN_MATCH_LENGTH=23
SPARSE_FACTOR=0
TRANSCLOSE_BATCH=10000000
TRANSCLOSE_BATCH=10M

# seqwish's parameters
min_match_length=$MIN_MATCH_LENGTH
Expand Down Expand Up @@ -80,6 +80,7 @@ threads=$THREADS
poa_threads=0
keep_intermediate_files=false
compress=false
names_with_params=false
show_version=false
show_help=false

Expand Down Expand Up @@ -118,7 +119,7 @@ fi

# read the options
cmd=$0" "$@
TEMP=`getopt -o i:o:D:a:p:c:g:s:l:K:F:k:x:f:B:Xn:j:P:O:Me:t:T:vhASY:G:Q:d:I:R:NbrmZzV: --long input-fasta:,output-dir:,temp-dir:,input-paf:,map-pct-id:,n-mappings:,hg-filter-ani-diff:,segment-length:,block-length-min:,mash-kmer:,mash-kmer-thres:,min-match-length:,sparse-map:,sparse-factor:,transclose-batch:,skip-normalization,n-haplotypes:,path-jump-max:,subpath-min:,edge-jump-max:,threads:,poa-threads:,skip-viz,do-layout,help,no-merge-segments,stats,exclude-delim:,poa-length-target:,poa-params:,poa-padding:,run-abpoa,global-poa,write-maf,consensus-spec:,consensus-prefix:,pad-max-depth:,block-id-min:,block-ratio-min:,no-splits,resume,keep-temp-files,multiqc,compress,vcf-spec:,version -n 'pggb' -- "$@"`
TEMP=`getopt -o i:o:D:a:p:c:g:s:l:K:F:k:x:f:B:Xn:j:P:O:Me:t:T:vhASY:G:Q:d:I:R:NbrmZzV: --long input-fasta:,output-dir:,temp-dir:,input-paf:,map-pct-id:,n-mappings:,hg-filter-ani-diff:,segment-length:,block-length-min:,mash-kmer:,mash-kmer-thres:,min-match-length:,sparse-map:,sparse-factor:,transclose-batch:,skip-normalization,n-haplotypes:,path-jump-max:,subpath-min:,edge-jump-max:,threads:,poa-threads:,skip-viz,do-layout,help,no-merge-segments,stats,exclude-delim:,poa-length-target:,poa-params:,poa-padding:,run-abpoa,global-poa,write-maf,consensus-spec:,consensus-prefix:,pad-max-depth:,block-id-min:,block-ratio-min:,no-splits,resume,keep-temp-files,multiqc,compress,names-with-params,vcf-spec:,version -n 'pggb' -- "$@"`
eval set -- "$TEMP"

# extract options and their arguments into variables.
Expand Down Expand Up @@ -163,6 +164,7 @@ while true ; do
-T|--poa-threads) poa_threads=$2 ; shift 2 ;;
-A|--keep-temp-files) keep_intermediate_files=true ; shift ;;
-Z|--compress) compress=true ; shift ;;
--names-with-params) names_with_params=true ; shift ;;
--version) show_version=true ; shift ;;
-h|--help) show_help=true ; shift ;;
--) shift ; break ;;
Expand Down Expand Up @@ -249,6 +251,7 @@ if [ "$show_help" == true ]; then
echo " -A, --keep-temp-files keep intermediate graphs"
echo " -Z, --compress compress alignment (.paf), graph (.gfa, .og), and MSA (.maf) outputs with pigz,"
echo " and variant (.vcf) outputs with bgzip"
echo " --names-with-params put parameter values in filenames, instead of hashes"
echo " --version display the version of pggb"
echo " -h, --help this text"
echo
Expand Down Expand Up @@ -347,14 +350,22 @@ else
fi

if [[ "$input_paf" == false ]]; then
prefix_paf="$input_fasta".$(echo "$paf_spec" | sha256sum | head -c 7)
if [[ "$names_with_params" == true ]]; then
prefix_paf="$input_fasta"."$paf_spec"
else
prefix_paf="$input_fasta".$(echo "$paf_spec" | sha256sum | head -c 7)
fi
else
prefix_paf="$input_paf"
fi


# Graph induction
prefix_seqwish="$prefix_paf".$(echo k$min_match_length-f$sparse_factor-B$transclose_batch | sha256sum | head -c 7)
if [[ "$names_with_params" == true ]]; then
prefix_seqwish="$prefix_paf".k$min_match_length-f$sparse_factor-B$transclose_batch
else
prefix_seqwish="$prefix_paf".$(echo k$min_match_length-f$sparse_factor-B$transclose_batch | sha256sum | head -c 7)
fi

# poa param suggestions from minimap2
# - asm5, --poa-params 1,19,39,3,81,1, ~0.1 divergence
Expand All @@ -379,7 +390,11 @@ else
fi

block_id_min=$(echo "scale=4; $map_pct_id / 100.0" | bc)
prefix_smoothed="$prefix_seqwish".$(echo h$n_haps-G$target_poa_length-j$max_path_jump-e$max_edge_jump-d$pad_max_depth-I$block_id_min-R$block_ratio_min-p$poa_params-O$poa_padding | sha256sum | head -c 7).smooth
if [[ "$names_with_params" == true ]]; then
prefix_smoothed="$prefix_seqwish".h$n_haps-G$target_poa_length-j$max_path_jump-e$max_edge_jump-d$pad_max_depth-I$block_id_min-R$block_ratio_min-p$poa_params-O$poa_padding.smooth
else
prefix_smoothed="$prefix_seqwish".$(echo h$n_haps-G$target_poa_length-j$max_path_jump-e$max_edge_jump-d$pad_max_depth-I$block_id_min-R$block_ratio_min-p$poa_params-O$poa_padding | sha256sum | head -c 7).smooth
fi
prefix_smoothed_output="$prefix_smoothed"


Expand Down

0 comments on commit 6ffe7f9

Please sign in to comment.