From b36ab2d29a3015b880be57705d9fff3c3b36c195 Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Wed, 9 Aug 2023 14:00:57 +0200 Subject: [PATCH] check PanSN in the input FASTA --- partition-before-pggb | 16 ++++++++++++++-- pggb | 22 ++++++++++++++++++---- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/partition-before-pggb b/partition-before-pggb index 1489f85..de252d3 100755 --- a/partition-before-pggb +++ b/partition-before-pggb @@ -157,12 +157,12 @@ fi # Mandatory parameters if [[ "$input_fasta" == false || $n_mappings == false ]]; then show_help=true - >&2 echo "ERROR: mandatory arguments -i and -n" + >&2 echo "[pggb] ERROR: mandatory arguments -i and -n" fi if (( "$n_mappings" < 2 )); then show_help=true - >&2 echo "ERROR: -n must be greater than or equal to 2" + >&2 echo "[pggb] ERROR: -n must be greater than or equal to 2" fi if [ $show_help == true ]; then @@ -432,6 +432,18 @@ reporting: multiqc: $multiqc EOT +# Check Pangenome Sequence Naming (PanSN): require the .fai index, then warn once (on stderr) about the first name that does not match +if [ ! -f "${input_fasta}.fai" ]; then + >&2 echo "[pggb] ERROR: Index for $input_fasta does not exist. Please create it using 'samtools faidx $input_fasta'." + exit 1 +fi +warning_emitted=0 +cut -f 1 "${input_fasta}.fai" | while read -r line; do + if [[ ! $line =~ ^([^#]+#)+[^#]+$ ]] && [[ $warning_emitted -eq 0 ]]; then + >&2 echo "[pggb] Warning: there are sequence names (like '$line') that do not match the Pangenome Sequence Naming (PanSN)." 
+ warning_emitted=1 + fi +done #------------------------------------------------------------------------------- echo -e "\nRunning partitioning\n" >> "$log_file" diff --git a/pggb b/pggb index 821ee80..4ca3319 100755 --- a/pggb +++ b/pggb @@ -97,7 +97,7 @@ parse_numeric() { echo $value return 0 else - echo "ERROR: Invalid input" >&2 + echo "[pggb] ERROR: Invalid input" >&2 return 1 fi fi @@ -106,7 +106,7 @@ parse_numeric() { M|m) value=$(echo "$value * 1000000" | bc) ;; G|g) value=$(echo "$value * 1000000000" | bc) ;; T|t) value=$(echo "$value * 1000000000000" | bc) ;; - *) echo "ERROR: Invalid suffix or unsupported suffix. Supported metric suffixes are k, K, m, M, g, G, t, T." >&2; return 1 ;; + *) echo "[pggb] ERROR: Invalid suffix or unsupported suffix. Supported metric suffixes are k, K, m, M, g, G, t, T." >&2; return 1 ;; esac printf "%.0f" $value } @@ -180,12 +180,12 @@ fi # Mandatory parameters if [[ "$input_fasta" == false || $n_mappings == false ]]; then show_help=true - >&2 echo "ERROR: mandatory arguments -i and -n" + >&2 echo "[pggb] ERROR: mandatory arguments -i and -n" fi if (( "$n_mappings" < 2 )); then show_help=true - >&2 echo "ERROR: -n must be greater than or equal to 2" + >&2 echo "[pggb] ERROR: -n must be greater than or equal to 2" fi if [ $show_help == true ]; then @@ -456,6 +456,20 @@ EOT echo -e "\nRunning pggb\n" >> "$log_file" + +# Check Pangenome Sequence Naming (PanSN): require the .fai index, then warn once (on stderr) about the first name that does not match +if [ ! -f "${input_fasta}.fai" ]; then + >&2 echo "[pggb] ERROR: Index for $input_fasta does not exist. Please create it using 'samtools faidx $input_fasta'." + exit 1 +fi +warning_emitted=0 +cut -f 1 "${input_fasta}.fai" | while read -r line; do + if [[ ! $line =~ ^([^#]+#)+[^#]+$ ]] && [[ $warning_emitted -eq 0 ]]; then + >&2 echo "[pggb] Warning: there are sequence names (like '$line') that do not match the Pangenome Sequence Naming (PanSN)." + warning_emitted=1 + fi +done + if [[ "$input_paf" == false ]]; then if [[ ! 
-s "$prefix_paf".alignments.$mapper.paf || $resume == false ]]; then