From b6558a56790f16d941c637999cf553bd23e5eacf Mon Sep 17 00:00:00 2001 From: padraicc Date: Mon, 24 Jun 2024 16:02:25 +0200 Subject: [PATCH 1/2] feat: redo sample order extraction --- config/sample_order.tsv | 25 ------------------------- config/sample_replacement.tsv | 24 ------------------------ scripts/extract_samples_info.py | 16 ++++++---------- 3 files changed, 6 insertions(+), 59 deletions(-) delete mode 100644 config/sample_order.tsv delete mode 100644 config/sample_replacement.tsv diff --git a/config/sample_order.tsv b/config/sample_order.tsv deleted file mode 100644 index 133418a..0000000 --- a/config/sample_order.tsv +++ /dev/null @@ -1,25 +0,0 @@ -Sample Order Sample Name -sample_001 D22-07608 -sample_002 D24-03890 -sample_003 D24-03891 -sample_004 D24-03892 -sample_005 D20-05240 -sample_006 D23-09349 -sample_007 D23-09384 -sample_008 D24-01401 -sample_009 D24-01640 -sample_010 D24-02504 -sample_011 D24-03945 -sample_012 D24-03888 -sample_013 D24-03946 -sample_014 D24-03889 -sample_015 D24-03952 -sample_016 D23-08138 -sample_017 D24-03982 -sample_018 D24-04023 -sample_019 D24-04029 -sample_020 D24-04028 -sample_021 D24-03986 -sample_022 D24-03987 -sample_023 D24-03985 -sample_024 D24-04082 diff --git a/config/sample_replacement.tsv b/config/sample_replacement.tsv deleted file mode 100644 index 6f6dc85..0000000 --- a/config/sample_replacement.tsv +++ /dev/null @@ -1,24 +0,0 @@ -D22-07608 sample_001 -D24-03890 sample_002 -D24-03891 sample_003 -D24-03892 sample_004 -D20-05240 sample_005 -D23-09349 sample_006 -D23-09384 sample_007 -D24-01401 sample_008 -D24-01640 sample_009 -D24-02504 sample_010 -D24-03945 sample_011 -D24-03888 sample_012 -D24-03946 sample_013 -D24-03889 sample_014 -D24-03952 sample_015 -D23-08138 sample_016 -D24-03982 sample_017 -D24-04023 sample_018 -D24-04029 sample_019 -D24-04028 sample_020 -D24-03986 sample_021 -D24-03987 sample_022 -D24-03985 sample_023 -D24-04082 sample_024 diff --git a/scripts/extract_samples_info.py b/scripts/extract_samples_info.py index 0376093..a5aff7b 100644 --- a/scripts/extract_samples_info.py +++ b/scripts/extract_samples_info.py @@ -70,17 +70,13 @@ def get_sample_sheet_order(fastq_path): fq1_name = os.path.basename(fastq_path) # get the 's#' part of the illumina fastq file name - sample_order = fq1_name.split('_')[2] + fq_filename = fq1_name.split('_')[1:] # fq filename without sample id + sample_order = [i for i in fq_filename if i.startswith('S')][0] numeric_order = int(sample_order[1:]) return numeric_order -def format_sample_order(numeric_order): - - return f"sample_{numeric_order:03}" - - def main(samples_file, units_file, order_file, replacement_file): try: @@ -100,7 +96,7 @@ def main(samples_file, units_file, order_file, replacement_file): except ValueError: # manually create cols with NAs when no trio present samples["trioid"] = ["NA"] * samples.shape[0] samples["trio_member"] = ["NA"] * samples.shape[0] - + # get the trio member in english samples["trio_member"] = samples.apply( lambda x: translate_trio_member(x.trio_member, x.sex), axis=1) @@ -124,8 +120,8 @@ def main(samples_file, units_file, order_file, replacement_file): ["sample_order", "sample"]].drop_duplicates().sort_values( by="sample_order") - sample_order_df["sample_order"] = sample_order_df["sample_order"].apply( - format_sample_order) + sample_order_df["sample_order"] = [ + f"sample_{i:03}" for i in range(1, sample_order_df.shape[0] + 1)] sample_order_df = sample_order_df.rename( columns={"sample_order": "Sample Order", "sample": "Sample Name"}) @@ -152,4 +148,4 @@ def main(samples_file, units_file, order_file, replacement_file): help='Path to the units file') args = parser.parse_args() main(args.samples_file, args.units_file, args.sample_order, - args.sample_replacement) + args.sample_replacement) \ No newline at end of file From 384ad67c48b43a5f9633478345b3ecdd8dc28284 Mon Sep 17 00:00:00 2001 From: padraicc Date: Mon, 24 Jun 2024 16:05:21 +0200 Subject: [PATCH 2/2] feat: add new sample order and replacement example files --- config/sample_order.tsv | 2 ++ config/sample_replacement.tsv | 1 + 2 files changed, 3 insertions(+) create mode 100644 config/sample_order.tsv create mode 100644 config/sample_replacement.tsv diff --git a/config/sample_order.tsv b/config/sample_order.tsv new file mode 100644 index 0000000..e7d6b6d --- /dev/null +++ b/config/sample_order.tsv @@ -0,0 +1,2 @@ +Sample Order Sample Name +sample_001 NA12878-1 diff --git a/config/sample_replacement.tsv b/config/sample_replacement.tsv new file mode 100644 index 0000000..ca0bde6 --- /dev/null +++ b/config/sample_replacement.tsv @@ -0,0 +1 @@ +NA12878-1 sample_001