Skip to content

Commit

Permalink
Merge pull request #74 from clinical-genomics-uppsala/clean_config_dir
Browse files Browse the repository at this point in the history
feat: redo sample order extraction
  • Loading branch information
padraicc authored Jun 24, 2024
2 parents d4e94f4 + 384ad67 commit aca2c26
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 58 deletions.
25 changes: 1 addition & 24 deletions config/sample_order.tsv
Original file line number Diff line number Diff line change
@@ -1,25 +1,2 @@
Sample Order Sample Name
sample_001 D22-07608
sample_002 D24-03890
sample_003 D24-03891
sample_004 D24-03892
sample_005 D20-05240
sample_006 D23-09349
sample_007 D23-09384
sample_008 D24-01401
sample_009 D24-01640
sample_010 D24-02504
sample_011 D24-03945
sample_012 D24-03888
sample_013 D24-03946
sample_014 D24-03889
sample_015 D24-03952
sample_016 D23-08138
sample_017 D24-03982
sample_018 D24-04023
sample_019 D24-04029
sample_020 D24-04028
sample_021 D24-03986
sample_022 D24-03987
sample_023 D24-03985
sample_024 D24-04082
sample_001 NA12878-1
25 changes: 1 addition & 24 deletions config/sample_replacement.tsv
Original file line number Diff line number Diff line change
@@ -1,24 +1 @@
D22-07608 sample_001
D24-03890 sample_002
D24-03891 sample_003
D24-03892 sample_004
D20-05240 sample_005
D23-09349 sample_006
D23-09384 sample_007
D24-01401 sample_008
D24-01640 sample_009
D24-02504 sample_010
D24-03945 sample_011
D24-03888 sample_012
D24-03946 sample_013
D24-03889 sample_014
D24-03952 sample_015
D23-08138 sample_016
D24-03982 sample_017
D24-04023 sample_018
D24-04029 sample_019
D24-04028 sample_020
D24-03986 sample_021
D24-03987 sample_022
D24-03985 sample_023
D24-04082 sample_024
NA12878-1 sample_001
16 changes: 6 additions & 10 deletions scripts/extract_samples_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,17 +70,13 @@ def get_sample_sheet_order(fastq_path):

fq1_name = os.path.basename(fastq_path)
# get the 'S#' part (sample sheet index, e.g. S12) of the Illumina FASTQ file name
sample_order = fq1_name.split('_')[2]
fq_filename = fq1_name.split('_')[1:] # fq filename without sample id
sample_order = [i for i in fq_filename if i.startswith('S')][0]
numeric_order = int(sample_order[1:])

return numeric_order


def format_sample_order(numeric_order):
    """Return the sample-order label for a numeric sample position.

    The number is zero-padded to a minimum width of three characters and
    prefixed with ``sample_``; values wider than three digits are not
    truncated.

    Args:
        numeric_order: Numeric position of the sample (e.g. ``7``).

    Returns:
        The label string, e.g. ``"sample_007"``.
    """
    return f"sample_{numeric_order:03}"


def main(samples_file, units_file, order_file, replacement_file):

try:
Expand All @@ -100,7 +96,7 @@ def main(samples_file, units_file, order_file, replacement_file):
except ValueError: # manually create cols with NAs when no trio present
samples["trioid"] = ["NA"] * samples.shape[0]
samples["trio_member"] = ["NA"] * samples.shape[0]

# get the trio member in english
samples["trio_member"] = samples.apply(
lambda x: translate_trio_member(x.trio_member, x.sex), axis=1)
Expand All @@ -124,8 +120,8 @@ def main(samples_file, units_file, order_file, replacement_file):
["sample_order", "sample"]].drop_duplicates().sort_values(
by="sample_order")

sample_order_df["sample_order"] = sample_order_df["sample_order"].apply(
format_sample_order)
sample_order_df["sample_order"] = [
f"sample_{i:03}" for i in range(1, sample_order_df.shape[0] + 1)]

sample_order_df = sample_order_df.rename(
columns={"sample_order": "Sample Order", "sample": "Sample Name"})
Expand All @@ -152,4 +148,4 @@ def main(samples_file, units_file, order_file, replacement_file):
help='Path to the units file')
args = parser.parse_args()
main(args.samples_file, args.units_file, args.sample_order,
args.sample_replacement)
args.sample_replacement)

0 comments on commit aca2c26

Please sign in to comment.