Skip to content

Commit

Permalink
update call to snpseq_metadata so tsv files are exported
Browse files Browse the repository at this point in the history
  • Loading branch information
b97pla committed May 16, 2024
1 parent c4a83ab commit 35ab4d7
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 13 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ runfolder, as well as combine project and sequencing run metadata and export to

- You will need Python >= 3.8

- The service will expect the [snpseq_metadata](https://github.com/Molmed/snpseq_metadata) Python package to be
- The service expects the [snpseq_metadata](https://github.com/Molmed/snpseq_metadata) Python package (>= v2.2.0) to be
available in the environment (refer to the
[README](https://github.com/Molmed/snpseq_metadata/blob/main/README.md#installation) for installation instructions).

- The service needs the url of an accessible [snpseq-data](https://gitlab.snpseq.medsci.uu.se/shared/snpseq-data)
service.
- Unless data extracted from Clarity LIMS is supplied in a separate JSON file, the service needs the URL of an
accessible [snpseq-data](https://gitlab.snpseq.medsci.uu.se/shared/snpseq-data) service.

### Deploy

Expand Down
4 changes: 2 additions & 2 deletions metadata_service/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,12 @@ def export_runfolder_metadata(self, runfolder_extract, snpseq_data_extract, outd
f"--outdir {outdir} " \
f"{runfolder_extract} " \
f"{snpseq_data_extract} " \
f"xml"
f"xml tsv"
self.run_process(cmdline)
return [
os.path.join(
outdir,
xmlfile)
for xmlfile in os.listdir(outdir)
if xmlfile.endswith(".xml")
if xmlfile.endswith(".xml") or xmlfile.endswith(".tsv")
]
25 changes: 17 additions & 8 deletions tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def export_runfolder_metadata(self, *args):
projects[i] = get_projects_from_jsonfile(jsonfile)

for srcfile in filter(
lambda f: f.endswith(".xml"),
lambda f: f.endswith(".xml") or f.endswith(".tsv"),
os.listdir(srcdir)):
outfile = srcfile
for prj_s, prj_c in zip(projects[0], projects[1]):
Expand Down Expand Up @@ -220,13 +220,22 @@ async def _export_helper(
# pass the name of the lims cache json file as a query parameter
request_url = f"{request_url}?lims_data={os.path.basename(test_snpseq_data_path)}"

expected_files = sorted([
os.path.join(
metadatadir,
f"{prj}-{typ}.xml")
for prj in projects[1]
for typ in ["experiment", "run"]
])
expected_files = []
for prj in projects[1]:
expected_files.append(
os.path.join(
metadatadir,
f"{prj}.metadata.ena.tsv"
)
)
for typ in ["experiment", "run"]:
expected_files.append(
os.path.join(
metadatadir,
f"{prj}-{typ}.xml"
)
)
expected_files = sorted(expected_files)

resp = await cli.get(request_url)
json_resp = await resp.json()
Expand Down
8 changes: 8 additions & 0 deletions tests/test_data/AB-1234.metadata.ena.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
FileType fastq Read submission file type
study sample design_description library_construction_protocol library_name library_strategy library_source library_selection library_layout insert_size instrument_model forward_file_name forward_file_md5 reverse_file_name reverse_file_md5
AB-1234 AB-1234-SampleB olink explore 1536 Sample_AB-1234-SampleB_AB-1234-SampleB_2-2283 OTHER OTHER PADLOCK_PROBES_CAPTURE_METHOD PAIRED 353 Illumina NovaSeq 6000 210415_A00001_0123_BXYZ321XY/Unaligned/AB-1234/Sample_AB-1234-SampleB/AB-1234-SampleA-1_S1_L001_R1_001.fastq.gz 8df33e868951c1e56b6831a2df0ff87d 210415_A00001_0123_BXYZ321XY/Unaligned/AB-1234/Sample_AB-1234-SampleB/AB-1234-SampleA-1_S1_L001_R2_001.fastq.gz 77bc05ab62ea44a3e5051e5bd558c126
AB-1234 AB-1234-SampleB olink explore 1536 Sample_AB-1234-SampleB_AB-1234-SampleB_2-2283 OTHER OTHER PADLOCK_PROBES_CAPTURE_METHOD PAIRED 353 Illumina NovaSeq 6000 210415_A00001_0123_BXYZ321XY/Unaligned/AB-1234/Sample_AB-1234-SampleB/AB-1234-SampleA-1_S1_L002_R1_001.fastq.gz 09d5a72ee196c5493562ce5890134519 210415_A00001_0123_BXYZ321XY/Unaligned/AB-1234/Sample_AB-1234-SampleB/AB-1234-SampleA-1_S1_L002_R2_001.fastq.gz cd664b5be5ac69168c92e7c9002f7f1e
AB-1234 AB-1234-SampleA-1 TruSeq DNA PCR-Free Sample Preparation kit LT Sample_AB-1234-SampleA-1_AB-1234-SampleA-1_2-2271 WGS GENOMIC RANDOM PAIRED 351 Illumina NovaSeq 6000 210415_A00001_0123_BXYZ321XY/Unaligned/AB-1234/Sample_AB-1234-SampleA-1/AB-1234-SampleA-1_S2_L001_R1_001.fastq.gz 55a35bd5ee20f507368bca2eb14b72d8 210415_A00001_0123_BXYZ321XY/Unaligned/AB-1234/Sample_AB-1234-SampleA-1/AB-1234-SampleA-1_S2_L001_R2_001.fastq.gz e34c9ddcd657d59808981796718295e5
AB-1234 AB-1234-SampleA-1 TruSeq DNA PCR-Free Sample Preparation kit LT Sample_AB-1234-SampleA-1_AB-1234-SampleA-1_2-2271 WGS GENOMIC RANDOM PAIRED 351 Illumina NovaSeq 6000 210415_A00001_0123_BXYZ321XY/Unaligned/AB-1234/Sample_AB-1234-SampleA-1/AB-1234-SampleA-1_S2_L002_R1_001.fastq.gz 2e8abc3dce5dd0faeeeda7210b013507 210415_A00001_0123_BXYZ321XY/Unaligned/AB-1234/Sample_AB-1234-SampleA-1/AB-1234-SampleA-1_S2_L002_R2_001.fastq.gz 0e172ee34f38c7af2613df53b98e8a09
AB-1234 AB-1234-SampleA-2 thruplex smarter dna-seq kit Sample_AB-1234-SampleA-2_AB-1234-SampleA-2_2-2272 WGS GENOMIC RANDOM PAIRED 352 Illumina NovaSeq 6000 210415_A00001_0123_BXYZ321XY/Unaligned/AB-1234/Sample_AB-1234-SampleA-2/AB-1234-SampleA-2_S3_L001_R1_001.fastq.gz 55a35bd5ee20f507368bca2eb14b72d8 210415_A00001_0123_BXYZ321XY/Unaligned/AB-1234/Sample_AB-1234-SampleA-2/AB-1234-SampleA-2_S3_L001_R2_001.fastq.gz e34c9ddcd657d59808981796718295e5
AB-1234 AB-1234-SampleA-2 thruplex smarter dna-seq kit Sample_AB-1234-SampleA-2_AB-1234-SampleA-2_2-2272 WGS GENOMIC RANDOM PAIRED 352 Illumina NovaSeq 6000 210415_A00001_0123_BXYZ321XY/Unaligned/AB-1234/Sample_AB-1234-SampleA-2/AB-1234-SampleA-2_S3_L002_R1_001.fastq.gz 2e8abc3dce5dd0faeeeda7210b013507 210415_A00001_0123_BXYZ321XY/Unaligned/AB-1234/Sample_AB-1234-SampleA-2/AB-1234-SampleA-2_S3_L002_R2_001.fastq.gz 0e172ee34f38c7af2613df53b98e8a09
9 changes: 9 additions & 0 deletions tests/test_data/CD-5678.metadata.ena.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
FileType fastq Read submission file type
study sample design_description library_construction_protocol library_name library_strategy library_source library_selection library_layout insert_size instrument_model forward_file_name forward_file_md5 reverse_file_name reverse_file_md5
Project_CD-5678 CD-5678-SampleA-1 twist human core exome Sample_CD-5678-SampleA-1_CD-5678-SampleA-1_2-2274 TARGETED_CAPTURE GENOMIC HYBRID_SELECTION PAIRED 354 Illumina NovaSeq 6000 210415_A00001_0123_BXYZ321XY/Unaligned/Project_CD-5678/Sample_CD-5678-SampleA-1/CD-5678-SampleA-1_S4_L001_R1_001.fastq.gz bc9ced62f0b4652cfe7c9087d6e021d6 210415_A00001_0123_BXYZ321XY/Unaligned/Project_CD-5678/Sample_CD-5678-SampleA-1/CD-5678-SampleA-1_S4_L001_R2_001.fastq.gz 04ddbf4edfebb115c975921dcef41eee
Project_CD-5678 CD-5678-SampleA-1 twist human core exome Sample_CD-5678-SampleA-1_CD-5678-SampleA-1_2-2274 TARGETED_CAPTURE GENOMIC HYBRID_SELECTION PAIRED 354 Illumina NovaSeq 6000 210415_A00001_0123_BXYZ321XY/Unaligned/Project_CD-5678/Sample_CD-5678-SampleA-1/CD-5678-SampleA-1_S4_L003_R1_001.fastq.gz 4d2b9097ba63d360ba4b3503d1ff688a 210415_A00001_0123_BXYZ321XY/Unaligned/Project_CD-5678/Sample_CD-5678-SampleA-1/CD-5678-SampleA-1_S4_L003_R2_001.fastq.gz afc6fc392eb0604ffc533300b53e4266
Project_CD-5678 CD-5678-SampleA-2 nebnext enzymatic methyl-seq kit Sample_CD-5678-SampleA-2_CD-5678-SampleA-2_2-2275 BISULFITE_SEQ GENOMIC RANDOM PAIRED 355 Illumina NovaSeq 6000 210415_A00001_0123_BXYZ321XY/Unaligned/Project_CD-5678/Sample_CD-5678-SampleA-2/CD-5678-SampleA-2_S5_L001_R1_001.fastq.gz fb2d74511d69ab4200049939d307d3ee 210415_A00001_0123_BXYZ321XY/Unaligned/Project_CD-5678/Sample_CD-5678-SampleA-2/CD-5678-SampleA-2_S5_L001_R2_001.fastq.gz 7b368131f221cfe8c130f8e9ac28f7f8
Project_CD-5678 CD-5678-SampleA-2 nebnext enzymatic methyl-seq kit Sample_CD-5678-SampleA-2_CD-5678-SampleA-2_2-2275 BISULFITE_SEQ GENOMIC RANDOM PAIRED 355 Illumina NovaSeq 6000 210415_A00001_0123_BXYZ321XY/Unaligned/Project_CD-5678/Sample_CD-5678-SampleA-2/CD-5678-SampleA-2_S5_L003_R1_001.fastq.gz 8c9dc5c0a556a6a87d336b3f2892d95f 210415_A00001_0123_BXYZ321XY/Unaligned/Project_CD-5678/Sample_CD-5678-SampleA-2/CD-5678-SampleA-2_S5_L003_R2_001.fastq.gz 311238fb068964e201a294797d7a9b12
Project_CD-5678 CD-5678-SampleB TruSeq DNA PCR-Free Sample Preparation kit LT Sample_CD-5678-SampleB_CD-5678-SampleB_2-2276 WGS GENOMIC RANDOM PAIRED 356 Illumina NovaSeq 6000 210415_A00001_0123_BXYZ321XY/Unaligned/Project_CD-5678/Sample_CD-5678-SampleB/CD-5678-SampleB_S6_L001_R1_001.fastq.gz 8820f6ff98ed4da12200ff472552d17d 210415_A00001_0123_BXYZ321XY/Unaligned/Project_CD-5678/Sample_CD-5678-SampleB/CD-5678-SampleB_S6_L001_R2_001.fastq.gz c7e460d24f81375ab25a8a5485a6d279
Project_CD-5678 CD-5678-SampleB TruSeq DNA PCR-Free Sample Preparation kit LT Sample_CD-5678-SampleB_CD-5678-SampleB_2-2276 WGS GENOMIC RANDOM PAIRED 356 Illumina NovaSeq 6000 210415_A00001_0123_BXYZ321XY/Unaligned/Project_CD-5678/Sample_CD-5678-SampleB/CD-5678-SampleB_S6_L003_R1_001.fastq.gz 37435fbea7ccbb85784b8fa79e49427b 210415_A00001_0123_BXYZ321XY/Unaligned/Project_CD-5678/Sample_CD-5678-SampleB/CD-5678-SampleB_S6_L003_R2_001.fastq.gz 566006c970f7dcaf37441ccc31450d02
Project_CD-5678 CD-5678-SampleB TruSeq DNA PCR-Free Sample Preparation kit LT Sample_CD-5678-SampleB-2_CD-5678-SampleB_2-2277 WGS GENOMIC RANDOM PAIRED 347 Illumina NovaSeq 6000 210415_A00001_0123_BXYZ321XY/Unaligned/Project_CD-5678/Sample_CD-5678-SampleB-2/CD-5678-SampleB_S7_L003_R1_001.fastq.gz 0b186113fa683201a10bd3f11cb24f7a 210415_A00001_0123_BXYZ321XY/Unaligned/Project_CD-5678/Sample_CD-5678-SampleB-2/CD-5678-SampleB_S7_L003_R2_001.fastq.gz 00cc3e3f589c57c38569445f68d773c0
3 changes: 3 additions & 0 deletions tests/test_data/EF-9012.metadata.ena.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
FileType Read submission file type
study sample design_description library_construction_protocol library_name library_strategy library_source library_selection library_layout insert_size instrument_model forward_file_name forward_file_md5 reverse_file_name reverse_file_md5
EF-9012 EF-9012-608 TruSeq DNA PCR-Free Sample Preparation kit LT Sample_EF-9012-608_Sample_EF-9012-608 WGS GENOMIC RANDOM PAIRED 358 HiSeq X Ten

0 comments on commit 35ab4d7

Please sign in to comment.