From f04f015353d46ed194953efa2008d050f7615438 Mon Sep 17 00:00:00 2001 From: b97pla Date: Wed, 17 Apr 2024 14:07:31 +0200 Subject: [PATCH] allow passing a cached snpseq-data json export to service --- config/app.yaml | 4 +- metadata_service/handlers.py | 50 ++++++++++----- pyproject.toml | 2 +- tests/config/app.yaml | 2 +- tests/test_app.py | 114 ++++++++++++++++++++++++++++++++--- 5 files changed, 145 insertions(+), 27 deletions(-) diff --git a/config/app.yaml b/config/app.yaml index 91d700b..e7c92bf 100644 --- a/config/app.yaml +++ b/config/app.yaml @@ -7,7 +7,9 @@ port: 8345 base_url: /api/1.0 # the location on the server under which runfolders are stored -datadir: tests/resources +# the parameters "host" and "runfolder" can be used inside curly brackets (e.g. {host}) and will be substituted +# for each request +datadir: tests/resources/{host}/runfolders/{runfolder} # the url where the snpseq-data service can be accessed snpseq_data_url: http://localhost:9191 diff --git a/metadata_service/handlers.py b/metadata_service/handlers.py index ad72ea1..fe2592b 100644 --- a/metadata_service/handlers.py +++ b/metadata_service/handlers.py @@ -2,6 +2,7 @@ import logging import os import pathlib +import shutil import tempfile import importlib.metadata @@ -36,29 +37,50 @@ async def export(self, request): try: host = request.match_info["host"] runfolder = request.match_info["runfolder"] + lims_data = request.query.get("lims_data") runfolder_path = pathlib.Path( - request.app["config"].get("datadir", "."), - host, - "runfolders", - runfolder) + request.app["config"].get("datadir", ".").format( + host=host, + runfolder=runfolder + ) + ) metadata_export_path = os.path.join(runfolder_path, "metadata") with tempfile.TemporaryDirectory(prefix="extract", suffix="runfolder") as outdir: - runfolder_extract = self.process_runner.extract_runfolder_metadata( - runfolder_path, - outdir) - lims_data = await request.app['session'].request_snpseq_data_metadata( - runfolder_path, - outdir) + # unless a previous LIMS-export is passed as a parameter, do a request to the + # snpseq-data web service + if not lims_data: + lims_data = await request.app['session'].request_snpseq_data_metadata( + runfolder_path, + outdir + ) + else: + lims_data_src = pathlib.Path( + metadata_export_path, + lims_data + ) + lims_data = pathlib.Path( + outdir, + lims_data + ) + shutil.copy(lims_data_src, lims_data) + snpseq_data_extract = self.process_runner.extract_snpseq_data_metadata( lims_data, - outdir) + outdir + ) + + runfolder_extract = self.process_runner.extract_runfolder_metadata( + runfolder_path, + outdir + ) metadata_export = self.process_runner.export_runfolder_metadata( - runfolder_extract, - snpseq_data_extract, - metadata_export_path) + runfolder_extract, + snpseq_data_extract, + metadata_export_path + ) return aiohttp.web.json_response({'metadata': metadata_export}, status=200) except Exception as ex: diff --git a/pyproject.toml b/pyproject.toml index e3ed5e8..1fb0ad9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ include = ["metadata_service*"] [project] name = "metadata-service" -version = "1.0.0" +version = "1.1.0" authors = [ {name = "SNP&SEQ Technology Platform, Uppsala University", email = "seq@medsci.uu.se" }, ] diff --git a/tests/config/app.yaml b/tests/config/app.yaml index 8ed4ef9..f8688e5 100644 --- a/tests/config/app.yaml +++ b/tests/config/app.yaml @@ -2,6 +2,6 @@ port: 9345 base_url: /api/1.0 -datadir: tests +datadir: tests/{host}/runfolders/{runfolder} snpseq_data_url: http://localhost:9191 snpseq_metadata_executable: /Users/pontus/Documents/code/snpseq_metadata/venv_/bin/snpseq_metadata diff --git a/tests/test_app.py b/tests/test_app.py index 9855e1e..7b63884 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -60,8 +60,8 @@ async def snpseq_data_server(aiohttp_server, load_config): """ async def snpseq_data(request): q = request.query - response_path = pathlib.Path( - f"{load_config['datadir']}/test_data/{q['name']}.lims.json") + fcid = q.get("name") + response_path = pathlib.Path("tests", "test_data", f"{fcid}.lims.json") with open(response_path) as fh: data = json.load(fh) @@ -87,6 +87,18 @@ async def snpseq_data(request): yield await aiohttp_server(app, port=int(port)) +def get_projects_from_jsonfile(jsonfile, pattern=None): + projects = [] + pattern = pattern or r'"project(?:_id)?": "(\w{2}-\d{4})"' + with open(jsonfile, "r") as fh: + for line in fh: + m = re.search(pattern, line) + if m is not None: + projects.append(m.group(1)) + + return list(sorted(list(set(projects)))) + + class SnpseqDataTestRequest(metadata_service.clients.SnpseqDataRequest): async def external_session(self, app): @@ -125,13 +137,26 @@ def export_runfolder_metadata(self, *args): outdir = args[-1] srcdir = os.path.join("tests", "test_data") outfiles = [] + + # extract the "original" project names from the lims export and the "tweaked" from the lims + # extract + projects = [[], []] + for i, jsonfile in enumerate((args[1], args[1].replace(".ngi.json", ".json"))): + projects[i] = get_projects_from_jsonfile(jsonfile) + for srcfile in filter( lambda f: f.endswith(".xml"), os.listdir(srcdir)): - outfiles.append(os.path.join(outdir, srcfile)) + outfile = srcfile + for prj_s, prj_c in zip(projects[0], projects[1]): + outfile = outfile.replace(prj_s, prj_c) + outfiles.append( + os.path.join(outdir, outfile) + ) shutil.copy( os.path.join(srcdir, srcfile), - outfiles[-1]) + outfiles[-1] + ) return outfiles @@ -143,28 +168,67 @@ async def test_version(cli): assert ver["version"] == importlib.metadata.version('metadata-service') -async def test_export(snpseq_data_server, cli, test_runfolder): +async def _export_helper( + snpseq_data_server, + cli, + test_runfolder, + test_snpseq_data_path, + lims_data_cache +): base_url = cli.server.app["config"].get("base_url", "") datadir = cli.server.app["config"]["datadir"] host = "test_data" runfolder = test_runfolder + datadir = datadir.format( + host=host, + runfolder=runfolder + ) metadatadir = os.path.join( datadir, - host, - "runfolders", - runfolder, "metadata") shutil.rmtree(metadatadir, ignore_errors=True) + + request_url = f"{base_url}/export/{host}/{runfolder}" + projects = [[], []] + projects[0] = get_projects_from_jsonfile(test_snpseq_data_path) + projects[1] = list(projects[0]) + + if lims_data_cache: + os.makedirs(metadatadir) + + # copy the LIMS export file to the metadata dir + lims_data = os.path.join( + metadatadir, + os.path.basename(test_snpseq_data_path) + ) + + # tweak the project names so that we can make sure that the cache was used and not + # the mocked snpseq-data file + projects[1] = [ + "-".join([ + prj.split("-")[0][::-1], + prj.split("-")[1][::-1] + ]) for prj in projects[0] + ] + with open(test_snpseq_data_path, "r") as rh, open(lims_data, "w") as wh: + for line in rh: + for prj_s, prj_c in zip(*projects): + line = line.replace(prj_s, prj_c) + wh.write(line) + + # pass the name of the lims cache json file as a query parameter + request_url = f"{request_url}?lims_data={os.path.basename(test_snpseq_data_path)}" + expected_files = sorted([ os.path.join( metadatadir, f"{prj}-{typ}.xml") - for prj in ["AB-1234", "CD-5678", "EF-9012"] + for prj in projects[1] for typ in ["experiment", "run"] ]) - resp = await cli.get(f"{base_url}/export/{host}/{runfolder}") + resp = await cli.get(request_url) json_resp = await resp.json() assert resp.status == 200 @@ -175,3 +239,33 @@ async def test_export(snpseq_data_server, cli, test_runfolder): assert os.path.exists(metafile) shutil.rmtree(metadatadir) + + +async def test_export_with_lims_api( + snpseq_data_server, + cli, + test_runfolder, + test_snpseq_data_path +): + await _export_helper( + snpseq_data_server, + cli, + test_runfolder, + test_snpseq_data_path, + lims_data_cache=False, + ) + + +async def test_export_with_lims_cache( + snpseq_data_server, + cli, + test_runfolder, + test_snpseq_data_path +): + await _export_helper( + snpseq_data_server, + cli, + test_runfolder, + test_snpseq_data_path, + lims_data_cache=True, + )