Molmed · b97pla · Apr 30, 2024 · May 11, 2023 · Apr 24, 2024
diff --git a/config/app.yaml b/config/app.yaml
@@ -7,7 +7,9 @@ port: 8345
 base_url: /api/1.0
 
 # the location on the server under which runfolders are stored
-datadir: tests/resources
+# the placeholders {host} and {runfolder} may be used in the path; they are substituted with the
+# "host" and "runfolder" values of each request
+datadir: tests/resources/{host}/runfolders/{runfolder}
 
 # the url where the snpseq-data service can be accessed
 snpseq_data_url: http://localhost:9191

diff --git a/metadata_service/handlers.py b/metadata_service/handlers.py
@@ -2,6 +2,7 @@
 import logging
 import os
 import pathlib
+import shutil
 import tempfile
 
 import importlib.metadata
@@ -36,29 +37,50 @@ async def export(self, request):
         try:
             host = request.match_info["host"]
             runfolder = request.match_info["runfolder"]
+            lims_data = request.query.get("lims_data")
 
             runfolder_path = pathlib.Path(
-                request.app["config"].get("datadir", "."),
-                host,
-                "runfolders",
-                runfolder)
+                request.app["config"].get("datadir", ".").format(
+                    host=host,
+                    runfolder=runfolder
+                )
+            )
             metadata_export_path = os.path.join(runfolder_path, "metadata")
 
             with tempfile.TemporaryDirectory(prefix="extract", suffix="runfolder") as outdir:
-                runfolder_extract = self.process_runner.extract_runfolder_metadata(
-                    runfolder_path,
-                    outdir)
-                lims_data = await request.app['session'].request_snpseq_data_metadata(
-                    runfolder_path,
-                    outdir)
+                # unless a previous LIMS-export is passed as a parameter, do a request to the
+                # snpseq-data web service
+                if not lims_data:
+                    lims_data = await request.app['session'].request_snpseq_data_metadata(
+                        runfolder_path,
+                        outdir
+                    )
+                else:
+                    lims_data_src = pathlib.Path(
+                        metadata_export_path,
+                        lims_data
+                    )
+                    lims_data = pathlib.Path(
+                        outdir,
+                        lims_data
+                    )
+                    shutil.copy(lims_data_src, lims_data)
+
                 snpseq_data_extract = self.process_runner.extract_snpseq_data_metadata(
                     lims_data,
-                    outdir)
+                    outdir
+                )
+
+                runfolder_extract = self.process_runner.extract_runfolder_metadata(
+                    runfolder_path,
+                    outdir
+                )
 
                 metadata_export = self.process_runner.export_runfolder_metadata(
                         runfolder_extract,
                         snpseq_data_extract,
-                        metadata_export_path)
+                        metadata_export_path
+                )
 
             return aiohttp.web.json_response({'metadata': metadata_export}, status=200)
         except Exception as ex:

diff --git a/pyproject.toml b/pyproject.toml
@@ -7,7 +7,7 @@ include = ["metadata_service*"]
 
 [project]
 name = "metadata-service"
-version = "1.0.0"
+version = "1.1.0"
 authors = [
     {name = "SNP&SEQ Technology Platform, Uppsala University", email = "[email protected]" },
 ]

diff --git a/tests/config/app.yaml b/tests/config/app.yaml
@@ -2,6 +2,6 @@
 
 port: 9345
 base_url: /api/1.0
-datadir: tests
+datadir: tests/{host}/runfolders/{runfolder}
 snpseq_data_url: http://localhost:9191
 snpseq_metadata_executable: /Users/pontus/Documents/code/snpseq_metadata/venv_/bin/snpseq_metadata
diff --git a/tests/test_app.py b/tests/test_app.py
@@ -60,8 +60,8 @@ async def snpseq_data_server(aiohttp_server, load_config):
     """
     async def snpseq_data(request):
         q = request.query
-        response_path = pathlib.Path(
-            f"{load_config['datadir']}/test_data/{q['name']}.lims.json")
+        fcid = q.get("name")
+        response_path = pathlib.Path("tests", "test_data", f"{fcid}.lims.json")
         with open(response_path) as fh:
             data = json.load(fh)
 
@@ -87,6 +87,18 @@ async def snpseq_data(request):
     yield await aiohttp_server(app, port=int(port))
 
 
+def get_projects_from_jsonfile(jsonfile, pattern=None):
+    """Return the sorted, unique project ids that pattern captures in jsonfile."""
+    # the default pattern captures ids like AB-1234 from "project" or "project_id" entries
+    if not pattern:
+        pattern = r'"project(?:_id)?": "(\w{2}-\d{4})"'
+    with open(jsonfile, "r") as fh:
+        # the file is scanned line by line and only the first match on a line is used
+        hits = (re.search(pattern, row) for row in fh)
+        found = {hit.group(1) for hit in hits if hit is not None}
+    return sorted(found)
+
+
 class SnpseqDataTestRequest(metadata_service.clients.SnpseqDataRequest):
 
     async def external_session(self, app):
@@ -125,13 +137,26 @@ def export_runfolder_metadata(self, *args):
         outdir = args[-1]
         srcdir = os.path.join("tests", "test_data")
         outfiles = []
+
+        # extract the "original" project names from the lims export and the "tweaked" from the lims
+        # extract
+        projects = [[], []]
+        for i, jsonfile in enumerate((args[1], args[1].replace(".ngi.json", ".json"))):
+            projects[i] = get_projects_from_jsonfile(jsonfile)
+
         for srcfile in filter(
                 lambda f: f.endswith(".xml"),
                 os.listdir(srcdir)):
-            outfiles.append(os.path.join(outdir, srcfile))
+            outfile = srcfile
+            for prj_s, prj_c in zip(projects[0], projects[1]):
+                outfile = outfile.replace(prj_s, prj_c)
+            outfiles.append(
+                os.path.join(outdir, outfile)
+            )
             shutil.copy(
                 os.path.join(srcdir, srcfile),
-                outfiles[-1])
+                outfiles[-1]
+            )
         return outfiles
 
 
@@ -143,28 +168,67 @@ async def test_version(cli):
     assert ver["version"] == importlib.metadata.version('metadata-service')
 
 
-async def test_export(snpseq_data_server, cli, test_runfolder):
+async def _export_helper(
+        snpseq_data_server,
+        cli,
+        test_runfolder,
+        test_snpseq_data_path,
+        lims_data_cache
+):
     base_url = cli.server.app["config"].get("base_url", "")
     datadir = cli.server.app["config"]["datadir"]
     host = "test_data"
     runfolder = test_runfolder
+    datadir = datadir.format(
+        host=host,
+        runfolder=runfolder
+    )
     metadatadir = os.path.join(
         datadir,
-        host,
-        "runfolders",
-        runfolder,
         "metadata")
 
     shutil.rmtree(metadatadir, ignore_errors=True)
+
+    request_url = f"{base_url}/export/{host}/{runfolder}"
+    projects = [[], []]
+    projects[0] = get_projects_from_jsonfile(test_snpseq_data_path)
+    projects[1] = list(projects[0])
+
+    if lims_data_cache:
+        os.makedirs(metadatadir)
+
+        # copy the LIMS export file to the metadata dir
+        lims_data = os.path.join(
+            metadatadir,
+            os.path.basename(test_snpseq_data_path)
+        )
+
+        # tweak the project names so that we can make sure that the cache was used and not
+        # the mocked snpseq-data file
+        projects[1] = [
+            "-".join([
+                prj.split("-")[0][::-1],
+                prj.split("-")[1][::-1]
+            ]) for prj in projects[0]
+        ]
+        with open(test_snpseq_data_path, "r") as rh, open(lims_data, "w") as wh:
+            for line in rh:
+                for prj_s, prj_c in zip(*projects):
+                    line = line.replace(prj_s, prj_c)
+                wh.write(line)
+
+        # pass the name of the lims cache json file as a query parameter
+        request_url = f"{request_url}?lims_data={os.path.basename(test_snpseq_data_path)}"
+
     expected_files = sorted([
         os.path.join(
             metadatadir,
             f"{prj}-{typ}.xml")
-        for prj in ["AB-1234", "CD-5678", "EF-9012"]
+        for prj in projects[1]
         for typ in ["experiment", "run"]
     ])
 
-    resp = await cli.get(f"{base_url}/export/{host}/{runfolder}")
+    resp = await cli.get(request_url)
     json_resp = await resp.json()
 
     assert resp.status == 200
@@ -175,3 +239,33 @@ async def test_export(snpseq_data_server, cli, test_runfolder):
         assert os.path.exists(metafile)
 
     shutil.rmtree(metadatadir)
+
+
+async def test_export_with_lims_api(
+        snpseq_data_server,
+        cli,
+        test_runfolder,
+        test_snpseq_data_path
+):
+    """Run the export with the LIMS data requested from the mocked snpseq-data service."""
+    # the fixtures are forwarded positionally, in the order the helper declares them
+    fixtures = (snpseq_data_server, cli, test_runfolder, test_snpseq_data_path)
+    await _export_helper(
+        *fixtures,
+        lims_data_cache=False,
+    )
+
+
+async def test_export_with_lims_cache(
+        snpseq_data_server,
+        cli,
+        test_runfolder,
+        test_snpseq_data_path
+):
+    """Run the export with a previous LIMS export passed via the lims_data query parameter."""
+    # the fixtures are forwarded positionally, in the order the helper declares them
+    fixtures = (snpseq_data_server, cli, test_runfolder, test_snpseq_data_path)
+    await _export_helper(
+        *fixtures,
+        lims_data_cache=True,
+    )
diff --git a/tests/test_clients.py b/tests/test_clients.py
@@ -22,7 +22,9 @@ async def test_snpseq_data_client(
         test_runfolder,
         test_snpseq_data_path,
         test_snpseq_data_json):
-    rq = SnpseqDataRequest(external_url=f"http://{snpseq_data_server.host}:{snpseq_data_server.port}")
+    rq = SnpseqDataRequest(
+        external_url=f"http://{snpseq_data_server.host}:{snpseq_data_server.port}"
+    )
     rq.session = aiohttp.ClientSession(rq.external_url)
     flowcell_id = rq.flowcellid_from_runfolder(test_runfolder)
     request_urls = (