Skip to content

Commit

Permalink
allow passing a cached snpseq-data json export to service
Browse files Browse the repository at this point in the history
  • Loading branch information
b97pla committed Apr 17, 2024
1 parent 421ff3c commit f04f015
Show file tree
Hide file tree
Showing 5 changed files with 145 additions and 27 deletions.
4 changes: 3 additions & 1 deletion config/app.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ port: 8345
base_url: /api/1.0

# the location on the server under which runfolders are stored
datadir: tests/resources
# the placeholders {host} and {runfolder} in the path below will be substituted
# with the corresponding values from each incoming request
datadir: tests/resources/{host}/runfolders/{runfolder}

# the url where the snpseq-data service can be accessed
snpseq_data_url: http://localhost:9191
Expand Down
50 changes: 36 additions & 14 deletions metadata_service/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import os
import pathlib
import shutil
import tempfile

import importlib.metadata
Expand Down Expand Up @@ -36,29 +37,50 @@ async def export(self, request):
try:
host = request.match_info["host"]
runfolder = request.match_info["runfolder"]
lims_data = request.query.get("lims_data")

runfolder_path = pathlib.Path(
request.app["config"].get("datadir", "."),
host,
"runfolders",
runfolder)
request.app["config"].get("datadir", ".").format(
host=host,
runfolder=runfolder
)
)
metadata_export_path = os.path.join(runfolder_path, "metadata")

with tempfile.TemporaryDirectory(prefix="extract", suffix="runfolder") as outdir:
runfolder_extract = self.process_runner.extract_runfolder_metadata(
runfolder_path,
outdir)
lims_data = await request.app['session'].request_snpseq_data_metadata(
runfolder_path,
outdir)
# unless a previous LIMS-export is passed as a parameter, do a request to the
# snpseq-data web service
if not lims_data:
lims_data = await request.app['session'].request_snpseq_data_metadata(
runfolder_path,
outdir
)
else:
lims_data_src = pathlib.Path(
metadata_export_path,
lims_data
)
lims_data = pathlib.Path(
outdir,
lims_data
)
shutil.copy(lims_data_src, lims_data)

snpseq_data_extract = self.process_runner.extract_snpseq_data_metadata(
lims_data,
outdir)
outdir
)

runfolder_extract = self.process_runner.extract_runfolder_metadata(
runfolder_path,
outdir
)

metadata_export = self.process_runner.export_runfolder_metadata(
runfolder_extract,
snpseq_data_extract,
metadata_export_path)
runfolder_extract,
snpseq_data_extract,
metadata_export_path
)

return aiohttp.web.json_response({'metadata': metadata_export}, status=200)
except Exception as ex:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ include = ["metadata_service*"]

[project]
name = "metadata-service"
version = "1.0.0"
version = "1.1.0"
authors = [
{name = "SNP&SEQ Technology Platform, Uppsala University", email = "[email protected]" },
]
Expand Down
2 changes: 1 addition & 1 deletion tests/config/app.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@

port: 9345
base_url: /api/1.0
datadir: tests
datadir: tests/{host}/runfolders/{runfolder}
snpseq_data_url: http://localhost:9191
snpseq_metadata_executable: /Users/pontus/Documents/code/snpseq_metadata/venv_/bin/snpseq_metadata
114 changes: 104 additions & 10 deletions tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ async def snpseq_data_server(aiohttp_server, load_config):
"""
async def snpseq_data(request):
q = request.query
response_path = pathlib.Path(
f"{load_config['datadir']}/test_data/{q['name']}.lims.json")
fcid = q.get("name")
response_path = pathlib.Path("tests", "test_data", f"{fcid}.lims.json")
with open(response_path) as fh:
data = json.load(fh)

Expand All @@ -87,6 +87,18 @@ async def snpseq_data(request):
yield await aiohttp_server(app, port=int(port))


def get_projects_from_jsonfile(jsonfile, pattern=None):
    """Extract the unique project identifiers referenced in a JSON file.

    The file is scanned line-by-line for matches of `pattern`; by default this
    matches "project" or "project_id" keys whose value has the form "XX-0000"
    (two word characters, a dash, four digits).

    :param jsonfile: path to the JSON file to scan
    :param pattern: optional regex with one capture group for the project id;
        defaults to the project/project_id pattern described above
    :return: the captured project ids, de-duplicated and sorted, as a list
    """
    # compile once instead of re-compiling the pattern on every line
    regex = re.compile(pattern or r'"project(?:_id)?": "(\w{2}-\d{4})"')
    projects = set()
    with open(jsonfile, "r") as fh:
        for line in fh:
            m = regex.search(line)
            if m is not None:
                projects.add(m.group(1))

    # sorted() already returns a list; the original list(sorted(list(...)))
    # nesting was redundant
    return sorted(projects)


class SnpseqDataTestRequest(metadata_service.clients.SnpseqDataRequest):

async def external_session(self, app):
Expand Down Expand Up @@ -125,13 +137,26 @@ def export_runfolder_metadata(self, *args):
outdir = args[-1]
srcdir = os.path.join("tests", "test_data")
outfiles = []

# extract the "original" project names from the lims export and the "tweaked" from the lims
# extract
projects = [[], []]
for i, jsonfile in enumerate((args[1], args[1].replace(".ngi.json", ".json"))):
projects[i] = get_projects_from_jsonfile(jsonfile)

for srcfile in filter(
lambda f: f.endswith(".xml"),
os.listdir(srcdir)):
outfiles.append(os.path.join(outdir, srcfile))
outfile = srcfile
for prj_s, prj_c in zip(projects[0], projects[1]):
outfile = outfile.replace(prj_s, prj_c)
outfiles.append(
os.path.join(outdir, outfile)
)
shutil.copy(
os.path.join(srcdir, srcfile),
outfiles[-1])
outfiles[-1]
)
return outfiles


Expand All @@ -143,28 +168,67 @@ async def test_version(cli):
assert ver["version"] == importlib.metadata.version('metadata-service')


async def test_export(snpseq_data_server, cli, test_runfolder):
async def _export_helper(
snpseq_data_server,
cli,
test_runfolder,
test_snpseq_data_path,
lims_data_cache
):
base_url = cli.server.app["config"].get("base_url", "")
datadir = cli.server.app["config"]["datadir"]
host = "test_data"
runfolder = test_runfolder
datadir = datadir.format(
host=host,
runfolder=runfolder
)
metadatadir = os.path.join(
datadir,
host,
"runfolders",
runfolder,
"metadata")

shutil.rmtree(metadatadir, ignore_errors=True)

request_url = f"{base_url}/export/{host}/{runfolder}"
projects = [[], []]
projects[0] = get_projects_from_jsonfile(test_snpseq_data_path)
projects[1] = list(projects[0])

if lims_data_cache:
os.makedirs(metadatadir)

# copy the LIMS export file to the metadata dir
lims_data = os.path.join(
metadatadir,
os.path.basename(test_snpseq_data_path)
)

# tweak the project names so that we can make sure that the cache was used and not
# the mocked snpseq-data file
projects[1] = [
"-".join([
prj.split("-")[0][::-1],
prj.split("-")[1][::-1]
]) for prj in projects[0]
]
with open(test_snpseq_data_path, "r") as rh, open(lims_data, "w") as wh:
for line in rh:
for prj_s, prj_c in zip(*projects):
line = line.replace(prj_s, prj_c)
wh.write(line)

# pass the name of the lims cache json file as a query parameter
request_url = f"{request_url}?lims_data={os.path.basename(test_snpseq_data_path)}"

expected_files = sorted([
os.path.join(
metadatadir,
f"{prj}-{typ}.xml")
for prj in ["AB-1234", "CD-5678", "EF-9012"]
for prj in projects[1]
for typ in ["experiment", "run"]
])

resp = await cli.get(f"{base_url}/export/{host}/{runfolder}")
resp = await cli.get(request_url)
json_resp = await resp.json()

assert resp.status == 200
Expand All @@ -175,3 +239,33 @@ async def test_export(snpseq_data_server, cli, test_runfolder):
assert os.path.exists(metafile)

shutil.rmtree(metadatadir)


async def test_export_with_lims_api(
    snpseq_data_server,
    cli,
    test_runfolder,
    test_snpseq_data_path
):
    """Export metadata with the LIMS data fetched from the mocked snpseq-data service."""
    await _export_helper(
        snpseq_data_server, cli, test_runfolder, test_snpseq_data_path,
        lims_data_cache=False,
    )


async def test_export_with_lims_cache(
    snpseq_data_server,
    cli,
    test_runfolder,
    test_snpseq_data_path
):
    """Export metadata with the LIMS data taken from a cached export file."""
    await _export_helper(
        snpseq_data_server, cli, test_runfolder, test_snpseq_data_path,
        lims_data_cache=True,
    )

0 comments on commit f04f015

Please sign in to comment.