From f36435e012ec302d0e2ed6825b162fc6cfdfd08d Mon Sep 17 00:00:00 2001 From: Daisie Huang Date: Tue, 19 Nov 2024 12:00:33 -0800 Subject: [PATCH 1/4] remove unused result classes --- htsget_ingest.py | 1 - ingest_operations.py | 1 - ingest_result.py | 23 ----------------------- 3 files changed, 25 deletions(-) delete mode 100644 ingest_result.py diff --git a/htsget_ingest.py b/htsget_ingest.py index 2f94a3b..396be15 100644 --- a/htsget_ingest.py +++ b/htsget_ingest.py @@ -5,7 +5,6 @@ import os import re import json -from ingest_result import IngestServerException, IngestUserException, IngestResult import requests import sys from urllib.parse import urlparse diff --git a/ingest_operations.py b/ingest_operations.py index cbd3897..d31b18f 100644 --- a/ingest_operations.py +++ b/ingest_operations.py @@ -6,7 +6,6 @@ import urllib.parse import auth -from ingest_result import * import katsu_ingest import htsget_ingest from opa_ingest import remove_user_from_dataset, add_user_to_dataset diff --git a/ingest_result.py b/ingest_result.py deleted file mode 100644 index 1cf4667..0000000 --- a/ingest_result.py +++ /dev/null @@ -1,23 +0,0 @@ -class IngestResult(): - def __init__(self, value=None): - self.value = value - -class IngestSuccess(IngestResult): - pass - -class IngestPermissionsException(IngestResult): - pass - -class IngestServerException(IngestResult): - pass - -class IngestUserException(IngestResult): - pass - -class IngestValidationException(IngestUserException): - def __init__(self, value, validation_errors): - super().__init__(value) - self.validation_errors = validation_errors - -class IngestCohortException(IngestUserException): - pass \ No newline at end of file From e291c8f1fb9e5a9b6345c723df5b72da8123a1e4 Mon Sep 17 00:00:00 2001 From: Daisie Huang Date: Tue, 19 Nov 2024 12:01:24 -0800 Subject: [PATCH 2/4] rename cohort to program --- htsget_ingest.py | 20 ++++++++++---------- ingest_operations.py | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/htsget_ingest.py b/htsget_ingest.py index 396be15..c7b1879 100644 --- a/htsget_ingest.py +++ b/htsget_ingest.py @@ -46,7 +46,7 @@ def link_genomic_data(sample, do_not_index=False): genomic_drs_obj["id"] = sample["genomic_file_id"] genomic_drs_obj["name"] = sample["genomic_file_id"] genomic_drs_obj["description"] = sample["metadata"]["sequence_type"] - genomic_drs_obj["cohort"] = sample["program_id"] + genomic_drs_obj["program"] = sample["program_id"] genomic_drs_obj["reference_genome"] = sample["metadata"]["reference"] genomic_drs_obj["version"] = "v1" if "contents" not in genomic_drs_obj: @@ -70,7 +70,7 @@ def link_genomic_data(sample, do_not_index=False): "id": clin_sample["submitter_sample_id"], "name": clin_sample["submitter_sample_id"], "description": "sample", - "cohort": sample["program_id"], + "program": sample["program_id"], "version": "v1", "contents": [] } @@ -148,7 +148,7 @@ def add_file_drs_object(genomic_drs_obj, file, type, headers): "id": file['name'], "name": file['name'], "description": type, - "cohort": genomic_drs_obj["cohort"], + "program": genomic_drs_obj["program"], "version": "v1" } access_method = get_access_method(file["access_method"]) @@ -263,7 +263,7 @@ def htsget_ingest(ingest_json, do_not_index=False): statistics = {} for program_id in program_ids: url = f"{HTSGET_URL}/htsget/v1/samples" - response = requests.get(url, headers=headers, params={"cohort": program_id}) + response = requests.get(url, headers=headers, params={"program": program_id}) if response.status_code == 200: for sample in response.json(): if program_id not in statistics: @@ -278,13 +278,13 @@ def htsget_ingest(ingest_json, do_not_index=False): result["errors"] = f"Could not collect completeness stats for program: {response.text}" for program_id in statistics: - # get the cohort - url = f"{HTSGET_URL}/ga4gh/drs/v1/cohorts" + # get the program + url = f"{HTSGET_URL}/ga4gh/drs/v1/programs" response = requests.get(f"{url}/{program_id}", headers=headers) if response.status_code == 200: - cohort = response.json() - cohort["statistics"] = statistics[program_id] - response = requests.post(url, headers=headers, json=cohort) + program = response.json() + program["statistics"] = statistics[program_id] + response = requests.post(url, headers=headers, json=program) if response.status_code != 200: result["errors"] = f"Could not add statistics for program: {response.text}" else: @@ -354,7 +354,7 @@ def check_genomic_data(dataset, token): def delete_program(program_id, token): headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"} - url = f"{HTSGET_URL}/ga4gh/drs/v1/cohorts/{program_id}" + url = f"{HTSGET_URL}/ga4gh/drs/v1/programs/{program_id}" return requests.delete(url, headers=headers) diff --git a/ingest_operations.py b/ingest_operations.py index d31b18f..4f82867 100644 --- a/ingest_operations.py +++ b/ingest_operations.py @@ -21,7 +21,7 @@ "SUCCESS": 0, "UNAUTHORIZED": 1, "VALIDATION": 2, - "COHORTEXISTS": 3, + "PROGRAMEXISTS": 3, "INTERNAL": 4, "AUTHORIZATIONERR": 5 } @@ -31,7 +31,7 @@ def generateResponse(result, response_code): 0: ("Success", 200), 1: ("Unauthorized", 403), 2: ("Validation error", 422), - 3: ("Cohort exists", 422), + 3: ("Program exists", 422), 4: ("Internal CanDIG error", 500), 5: ("Authorization error", 401) } From 309ebc7c24941de60948e66722380df0ebdc0d48 Mon Sep 17 00:00:00 2001 From: Daisie Huang Date: Wed, 20 Nov 2024 11:03:32 -0800 Subject: [PATCH 3/4] catch exceptions in daemon --- daemon.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/daemon.py b/daemon.py index bc28ea0..f29cea9 100644 --- a/daemon.py +++ b/daemon.py @@ -28,8 +28,11 @@ def ingest_file(file_path): json_data = json_data["katsu"] programs = list(json_data.keys()) for program_id in programs: - ingest_results, status_code = ingest_schemas(json_data[program_id]["schemas"]) - results[program_id] = ingest_results + try: + ingest_results, status_code = ingest_schemas(json_data[program_id]["schemas"]) + results[program_id] = ingest_results + except Exception as e: + results[program_id] = f"Exception: {type(e)} {str(e)}" elif "htsget" in json_data: do_not_index = False if "do_not_index" in json_data: @@ -37,8 +40,11 @@ def ingest_file(file_path): json_data = json_data["htsget"] programs = list(json_data.keys()) for program_id in programs: - ingest_results, status_code = htsget_ingest(json_data[program_id], do_not_index) - results[program_id] = ingest_results + try: + ingest_results, status_code = htsget_ingest(json_data[program_id], do_not_index) + results[program_id] = ingest_results + except Exception as e: + results[program_id] = f"Exception: {type(e)} {str(e)}" with open(results_path, "w") as f: json.dump(results, f) os.remove(file_path) From d4a00d1e2c64931102c0ac2c928109e7b5ca8dc2 Mon Sep 17 00:00:00 2001 From: Daisie Huang Date: Wed, 20 Nov 2024 13:09:37 -0800 Subject: [PATCH 4/4] Update daemon.py --- daemon.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/daemon.py b/daemon.py index f29cea9..631d122 100644 --- a/daemon.py +++ b/daemon.py @@ -20,8 +20,13 @@ def ingest_file(file_path): json_data = None results = {} results_path = os.path.join(DAEMON_PATH, "results", os.path.basename(file_path)) - with open(file_path) as f: - json_data = json.load(f) + try: + with open(file_path) as f: + json_data = json.load(f) + except Exception as e: + message = f"Couldn't load data from {file_path}: {type(e)} {str(e)}" + logger.error(message) + results["error"] = message if json_data is not None: logger.info(f"Ingesting {file_path}") if "katsu" in json_data: