From f51f8e1dbcffc0316255ff9db7d302006c21186e Mon Sep 17 00:00:00 2001
From: Chandra Y
Date: Tue, 6 Feb 2024 13:30:00 -0600
Subject: [PATCH] Adjust latest to match logging changes from main

---
Review notes (text between the three-dash line and the diff is not part of
the commit message):

- Line breaks and indentation were restored from a whitespace-collapsed copy
  of this patch. Hunk line counts and the diffstat were re-checked, but the
  exact position of blank context lines and the leading whitespace are a
  best guess -- run `git apply --check` before using it.
- app.py: `logger.addHandler = gunicorn_logger.handlers` replaced the
  addHandler method with a list; it now assigns `logger.handlers`, matching
  the `app.logger.handlers` line above it.
- data_loading.py: check_data_current() gained a leading `api_request`
  parameter, but every call site visible in this patch still passes only the
  date, which raised TypeError. `data_date` now defaults to None and a lone
  positional argument is treated as the date; the two-argument form works
  as written by the author.
- data_loading.py: docstring typo "Misisng" corrected.
- TODO(review): get_tapis_token() is deleted from data_loading.py while
  app.py now calls it; no replacement definition is visible in this patch.
- TODO(review): get_api_blood_data() is not touched by any hunk (its hunk
  header still shows the `api_request` parameter) although app.py now passes
  it a token -- confirm its body no longer needs the request object.
- TODO(review): the new `raise TapisTokenRetrievalException()` statements sit
  inside `try` blocks whose `except Exception` handlers only print the
  traceback, so the exception looks like it never reaches the caller.

 src/app.py | 93 +++++++++++++++++++++++++++++---------------- src/data_loading.py | 80 ++++++++++++++++++++------------------
 2 files changed, 103 insertions(+), 70 deletions(-)

diff --git a/src/app.py b/src/app.py
index 1be64c1..d66110d 100644
--- a/src/app.py
+++ b/src/app.py
@@ -1,7 +1,10 @@
+from datetime import datetime
 from flask import Flask, jsonify, request
+from os import environ
 import os
 import pandas as pd
 import csv
+import logging
 
 # from data_processing import *
 from data_loading import *
@@ -12,7 +15,10 @@ data_access_type = os.environ.get('DATA_ACCESS_TYPE')
 
 
 current_folder = os.path.dirname(__file__)
+DATA_PATH = os.path.join(current_folder,'data')
 ASSETS_PATH = os.path.join(current_folder,'assets')
+# Path to Report files at TACC
+api_root = environ.get("API_ROOT")
 
 local_data_path = os.environ.get("LOCAL_DATA_PATH","")
 local_data_date = os.environ.get("LOCAL_DATA_DATE","")
@@ -124,6 +130,23 @@ app = Flask(__name__)
 app.debug = True
 
 
+gunicorn_logger = logging.getLogger('gunicorn.error')
+app.logger = logging.getLogger("datastore_app")
+app.logger.handlers = gunicorn_logger.handlers
+app.logger.setLevel(logging.DEBUG)
+
+logger = logging.getLogger('werkzeug')
+logger.handlers = gunicorn_logger.handlers
+logger.setLevel(logging.DEBUG)
+
+@app.before_request
+def before_request_log():
+    app.logger.debug(f"{request.remote_addr} \"{request.method} {request.url}\"")
+
+@app.after_request
+def after_request_log(response):
+    app.logger.debug(f"{request.remote_addr} \"{request.method} {request.url}\" {response.status_code}")
+    return response
 
 # APIS: try to load new data, if doesn't work, get most recent
 @app.route("/api/apis")
@@ -144,10 +167,11 @@ def api_imaging():
     global api_data_cache
 
     try:
+        tapis_token = get_tapis_token(request)
         if not api_data_index['imaging'] or not check_data_current(datetime.strptime(api_data_index['imaging'], datetime_format)):
             if data_access_type != 'LOCAL':
                 data_date = datetime.now().strftime(datetime_format)
-                imaging_data = get_api_imaging_data(request)
+                imaging_data = get_api_imaging_data(tapis_token)
             else:
                 data_date = local_data_date
                 imaging_data = get_local_imaging_data(imaging_filepath, qc_filepath)
@@ -158,27 +182,28 @@ def api_imaging():
 
         return jsonify({'date': api_data_index['imaging'], 'data': api_data_cache['imaging']})
     except Exception as e:
-        traceback.print_exc()
+        app.logger.error(("Error in imaging API request: {0}").format(str(e)))
+        return jsonify('error: {}'.format(e))
+
+@app.route("/api/consort")
+def api_consort():
+    global datetime_format
+    global api_data_index
+    global api_data_cache
+    try:
+        tapis_token = get_tapis_token(request)
+        if not api_data_index['consort'] or not check_data_current(datetime.strptime(api_data_index['consort'], datetime_format)):
+            api_date = datetime.now().strftime(datetime_format)
+            consort_data_json = get_api_consort_data(tapis_token)
+            if consort_data_json:
+                api_data_cache['consort'] = consort_data_json
+                api_data_index['consort'] = api_date
+        return jsonify({'date': api_data_index['consort'], 'data': api_data_cache['consort']})
+    except Exception as e:
+        app.logger.error(("Error in consort API request: {0}").format(str(e)))
         return jsonify('error: {}'.format(e))
 
-# @app.route("/api/consort")
-# def api_consort():
-#     global datetime_format
-#     global api_data_index
-#     global api_data_cache
-#     # try:
-#     if not api_data_index['consort'] or not check_data_current(datetime.strptime(api_data_index['consort'], datetime_format)):
-#         api_date = datetime.now().strftime(datetime_format)
-#         consort_data_json = get_api_consort_data(request)
-#         if consort_data_json:
-#             api_data_cache['consort'] = consort_data_json
-#             api_data_index['consort'] = api_date
-#     return jsonify({'date': api_data_index['consort'], 'data': api_data_cache['consort']})
-#     # except Exception as e:
-#     #     traceback.print_exc()
-#     #     return jsonify('error: {}'.format(e))
-
-# # get_api_consort_data
+# get_api_consort_data
 
 
 @app.route("/api/blood")
@@ -187,11 +212,12 @@ def api_blood():
     global api_data_index
     global api_data_cache
     try:
+        tapis_token = get_tapis_token(request)
         if not api_data_index['blood'] or not check_data_current(datetime.strptime(api_data_index['blood'], datetime_format)):
 
             if data_access_type != 'LOCAL':
                 data_date = datetime.now().strftime(datetime_format)
-                blood_data, blood_data_request_status = get_api_blood_data(request)
+                blood_data, blood_data_request_status = get_api_blood_data(tapis_token)
             else:
                 data_date = local_data_date
                 blood_data, blood_data_request_status = get_local_blood_data(blood1_filepath, blood2_filepath)
@@ -208,7 +234,7 @@ def api_blood():
 
         return jsonify({'date': api_data_index['blood'], 'data': api_data_cache['blood']})
     except Exception as e:
-        traceback.print_exc()
+        app.logger.error(("Error in blood API request: {0}").format(str(e)))
         return jsonify('error: {}'.format(e))
 
 
@@ -220,11 +246,12 @@ def api_subjects():
     global subjects_raw_cols_for_reports
 
     try:
+        tapis_token = get_tapis_token(request)
         if not api_data_index['subjects'] or not check_data_current(datetime.strptime(api_data_index['subjects'], datetime_format)):
             # api_date = datetime.now().strftime(datetime_format)
             if data_access_type != 'LOCAL':
                 data_date = datetime.now().strftime(datetime_format)
-                latest_subjects_json = get_api_subjects_json(request)
+                latest_subjects_json = get_api_subjects_json(tapis_token)
             else:
                 data_date = local_data_date
                 latest_subjects_json = get_local_subjects_raw(subjects1_filepath, subjects2_filepath)
@@ -232,14 +259,14 @@ def api_subjects():
 
             # if latest_subjects_json:
             latest_data = process_subjects_data(latest_subjects_json,subjects_raw_cols_for_reports,screening_sites, display_terms_dict, display_terms_dict_multi)
-
+            app.logger.info(f"Caching subjects api response data. Date: {data_date}")
             api_data_index['subjects'] = data_date
             api_data_cache['subjects'] = latest_data
 
         return jsonify({'date': api_data_index['subjects'], 'data': api_data_cache['subjects']})
 
     except Exception as e:
-        traceback.print_exc()
+        app.logger.error(("Error in subjects API request: {0}").format(str(e)))
         return jsonify('error: {}'.format(e))
 
 @app.route("/api/monitoring")
@@ -257,20 +284,22 @@ def api_monitoring():
             else:
                 latest_monitoring_json_tuple = get_local_monitoring_data(monitoring_data_filepath)
 
-        latest_monitoring_json = latest_monitoring_json_tuple[0]
+            latest_monitoring_json = latest_monitoring_json_tuple[0]
+            app.logger.info(latest_monitoring_json.keys())
 
-        api_data_index['monitoring'] = latest_monitoring_json['date']
-        api_data_cache['monitoring'] = latest_monitoring_json['data']
+            #Convert filename timestamp format "%Y%m%dT%H%M%SZ" to "%m/%d/%Y, %H:%M:%S"
+            date_format = "%Y%m%dT%H%M%SZ"
+            data_date = latest_monitoring_json['date']
+            formatted_date = datetime.strptime(data_date, date_format).strftime("%m/%d/%Y, %H:%M:%S")
+            api_data_index['monitoring'] = formatted_date
+            api_data_cache['monitoring'] = latest_monitoring_json['data']
 
-        # This is currently in timestamp string format. may need to change this later if we want to standardize format
-        # print(api_data_index['monitoring'])
-
 
 
         return jsonify({'date': api_data_index['monitoring'], 'data': api_data_cache['monitoring']})
 
     except Exception as e:
-        traceback.print_exc()
+        app.logger.error(("Error in monitoring API request: {0}").format(str(e)))
         return jsonify('error: {}'.format(e))
 
 @app.route("/api/subjects_debug")
diff --git a/src/data_loading.py b/src/data_loading.py
index e45b567..0af005f 100644
--- a/src/data_loading.py
+++ b/src/data_loading.py
@@ -11,9 +11,13 @@
 
 import datetime
 from datetime import datetime
+from retrying import retry
+from flask import jsonify
 
 import logging
-logger = logging.getLogger(__name__)
+files_api_root = os.environ.get('FILES_API_ROOT')
+portal_api_root = os.environ.get('PORTAL_API_ROOT')
+logger = logging.getLogger("datastore_app")
 
 
 # ----------------------------------------------------------------------------
@@ -31,12 +35,38 @@ DATA_PATH = os.path.join(current_folder,local_data_path)
 ASSETS_PATH = os.path.join(current_folder,'assets')
 
 
+# ----------------------------------------------------------------------------
+# Common utils
+# ----------------------------------------------------------------------------
+class MissingPortalSessionIdException(Exception):
+    '''Custom Exception for Missing Session Id'''
+
+class TapisTokenRetrievalException(Exception):
+    '''Custom Exception for Tapis Token retrieval error'''
+
+def handle_exception(ex, api_message):
+    '''Handle errors for api requests. Provide error code for categorizing response'''
+    logger.error(("Error in {0} request: {1}").format(api_message, str(ex)))
+    error_code = 'DATA_ERROR'
+    if isinstance(ex, MissingPortalSessionIdException):
+        error_code = "MISSING_SESSION_ID"
+    elif isinstance(ex, TapisTokenRetrievalException):
+        error_code = "INVALID_TAPIS_TOKEN"
+    json_data = {
+        'error_code':error_code,
+        'error':str(ex)
+    }
+    return jsonify(json_data)
 
 # ----------------------------------------------------------------------------
 # Updating data checks
 # ----------------------------------------------------------------------------
-def check_data_current(data_date):
+def check_data_current(api_request, data_date=None):
     '''test to see if the date in a data dictionary is from after 10am on the same day as checking.'''
+    if data_date is not None and api_request.args.get('ignore_cache') == 'True':
+        logger.info('Ignoring cache for the request.')
+        return False
+    data_date = api_request if data_date is None else data_date  # legacy one-argument call: the lone value is the date
     now = datetime.now()
 
     if data_date.date() == now.date():
@@ -225,28 +255,11 @@ def get_local_monitoring_data(monitoring_data_filepath):
 # ----------------------------------------------------------------------------
 # LOAD DATA FROM API
 # ----------------------------------------------------------------------------
-# Get Tapis token if authorized to access data files
-def get_tapis_token(api_request):
-    try:
-        response = requests.get(portal_api_root + '/auth/tapis/', cookies=api_request.cookies)
-        #headers={'cookie':'coresessionid=' + api_request.cookies.get('coresessionid')})
-        if response:
-            tapis_token = response.json()['token']
-            return tapis_token
-        else:
-            logger.warning("Unauthorized to access tapis token")
-            raise Exception
-    except Exception as e:
-        logger.warning('portal api error: {}'.format(e))
-        return False
-
-def get_api_consort_data(api_request,
+def get_api_consort_data(tapis_token,
                         report='consort',
                         report_suffix = 'consort-data-[mcc]-latest.csv'):
     '''Load data for a specified consort file. Handle 500 server errors'''
     try:
-        tapis_token = get_tapis_token(api_request)
-
         if tapis_token:
             cosort_columns = ['source','target','value', 'mcc']
             consort_df = pd.DataFrame(columns=cosort_columns)
@@ -290,11 +303,9 @@ def get_api_consort_data(api_request,
 
 
 ## Function to rebuild dataset from apis
-def get_api_imaging_data(api_request):
+def get_api_imaging_data(tapis_token):
     ''' Load data from imaging api. Return bad status notice if hits Tapis API'''
-    try:
-        tapis_token = get_tapis_token(api_request)
-
+    try:
         if tapis_token:
             # IMAGING
             imaging_filepath = '/'.join([files_api_root,'imaging','imaging-log-latest.csv'])
@@ -320,19 +331,17 @@ def get_api_imaging_data(api_request):
 
             return imaging_data_json
         else:
-            logger.warning("Unauthorized attempt to access Imaging data")
-            return None
+            raise TapisTokenRetrievalException()
 
     except Exception as e:
         traceback.print_exc()
         return "exception: {}".format(e)
 
 ## Monitoring data for Briha's app
-def get_api_monitoring_data(api_request):
+def get_api_monitoring_data(tapis_token):
     ''' Load blood data from api'''
     try:
         current_datetime = datetime.now()
-        tapis_token = get_tapis_token(api_request)
 
         if tapis_token:
             # Monitoring
@@ -349,8 +358,7 @@ def get_api_monitoring_data(api_request):
 
             return monitoring_data_json, monitoring_request_status
         else:
-            logger.warning("Unauthorized attempt to access Monitoring data")
-            return None
+            raise TapisTokenRetrievalException()
 
     except Exception as e:
         traceback.print_exc()
@@ -401,19 +409,16 @@ def get_api_blood_data(api_request):
 
             return blood_data_json, request_status
         else:
-            logger.warning("Unauthorized attempt to access Blood data")
-            return None
+            raise TapisTokenRetrievalException()
 
     except Exception as e:
         traceback.print_exc()
         return None
 
 
-def get_api_subjects_json(api_request):
+def get_api_subjects_json(tapis_token):
     ''' Load subjects data from api. Note data needs to be cleaned, etc. to create properly formatted data product'''
-    try:
-        tapis_token = get_tapis_token(api_request)
-
+    try:
         if tapis_token:
             # Load Json Data
             subjects1_filepath = '/'.join([files_api_root,'subjects','subjects-1-latest.json'])
@@ -437,8 +442,7 @@ def get_api_subjects_json(api_request):
 
             return subjects_json
         else:
-            logger.warning("Unauthorized attempt to access Subjects data")
-            return None
+            raise TapisTokenRetrievalException()
 
     except Exception as e:
         traceback.print_exc()