From b7dc6ff526311052ccd04563007cd0d2b250f32d Mon Sep 17 00:00:00 2001 From: Ronny Restrepo Date: Fri, 8 Oct 2021 17:05:22 +1100 Subject: [PATCH 01/13] TEMP: attempt pytest --- seerpy/graphql.py | 40 +++++ seerpy/seerpy.py | 112 +++++++++++-- tests/test_data/study_label_groups.json | 22 +++ tests/test_data/study_label_groups_empty.json | 7 + tests/test_data/study_label_groups_many.json | 44 ++++++ tests/test_seerpy.py | 148 +++++++++++++++++- 6 files changed, 351 insertions(+), 22 deletions(-) create mode 100644 tests/test_data/study_label_groups.json create mode 100644 tests/test_data/study_label_groups_empty.json create mode 100644 tests/test_data/study_label_groups_many.json diff --git a/seerpy/graphql.py b/seerpy/graphql.py index bd4462c..e03d80c 100644 --- a/seerpy/graphql.py +++ b/seerpy/graphql.py @@ -210,6 +210,46 @@ def get_string_from_list_of_dicts(list_of_dicts): } }""" +# GET_LABEL_GROUPS_FOR_STUDY_IDS_PAGED = """ +# query studies($study_ids: [String], +# $limit: PaginationAmount, +# $offset: Int, +# $max_labelgroups: PaginationAmount = 100, +# ) { +# studies (studyIds: $study_ids, limit: $limit, offset: $offset) { +# id +# name +# labelGroups(limit: $max_labelgroups) { +# id +# name +# description +# labelType +# numberOfLabels +# } +# } +# }""" + + +GET_ALL_LABEL_GROUPS_FOR_STUDY_ID_PAGED = """ + query getStudyLabelGroups( + $study_id: String!, + $limit: PaginationAmount, + $offset: Int + ) { + study(id: $study_id) { + id + name + labelGroups(limit: $limit, offset: $offset) { + id + name + description + labelType + numberOfLabels + } + } + }""" + + def get_channel_groups_query_string(study_id): return """ diff --git a/seerpy/seerpy.py b/seerpy/seerpy.py index e8a4da7..e12bd7a 100644 --- a/seerpy/seerpy.py +++ b/seerpy/seerpy.py @@ -934,43 +934,77 @@ def get_labels_string_dataframe(self, study_id, label_group_id, from_time=0, to_ 'labelString.s': 'labels.startTime' }) return label_group + + def get_study_label_groups(self, study_id, limit=50): + 
"""Given a study_id, it returns all the labelgroups. - def get_label_groups_for_studies(self, study_ids, limit=50): - """ - Get label group information for all provided study IDs. + Parameters + ---------- + study_id : str + limit : int, optional + Batch size for repeated API calls + Returns + ------- + label_groups : dict + Keys included: 'id', 'labelGroups' and 'name' + """ + results = self.get_paginated_response(graphql.GET_ALL_LABEL_GROUPS_FOR_STUDY_ID_PAGED, + variable_values=dict(study_id=study_id), + limit=limit, + object_path=["study"], + iteration_path=["labelGroups"], + ) + return results + + def get_label_groups_for_studies(self, study_ids, limit=50, labelgroup_limit=50): + """Get label group information for all provided study IDs. + Parameters ---------- study_ids : str or list of str One or more unique IDs, each identifying a study limit : int, optional Batch size for repeated API calls + labelgroup_limit: int, optional + Batch size for paginating at the label groups level. Returns ------- label_groups : list of dict Keys included: 'id', 'labelGroups' and 'name' + """ if isinstance(study_ids, str): study_ids = [study_ids] - - variable_values = {'study_ids': study_ids} - return self.get_paginated_response(graphql.GET_LABEL_GROUPS_FOR_STUDY_IDS_PAGED, - variable_values, limit, ['studies']) - - def get_label_groups_for_studies_dataframe(self, study_ids, limit=50): - """ - Get label group information for all provided study IDs as a DataFrame. See - `get_label_groups_for_studies()` for details. + results = [] + for study_id in study_ids: + _results = self.get_study_label_groups(study_id, limit=labelgroup_limit) + results.append(_results) + return results + + def get_label_groups_for_studies_dataframe(self, study_ids, labelgroup_limit=100, limit=50): + """Get label group information for all provided study IDs as a DataFrame. + See `get_label_groups_for_studies()` for details. 
+ + Parameters + ---------- + study_ids : str or list of str + One or more unique IDs, each identifying a study + limit : int, optional + Batch size for repeated API calls + labelgroup_limit: int, optional + Batch size for paginating at the label groups level. Returns ------- label_groups_df : pd.DataFrame Columns with details on name, id, type, number of labels, study ID and name """ - # TODO: can we use json_normalize or pandas_flatten for this? label_groups = [] - for study in self.get_label_groups_for_studies(study_ids, limit): + for study in self.get_label_groups_for_studies(study_ids, + limit=limit, + labelgroup_limit=labelgroup_limit): for label_group in study['labelGroups']: label_group['labelGroup.id'] = label_group.pop('id') label_group['labelGroup.name'] = label_group.pop('name') @@ -982,6 +1016,56 @@ def get_label_groups_for_studies_dataframe(self, study_ids, limit=50): label_groups.append(label_group) return pd.DataFrame(label_groups) + + def get_label_groups_for_studies_old(self, study_ids, limit=50): + """ + Get label group information for all provided study IDs. + + Parameters + ---------- + study_ids : str or list of str + One or more unique IDs, each identifying a study + limit : int, optional + Batch size for repeated API calls + + Returns + ------- + label_groups : list of dict + Keys included: 'id', 'labelGroups' and 'name' + """ + if isinstance(study_ids, str): + study_ids = [study_ids] + + variable_values = {'study_ids': study_ids} + return self.get_paginated_response(graphql.GET_LABEL_GROUPS_FOR_STUDY_IDS_PAGED, + variable_values, limit, ['studies']) + + # def get_label_groups_for_studies_dataframe(self, study_ids, limit=50): + # """ + # Get label group information for all provided study IDs as a DataFrame. See + # `get_label_groups_for_studies()` for details. 
+ + # Returns + # ------- + # label_groups_df : pd.DataFrame + # Columns with details on name, id, type, number of labels, study ID and name + # limit : int, optional + # Batch size for repeated API calls + # """ + # # TODO: can we use json_normalize or pandas_flatten for this? + # label_groups = [] + # for study in self.get_label_groups_for_studies(study_ids, limit): + # for label_group in study['labelGroups']: + # label_group['labelGroup.id'] = label_group.pop('id') + # label_group['labelGroup.name'] = label_group.pop('name') + # label_group['labelGroup.description'] = label_group.pop('description') + # label_group['labelGroup.labelType'] = label_group.pop('labelType') + # label_group['labelGroup.numberOfLabels'] = label_group.pop('numberOfLabels') + # label_group['id'] = study['id'] + # label_group['name'] = study['name'] + # label_groups.append(label_group) + # return pd.DataFrame(label_groups) + def get_viewed_times_dataframe(self, study_id, limit=250, offset=0): """ Get timestamp info about all parts of a study that have been viewed by various users. 
diff --git a/tests/test_data/study_label_groups.json b/tests/test_data/study_label_groups.json new file mode 100644 index 0000000..ff57888 --- /dev/null +++ b/tests/test_data/study_label_groups.json @@ -0,0 +1,22 @@ +{ + "study": { + "id": "study-1-id", + "labelGroups": [ + { + "description": "description", + "id": "label-group-1-id", + "labelType": "default", + "numberOfLabels": 2, + "name": "LabelGroup 1" + }, + { + "description": "description", + "id": "label-group-2-id", + "labelType": "default", + "numberOfLabels": 0, + "name": "LabelGroup 2" + } + ], + "name": "study 1 name" + } +} \ No newline at end of file diff --git a/tests/test_data/study_label_groups_empty.json b/tests/test_data/study_label_groups_empty.json new file mode 100644 index 0000000..d3dfea2 --- /dev/null +++ b/tests/test_data/study_label_groups_empty.json @@ -0,0 +1,7 @@ +{ + "study": { + "id": "study-1-id", + "labelGroups": [], + "name": "study 1 name" + } +} \ No newline at end of file diff --git a/tests/test_data/study_label_groups_many.json b/tests/test_data/study_label_groups_many.json new file mode 100644 index 0000000..2c31508 --- /dev/null +++ b/tests/test_data/study_label_groups_many.json @@ -0,0 +1,44 @@ +[{ + "study": { + "id": "study-1-id", + "labelGroups": [ + { + "description": "description", + "id": "label-group-1-id", + "labelType": "default", + "numberOfLabels": 2, + "name": "LabelGroup 1" + }, + { + "description": "description", + "id": "label-group-2-id", + "labelType": "default", + "numberOfLabels": 0, + "name": "LabelGroup 2" + } + ], + "name": "study 1 name" + } +},{ + "study": { + "id": "study-2-id", + "labelGroups": [ + { + "description": "description", + "id": "label-group-1-id", + "labelType": "default", + "numberOfLabels": 2, + "name": "LabelGroup 1" + }, + { + "description": "description", + "id": "label-group-2-id", + "labelType": "default", + "numberOfLabels": 0, + "name": "LabelGroup 2" + } + ], + "name": "study 2 name" + } +} +] \ No newline at end of file diff 
--git a/tests/test_seerpy.py b/tests/test_seerpy.py index 3be36fc..1c9b840 100644 --- a/tests/test_seerpy.py +++ b/tests/test_seerpy.py @@ -71,7 +71,7 @@ def test_query_variables_are_passed_on_initial_failure(self, gql_client, unused_ class TestPaginatedQuery: @classmethod def check_paginated_query(cls, seer_connect, function_to_test, function_args=None, - function_kwargs=None, expected_result=None, query_response=None): + function_kwargs=None, expected_result=None, query_response=None, iteration_path=None): """Generic function to test a function which calls Seer_connect.get_paginated_response().""" # run test with mock.patch.object(SeerConnect, 'get_paginated_response', @@ -101,9 +101,10 @@ def check_paginated_query(cls, seer_connect, function_to_test, function_args=Non assert not missing_path_items, ( f'object path item(s) {missing_path_items} not found in query string {query_string}') - iteration_path = None + iteration_path = iteration_path if len(paginate.call_args[0]) > 4: iteration_path = paginate.call_args[0][4] + print(f"iteration path: {iteration_path}") missing_path_items = [ path_item for path_item in iteration_path if path_item not in query_string ] @@ -187,12 +188,143 @@ def test_get_studies_by_id(self, gql_client, unused_sleep, seer_connect): function_args=[['study-1-id', 'study-2-id']], response_file='studies.json', empty_response={'studies': []}) - def test_get_label_groups_for_studies(self, gql_client, unused_sleep, seer_connect): - # run test and check result - self.check_paginated_query_with_data_variations( - gql_client, seer_connect, function_to_test=seer_connect.get_label_groups_for_studies, - function_args=[['study-1-id', 'study-2-id']], response_file='label_groups.json', - empty_response={'studies': []}) + # def test_get_study_label_groups(self, gql_client, unused_sleep, seer_connect): + # # run test and check result + # self.check_paginated_query_with_data_variations( + # gql_client, seer_connect, 
function_to_test=seer_connect.get_study_label_groups, + # function_args=['study-1-id'], response_file='study_label_groups.json', + # expected_result={ + # "id": "study-1-id", + # "labelGroups": [ + # { + # "description": "description", + # "id": "label-group-1-id", + # "labelType": "default", + # "numberOfLabels": 2, + # "name": "LabelGroup 1" + # }, + # { + # "description": "description", + # "id": "label-group-2-id", + # "labelType": "default", + # "numberOfLabels": 0, + # "name": "LabelGroup 2" + # } + # ], + # "name": "study 1 name" + # } + # # empty_response={ + # # "study": { + # # 'id': 'study-1-id', + # # 'name': 'study 1 name', + # # 'labelGroups': [] + # # } + # # } + # ) + + # def test_get_label_groups_for_studies(self, gql_client, unused_sleep, seer_connect): + # # run test and check result + # self.check_paginated_query_with_data_variations( + # gql_client, seer_connect, function_to_test=seer_connect.get_label_groups_for_studies, + # function_args=[['study-1-id', 'study-2-id']], response_file='label_groups.json', + # empty_response={'studies': []}) + + # def test_get_study_label_groups(self, gql_client, unused_sleep, seer_connect): + # # setup + # side_effects = [] + + # # this is the call in get_user_ids_in_user_cohort() + # with open(TEST_DATA_DIR / "study_label_groups.json", "r") as f: + # query_data = json.load(f) + # print(query_data) + # side_effects.append(query_data) + # # expected_result = [lg for lg in query_data['study']['labelGroups']] + # expected_result = query_data['study'] + # print(expected_result) + # # with open(TEST_DATA_DIR / "study_label_groups.json", "r") as f: + # # side_effects.append(json.load(f)) + + # gql_client.return_value.execute.side_effect = side_effects + + # # run test and check result + # self.check_paginated_query(seer_connect, seer_connect.get_study_label_groups, + # function_args=['study-1-id'], expected_result=expected_result) + + def test_get_study_label_groups(self, gql_client, unused_sleep, seer_connect): + # 
setup + side_effects = [] + + # # this is the call in get_user_ids_in_user_cohort() + with open(TEST_DATA_DIR / "study_label_groups_many.json", "r") as f: + query_data = json.load(f) + print(query_data) + side_effects = [item for item in query_data] + # # expected_result = [lg for lg in query_data['study']['labelGroups']] + # expected_result = query_data['study'] + # print(expected_result) + # # with open(TEST_DATA_DIR / "study_label_groups.json", "r") as f: + # # side_effects.append(json.load(f)) + + gql_client.return_value.execute.side_effect = side_effects + + + # get_study_label_groups + # # run test and check result + # self.check_paginated_query(seer_connect, seer_connect.get_study_label_groups, + # function_args=['study-1-id'], expected_result=expected_result) + + function_to_test = seer_connect.get_study_label_groups + # ====================================================================== + with mock.patch.object(SeerConnect, 'get_paginated_response', + wraps=seer_connect.get_paginated_response) as paginate: + result = function_to_test('study-1-id') + + # # check result + # assert paginate.call_args + + # query_string = paginate.call_args[0][0] + # assert 'limit' in query_string + # assert 'offset' in query_string + + # variable_values = paginate.call_args[0][1] + # missing_keys = [key for key in variable_values.keys() if key not in query_string] + # assert not missing_keys, f'key(s) {missing_keys} not found in query string {query_string}' + + # object_path = paginate.call_args[0][3] + # missing_path_items = [ + # path_item for path_item in object_path if path_item not in query_string + # ] + # assert not missing_path_items, ( + # f'object path item(s) {missing_path_items} not found in query string {query_string}') + + # iteration_path = iteration_path + # if len(paginate.call_args[0]) > 4: + # iteration_path = paginate.call_args[0][4] + # print(f"iteration path: {iteration_path}") + # missing_path_items = [ + # path_item for path_item in iteration_path 
if path_item not in query_string + # ] + # assert not missing_path_items, (f'iteration path item(s) {missing_path_items} not found' + # f' in query string {query_string}') + + # if query_response: + # expected_result = query_response + # for path_item in object_path: + # expected_result = expected_result[path_item] + + # if iteration_path: + # iteration_result = expected_result + # for iteration_path_item in iteration_path: + # iteration_result = iteration_result[iteration_path_item] + # if not iteration_result: + # # if this is empty, the expected result is empty + # expected_result = iteration_result + + # if expected_result: + # assert result == expected_result + + + def test_get_documents_for_studies(self, gql_client, unused_sleep, seer_connect): # run test and check result From 79fbd2b06c9daf7bb2d45c5e7d115e3865321157 Mon Sep 17 00:00:00 2001 From: Ronny Restrepo Date: Mon, 11 Oct 2021 16:19:25 +1100 Subject: [PATCH 02/13] FEAT: add labelgroups tests --- seerpy/seerpy.py | 4 +- tests/test_data/__init__.py | 0 tests/test_data/label_groups_for_studies.py | 99 ++++++++++++ tests/test_data/label_groups_for_study.py | 62 ++++++++ tests/test_seerpy.py | 165 ++++---------------- 5 files changed, 191 insertions(+), 139 deletions(-) create mode 100644 tests/test_data/__init__.py create mode 100644 tests/test_data/label_groups_for_studies.py create mode 100644 tests/test_data/label_groups_for_study.py diff --git a/seerpy/seerpy.py b/seerpy/seerpy.py index e12bd7a..8c96830 100644 --- a/seerpy/seerpy.py +++ b/seerpy/seerpy.py @@ -935,7 +935,7 @@ def get_labels_string_dataframe(self, study_id, label_group_id, from_time=0, to_ }) return label_group - def get_study_label_groups(self, study_id, limit=50): + def get_label_groups_for_study(self, study_id, limit=50): """Given a study_id, it returns all the labelgroups. 
Parameters @@ -979,7 +979,7 @@ def get_label_groups_for_studies(self, study_ids, limit=50, labelgroup_limit=50) study_ids = [study_ids] results = [] for study_id in study_ids: - _results = self.get_study_label_groups(study_id, limit=labelgroup_limit) + _results = self.get_label_groups_for_study(study_id, limit=labelgroup_limit) results.append(_results) return results diff --git a/tests/test_data/__init__.py b/tests/test_data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_data/label_groups_for_studies.py b/tests/test_data/label_groups_for_studies.py new file mode 100644 index 0000000..5680c1b --- /dev/null +++ b/tests/test_data/label_groups_for_studies.py @@ -0,0 +1,99 @@ +import io +import pandas as pd + +# INDIVIDUAL RESPONSES IT GETS FROM CALLING client.get_label_groups_for_study() +individual_study_responses = [ + { + "id": "study1_id", + "name": "study1_name", + "labelGroups": [ + { + "name": "study1_labelgroup1_name", + "description": "study1_labelgroup1_description", + "id": "study1_labelgroup1_id", + "labelType": "default", + "numberOfLabels": 101, + }, + { + "name": "study1_labelgroup2_name", + "description": "study1_labelgroup2_description", + "id": "study1_labelgroup2_id", + "labelType": "default", + "numberOfLabels": 102, + } + ] + }, + { + "id": "study2_id", + "name": "study2_name", + "labelGroups": [ + { + "name": "study2_labelgroup1_name", + "description": "study2_labelgroup1_description", + "id": "study2_labelgroup1_id", + "labelType": "default", + "numberOfLabels": 201, + }, + { + "name": "study2_labelgroup2_name", + "description": "study2_labelgroup2_description", + "id": "study2_labelgroup2_id", + "labelType": "default", + "numberOfLabels": 202, + } + ] + }, +] + +expected_seerpy_response = [ + { + "id": "study1_id", + "name": "study1_name", + "labelGroups": [ + { + "name": "study1_labelgroup1_name", + "description": "study1_labelgroup1_description", + "id": "study1_labelgroup1_id", + "labelType": "default", + 
"numberOfLabels": 101, + }, + { + "name": "study1_labelgroup2_name", + "description": "study1_labelgroup2_description", + "id": "study1_labelgroup2_id", + "labelType": "default", + "numberOfLabels": 102, + } + ] + }, + { + "id": "study2_id", + "name": "study2_name", + "labelGroups": [ + { + "name": "study2_labelgroup1_name", + "description": "study2_labelgroup1_description", + "id": "study2_labelgroup1_id", + "labelType": "default", + "numberOfLabels": 201, + }, + { + "name": "study2_labelgroup2_name", + "description": "study2_labelgroup2_description", + "id": "study2_labelgroup2_id", + "labelType": "default", + "numberOfLabels": 202, + } + ] + }, +] + +csv = """ +labelGroup.id,labelGroup.name,labelGroup.description,labelGroup.labelType,labelGroup.numberOfLabels,id,name +study1_labelgroup1_id,study1_labelgroup1_name,study1_labelgroup1_description,default,101,study1_id,study1_name +study1_labelgroup2_id,study1_labelgroup2_name,study1_labelgroup2_description,default,102,study1_id,study1_name +study2_labelgroup1_id,study2_labelgroup1_name,study2_labelgroup1_description,default,201,study2_id,study2_name +study2_labelgroup2_id,study2_labelgroup2_name,study2_labelgroup2_description,default,202,study2_id,study2_name +""" + +expected_seerpy_df = pd.read_csv(io.StringIO(csv)) diff --git a/tests/test_data/label_groups_for_study.py b/tests/test_data/label_groups_for_study.py new file mode 100644 index 0000000..122a0fa --- /dev/null +++ b/tests/test_data/label_groups_for_study.py @@ -0,0 +1,62 @@ + +# RAW RESPONSES IT GETS FROM GRAPH QL QUERY +raw_paginated_responses = [ + { + "study": { + "id": "study1_id", + "name": "study1_name", + "labelGroups": [ + { + "name": "labelgroup1_name", + "description": "labelgroup1_description", + "id": "labelgroup1_id", + "labelType": "default", + "numberOfLabels": 2, + }, + ] + } + }, + { + "study": { + "id": "study1_id", + "name": "study1_name", + "labelGroups": [ + { + "name": "labelgroup2_name", + "description": "labelgroup2_description", 
+ "id": "labelgroup2_id", + "labelType": "default", + "numberOfLabels": 5, + } + ] + } + }, + { + "study": { + "id": "study1_id", + "name": "study1_name", + "labelGroups": [] + } + }, +] + +expected_seerpy_response = { + "id": "study1_id", + "name": "study1_name", + "labelGroups": [ + { + "name": "labelgroup1_name", + "description": "labelgroup1_description", + "id": "labelgroup1_id", + "labelType": "default", + "numberOfLabels": 2, + }, + { + "name": "labelgroup2_name", + "description": "labelgroup2_description", + "id": "labelgroup2_id", + "labelType": "default", + "numberOfLabels": 5, + } + ] +} \ No newline at end of file diff --git a/tests/test_seerpy.py b/tests/test_seerpy.py index 1c9b840..171ca95 100644 --- a/tests/test_seerpy.py +++ b/tests/test_seerpy.py @@ -11,6 +11,9 @@ from seerpy.seerpy import SeerConnect import seerpy.graphql as graphql +from tests.test_data import label_groups_for_study +from tests.test_data import label_groups_for_studies + # having a class is useful to allow patches to be shared across mutliple test functions, but then # pylint complains that the methods could be a function. this disables that warning. 
# pylint:disable=no-self-use @@ -188,143 +191,6 @@ def test_get_studies_by_id(self, gql_client, unused_sleep, seer_connect): function_args=[['study-1-id', 'study-2-id']], response_file='studies.json', empty_response={'studies': []}) - # def test_get_study_label_groups(self, gql_client, unused_sleep, seer_connect): - # # run test and check result - # self.check_paginated_query_with_data_variations( - # gql_client, seer_connect, function_to_test=seer_connect.get_study_label_groups, - # function_args=['study-1-id'], response_file='study_label_groups.json', - # expected_result={ - # "id": "study-1-id", - # "labelGroups": [ - # { - # "description": "description", - # "id": "label-group-1-id", - # "labelType": "default", - # "numberOfLabels": 2, - # "name": "LabelGroup 1" - # }, - # { - # "description": "description", - # "id": "label-group-2-id", - # "labelType": "default", - # "numberOfLabels": 0, - # "name": "LabelGroup 2" - # } - # ], - # "name": "study 1 name" - # } - # # empty_response={ - # # "study": { - # # 'id': 'study-1-id', - # # 'name': 'study 1 name', - # # 'labelGroups': [] - # # } - # # } - # ) - - # def test_get_label_groups_for_studies(self, gql_client, unused_sleep, seer_connect): - # # run test and check result - # self.check_paginated_query_with_data_variations( - # gql_client, seer_connect, function_to_test=seer_connect.get_label_groups_for_studies, - # function_args=[['study-1-id', 'study-2-id']], response_file='label_groups.json', - # empty_response={'studies': []}) - - # def test_get_study_label_groups(self, gql_client, unused_sleep, seer_connect): - # # setup - # side_effects = [] - - # # this is the call in get_user_ids_in_user_cohort() - # with open(TEST_DATA_DIR / "study_label_groups.json", "r") as f: - # query_data = json.load(f) - # print(query_data) - # side_effects.append(query_data) - # # expected_result = [lg for lg in query_data['study']['labelGroups']] - # expected_result = query_data['study'] - # print(expected_result) - # # with 
open(TEST_DATA_DIR / "study_label_groups.json", "r") as f: - # # side_effects.append(json.load(f)) - - # gql_client.return_value.execute.side_effect = side_effects - - # # run test and check result - # self.check_paginated_query(seer_connect, seer_connect.get_study_label_groups, - # function_args=['study-1-id'], expected_result=expected_result) - - def test_get_study_label_groups(self, gql_client, unused_sleep, seer_connect): - # setup - side_effects = [] - - # # this is the call in get_user_ids_in_user_cohort() - with open(TEST_DATA_DIR / "study_label_groups_many.json", "r") as f: - query_data = json.load(f) - print(query_data) - side_effects = [item for item in query_data] - # # expected_result = [lg for lg in query_data['study']['labelGroups']] - # expected_result = query_data['study'] - # print(expected_result) - # # with open(TEST_DATA_DIR / "study_label_groups.json", "r") as f: - # # side_effects.append(json.load(f)) - - gql_client.return_value.execute.side_effect = side_effects - - - # get_study_label_groups - # # run test and check result - # self.check_paginated_query(seer_connect, seer_connect.get_study_label_groups, - # function_args=['study-1-id'], expected_result=expected_result) - - function_to_test = seer_connect.get_study_label_groups - # ====================================================================== - with mock.patch.object(SeerConnect, 'get_paginated_response', - wraps=seer_connect.get_paginated_response) as paginate: - result = function_to_test('study-1-id') - - # # check result - # assert paginate.call_args - - # query_string = paginate.call_args[0][0] - # assert 'limit' in query_string - # assert 'offset' in query_string - - # variable_values = paginate.call_args[0][1] - # missing_keys = [key for key in variable_values.keys() if key not in query_string] - # assert not missing_keys, f'key(s) {missing_keys} not found in query string {query_string}' - - # object_path = paginate.call_args[0][3] - # missing_path_items = [ - # path_item for 
path_item in object_path if path_item not in query_string - # ] - # assert not missing_path_items, ( - # f'object path item(s) {missing_path_items} not found in query string {query_string}') - - # iteration_path = iteration_path - # if len(paginate.call_args[0]) > 4: - # iteration_path = paginate.call_args[0][4] - # print(f"iteration path: {iteration_path}") - # missing_path_items = [ - # path_item for path_item in iteration_path if path_item not in query_string - # ] - # assert not missing_path_items, (f'iteration path item(s) {missing_path_items} not found' - # f' in query string {query_string}') - - # if query_response: - # expected_result = query_response - # for path_item in object_path: - # expected_result = expected_result[path_item] - - # if iteration_path: - # iteration_result = expected_result - # for iteration_path_item in iteration_path: - # iteration_result = iteration_result[iteration_path_item] - # if not iteration_result: - # # if this is empty, the expected result is empty - # expected_result = iteration_result - - # if expected_result: - # assert result == expected_result - - - def test_get_documents_for_studies(self, gql_client, unused_sleep, seer_connect): # run test and check result @@ -982,3 +848,28 @@ def test_empty(self, gql_client, unused_sleep, seer_connect): # check result assert result == expected_result + + +@mock.patch('time.sleep', return_value=None) +@mock.patch('seerpy.seerpy.GQLClient', autospec=True) +class TestLabelGroups: + def test_get_label_groups_for_study(self, gql_client, unused_sleep, seer_connect): + raw_paginated_responses = label_groups_for_study.raw_paginated_responses + expected_seerpy_response = label_groups_for_study.expected_seerpy_response + + gql_client.return_value.execute.side_effect = raw_paginated_responses + response = seer_connect.get_label_groups_for_study("study1") + assert response == expected_seerpy_response + + def test_get_label_groups_for_studies(self, gql_client, unused_sleep, seer_connect): + with 
mock.patch.object(seer_connect, "get_label_groups_for_study") as mock_stdy_labelgroups: + mock_stdy_labelgroups.side_effect = label_groups_for_studies.individual_study_responses + response = seer_connect.get_label_groups_for_studies(["study1","study2"]) + assert label_groups_for_studies.expected_seerpy_response == response + + def test_get_label_groups_for_studies_dataframe(self, gql_client, unused_sleep, seer_connect): + with mock.patch.object(seer_connect, "get_label_groups_for_study") as mock_stdy_labelgroups: + mock_stdy_labelgroups.side_effect = label_groups_for_studies.individual_study_responses + response = seer_connect.get_label_groups_for_studies_dataframe(["study1","study2"]) + assert label_groups_for_studies.expected_seerpy_df.equals(response) + print(response) From 6831d5db14347454162110a715383e36ca0f99f5 Mon Sep 17 00:00:00 2001 From: Ronny Restrepo Date: Mon, 11 Oct 2021 16:20:27 +1100 Subject: [PATCH 03/13] CHORE: remove unecessary files --- tests/test_data/study_label_groups.json | 22 ---------- tests/test_data/study_label_groups_empty.json | 7 --- tests/test_data/study_label_groups_many.json | 44 ------------------- 3 files changed, 73 deletions(-) delete mode 100644 tests/test_data/study_label_groups.json delete mode 100644 tests/test_data/study_label_groups_empty.json delete mode 100644 tests/test_data/study_label_groups_many.json diff --git a/tests/test_data/study_label_groups.json b/tests/test_data/study_label_groups.json deleted file mode 100644 index ff57888..0000000 --- a/tests/test_data/study_label_groups.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "study": { - "id": "study-1-id", - "labelGroups": [ - { - "description": "description", - "id": "label-group-1-id", - "labelType": "default", - "numberOfLabels": 2, - "name": "LabelGroup 1" - }, - { - "description": "description", - "id": "label-group-2-id", - "labelType": "default", - "numberOfLabels": 0, - "name": "LabelGroup 2" - } - ], - "name": "study 1 name" - } -} \ No newline at end of file 
diff --git a/tests/test_data/study_label_groups_empty.json b/tests/test_data/study_label_groups_empty.json deleted file mode 100644 index d3dfea2..0000000 --- a/tests/test_data/study_label_groups_empty.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "study": { - "id": "study-1-id", - "labelGroups": [], - "name": "study 1 name" - } -} \ No newline at end of file diff --git a/tests/test_data/study_label_groups_many.json b/tests/test_data/study_label_groups_many.json deleted file mode 100644 index 2c31508..0000000 --- a/tests/test_data/study_label_groups_many.json +++ /dev/null @@ -1,44 +0,0 @@ -[{ - "study": { - "id": "study-1-id", - "labelGroups": [ - { - "description": "description", - "id": "label-group-1-id", - "labelType": "default", - "numberOfLabels": 2, - "name": "LabelGroup 1" - }, - { - "description": "description", - "id": "label-group-2-id", - "labelType": "default", - "numberOfLabels": 0, - "name": "LabelGroup 2" - } - ], - "name": "study 1 name" - } -},{ - "study": { - "id": "study-2-id", - "labelGroups": [ - { - "description": "description", - "id": "label-group-1-id", - "labelType": "default", - "numberOfLabels": 2, - "name": "LabelGroup 1" - }, - { - "description": "description", - "id": "label-group-2-id", - "labelType": "default", - "numberOfLabels": 0, - "name": "LabelGroup 2" - } - ], - "name": "study 2 name" - } -} -] \ No newline at end of file From a509311235e8b4f47d62226aa0d04ce748a7b2b9 Mon Sep 17 00:00:00 2001 From: Ronny Restrepo Date: Mon, 11 Oct 2021 16:41:20 +1100 Subject: [PATCH 04/13] CHORE: remove redundant queries --- seerpy/graphql.py | 36 ---------------------------------- seerpy/seerpy.py | 50 ----------------------------------------------- 2 files changed, 86 deletions(-) diff --git a/seerpy/graphql.py b/seerpy/graphql.py index e03d80c..75d8d65 100644 --- a/seerpy/graphql.py +++ b/seerpy/graphql.py @@ -193,42 +193,6 @@ def get_string_from_list_of_dicts(list_of_dicts): } }""" -GET_LABEL_GROUPS_FOR_STUDY_IDS_PAGED = """ - query 
studies($study_ids: [String], - $limit: PaginationAmount, - $offset: Int) { - studies (studyIds: $study_ids, limit: $limit, offset: $offset) { - id - name - labelGroups { - id - name - description - labelType - numberOfLabels - } - } - }""" - -# GET_LABEL_GROUPS_FOR_STUDY_IDS_PAGED = """ -# query studies($study_ids: [String], -# $limit: PaginationAmount, -# $offset: Int, -# $max_labelgroups: PaginationAmount = 100, -# ) { -# studies (studyIds: $study_ids, limit: $limit, offset: $offset) { -# id -# name -# labelGroups(limit: $max_labelgroups) { -# id -# name -# description -# labelType -# numberOfLabels -# } -# } -# }""" - GET_ALL_LABEL_GROUPS_FOR_STUDY_ID_PAGED = """ query getStudyLabelGroups( diff --git a/seerpy/seerpy.py b/seerpy/seerpy.py index 8c96830..ebb58ad 100644 --- a/seerpy/seerpy.py +++ b/seerpy/seerpy.py @@ -1016,56 +1016,6 @@ def get_label_groups_for_studies_dataframe(self, study_ids, labelgroup_limit=100 label_groups.append(label_group) return pd.DataFrame(label_groups) - - def get_label_groups_for_studies_old(self, study_ids, limit=50): - """ - Get label group information for all provided study IDs. - - Parameters - ---------- - study_ids : str or list of str - One or more unique IDs, each identifying a study - limit : int, optional - Batch size for repeated API calls - - Returns - ------- - label_groups : list of dict - Keys included: 'id', 'labelGroups' and 'name' - """ - if isinstance(study_ids, str): - study_ids = [study_ids] - - variable_values = {'study_ids': study_ids} - return self.get_paginated_response(graphql.GET_LABEL_GROUPS_FOR_STUDY_IDS_PAGED, - variable_values, limit, ['studies']) - - # def get_label_groups_for_studies_dataframe(self, study_ids, limit=50): - # """ - # Get label group information for all provided study IDs as a DataFrame. See - # `get_label_groups_for_studies()` for details. 
- - # Returns - # ------- - # label_groups_df : pd.DataFrame - # Columns with details on name, id, type, number of labels, study ID and name - # limit : int, optional - # Batch size for repeated API calls - # """ - # # TODO: can we use json_normalize or pandas_flatten for this? - # label_groups = [] - # for study in self.get_label_groups_for_studies(study_ids, limit): - # for label_group in study['labelGroups']: - # label_group['labelGroup.id'] = label_group.pop('id') - # label_group['labelGroup.name'] = label_group.pop('name') - # label_group['labelGroup.description'] = label_group.pop('description') - # label_group['labelGroup.labelType'] = label_group.pop('labelType') - # label_group['labelGroup.numberOfLabels'] = label_group.pop('numberOfLabels') - # label_group['id'] = study['id'] - # label_group['name'] = study['name'] - # label_groups.append(label_group) - # return pd.DataFrame(label_groups) - def get_viewed_times_dataframe(self, study_id, limit=250, offset=0): """ Get timestamp info about all parts of a study that have been viewed by various users. 
From c68c23f13eb629345b9519a8360f5103fac95eb9 Mon Sep 17 00:00:00 2001 From: Ronny Restrepo Date: Mon, 11 Oct 2021 16:41:42 +1100 Subject: [PATCH 05/13] FEAT: test the syntax of new labelgroups query --- tests/test_graphql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_graphql.py b/tests/test_graphql.py index 0fce759..12370c3 100644 --- a/tests/test_graphql.py +++ b/tests/test_graphql.py @@ -13,7 +13,7 @@ def test_graphql_query_string(): gql(graphql.GET_STUDY_WITH_DATA) gql(graphql.GET_LABELS_PAGED) gql(graphql.GET_LABELS_STRING) - gql(graphql.GET_LABEL_GROUPS_FOR_STUDY_IDS_PAGED) + gql(graphql.GET_ALL_LABEL_GROUPS_FOR_STUDY_ID_PAGED) gql(graphql.GET_STUDIES_BY_SEARCH_TERM_PAGED) gql(graphql.GET_STUDIES_BY_STUDY_ID_PAGED) gql(graphql.ADD_LABELS) From 6f3836ebc915cf8735f79f296094ae9b4b0087a1 Mon Sep 17 00:00:00 2001 From: Ronny Restrepo Date: Mon, 11 Oct 2021 16:42:04 +1100 Subject: [PATCH 06/13] DOCS: add comments to test data --- tests/test_data/label_groups_for_studies.py | 10 +++++++++- tests/test_data/label_groups_for_study.py | 10 +++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/tests/test_data/label_groups_for_studies.py b/tests/test_data/label_groups_for_studies.py index 5680c1b..4ed8b47 100644 --- a/tests/test_data/label_groups_for_studies.py +++ b/tests/test_data/label_groups_for_studies.py @@ -1,7 +1,13 @@ +""" +Data for mocking intermediate function calls, as well as expected return values +when testing the following functions: +- client.get_label_groups_for_studies() +- client.get_label_groups_for_studies_dataframe() +""" import io import pandas as pd -# INDIVIDUAL RESPONSES IT GETS FROM CALLING client.get_label_groups_for_study() +# Individual responses it gets from calling client.get_label_groups_for_study() individual_study_responses = [ { "id": "study1_id", @@ -45,6 +51,7 @@ }, ] +# The expected result from client.get_label_groups_for_studies() expected_seerpy_response = [ { "id": 
"study1_id", @@ -96,4 +103,5 @@ study2_labelgroup2_id,study2_labelgroup2_name,study2_labelgroup2_description,default,202,study2_id,study2_name """ +# The expected result from client.get_label_groups_for_studies_dataframe() expected_seerpy_df = pd.read_csv(io.StringIO(csv)) diff --git a/tests/test_data/label_groups_for_study.py b/tests/test_data/label_groups_for_study.py index 122a0fa..2938120 100644 --- a/tests/test_data/label_groups_for_study.py +++ b/tests/test_data/label_groups_for_study.py @@ -1,5 +1,12 @@ +""" +Data for mocking intermediate function calls, as well as expected return values +when testing the following function: +- client.get_label_groups_for_study() +""" -# RAW RESPONSES IT GETS FROM GRAPH QL QUERY +# Mocked paginated responses on each subsequent call of client.execute_query() +# within the client.get_paginated_response() function that gets called by +# client.get_label_groups_for_study() raw_paginated_responses = [ { "study": { @@ -40,6 +47,7 @@ }, ] +# Expected return value when calling client.get_label_groups_for_study() expected_seerpy_response = { "id": "study1_id", "name": "study1_name", From 38c20f0cfca3d5aeecb4b1aa2532716a7011f534 Mon Sep 17 00:00:00 2001 From: Ronny Restrepo Date: Mon, 11 Oct 2021 16:49:25 +1100 Subject: [PATCH 07/13] FEAT: restore changes i made to other tests --- tests/test_seerpy.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/test_seerpy.py b/tests/test_seerpy.py index 171ca95..ca1a200 100644 --- a/tests/test_seerpy.py +++ b/tests/test_seerpy.py @@ -74,7 +74,7 @@ def test_query_variables_are_passed_on_initial_failure(self, gql_client, unused_ class TestPaginatedQuery: @classmethod def check_paginated_query(cls, seer_connect, function_to_test, function_args=None, - function_kwargs=None, expected_result=None, query_response=None, iteration_path=None): + function_kwargs=None, expected_result=None, query_response=None): """Generic function to test a function which calls 
Seer_connect.get_paginated_response().""" # run test with mock.patch.object(SeerConnect, 'get_paginated_response', @@ -104,10 +104,9 @@ def check_paginated_query(cls, seer_connect, function_to_test, function_args=Non assert not missing_path_items, ( f'object path item(s) {missing_path_items} not found in query string {query_string}') - iteration_path = iteration_path + iteration_path = None if len(paginate.call_args[0]) > 4: iteration_path = paginate.call_args[0][4] - print(f"iteration path: {iteration_path}") missing_path_items = [ path_item for path_item in iteration_path if path_item not in query_string ] From e6ba6ee3f1a374e009c30dd7dc01735fd18eede1 Mon Sep 17 00:00:00 2001 From: Ronny Restrepo Date: Tue, 12 Oct 2021 13:11:49 +1100 Subject: [PATCH 08/13] STYLE: indent graphql query --- seerpy/graphql.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/seerpy/graphql.py b/seerpy/graphql.py index 75d8d65..16f2d27 100644 --- a/seerpy/graphql.py +++ b/seerpy/graphql.py @@ -196,19 +196,19 @@ def get_string_from_list_of_dicts(list_of_dicts): GET_ALL_LABEL_GROUPS_FOR_STUDY_ID_PAGED = """ query getStudyLabelGroups( - $study_id: String!, - $limit: PaginationAmount, - $offset: Int + $study_id: String!, + $limit: PaginationAmount, + $offset: Int ) { study(id: $study_id) { - id - name - labelGroups(limit: $limit, offset: $offset) { - id - name - description - labelType - numberOfLabels + id + name + labelGroups(limit: $limit, offset: $offset) { + id + name + description + labelType + numberOfLabels } } }""" From 9882e164ab3ca1d0fa705db2d73920c99aed1a38 Mon Sep 17 00:00:00 2001 From: Ronny Restrepo Date: Tue, 12 Oct 2021 13:30:01 +1100 Subject: [PATCH 09/13] STYLE: imports from same file on same line --- tests/test_seerpy.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_seerpy.py b/tests/test_seerpy.py index ca1a200..54ed861 100644 --- a/tests/test_seerpy.py +++ b/tests/test_seerpy.py @@ -11,8 +11,7 
@@ from seerpy.seerpy import SeerConnect import seerpy.graphql as graphql -from tests.test_data import label_groups_for_study -from tests.test_data import label_groups_for_studies +from tests.test_data import label_groups_for_study, label_groups_for_studies # having a class is useful to allow patches to be shared across mutliple test functions, but then # pylint complains that the methods could be a function. this disables that warning. @@ -871,4 +870,3 @@ def test_get_label_groups_for_studies_dataframe(self, gql_client, unused_sleep, mock_stdy_labelgroups.side_effect = label_groups_for_studies.individual_study_responses response = seer_connect.get_label_groups_for_studies_dataframe(["study1","study2"]) assert label_groups_for_studies.expected_seerpy_df.equals(response) - print(response) From 220897320ef67bd69ed3577615c6c103dff58590 Mon Sep 17 00:00:00 2001 From: Ronny Restrepo Date: Tue, 12 Oct 2021 13:30:20 +1100 Subject: [PATCH 10/13] STYLE: redundant newline --- seerpy/graphql.py | 1 - 1 file changed, 1 deletion(-) diff --git a/seerpy/graphql.py b/seerpy/graphql.py index 16f2d27..6c7a295 100644 --- a/seerpy/graphql.py +++ b/seerpy/graphql.py @@ -214,7 +214,6 @@ def get_string_from_list_of_dicts(list_of_dicts): }""" - def get_channel_groups_query_string(study_id): return """ query { From 6a97c4689303b857fac795df89cb12bd4ceecb0e Mon Sep 17 00:00:00 2001 From: Ronny Restrepo Date: Tue, 12 Oct 2021 13:30:40 +1100 Subject: [PATCH 11/13] STYLE: missing newline --- tests/test_data/label_groups_for_study.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_data/label_groups_for_study.py b/tests/test_data/label_groups_for_study.py index 2938120..bfe6fba 100644 --- a/tests/test_data/label_groups_for_study.py +++ b/tests/test_data/label_groups_for_study.py @@ -67,4 +67,4 @@ "numberOfLabels": 5, } ] -} \ No newline at end of file +} From 21c711c4813138519e2453730e5579bbefa1e401 Mon Sep 17 00:00:00 2001 From: Ronny Restrepo Date: Tue, 12 Oct 2021 
13:31:45 +1100 Subject: [PATCH 12/13] CHORE: bump version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 09519f4..77197d6 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ setup( name='seerpy', - version='0.6.2', + version='0.6.3', description='Seer Platform SDK for Python', long_description=open('README.md').read(), url='https://github.com/seermedical/seer-py', From ac4f2ae63492daefc19c802b1b0a0b1edba66a73 Mon Sep 17 00:00:00 2001 From: Ronny Restrepo Date: Tue, 12 Oct 2021 13:36:17 +1100 Subject: [PATCH 13/13] FEAT: set limit at labelgroup level --- seerpy/seerpy.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/seerpy/seerpy.py b/seerpy/seerpy.py index ebb58ad..322ace2 100644 --- a/seerpy/seerpy.py +++ b/seerpy/seerpy.py @@ -957,7 +957,7 @@ def get_label_groups_for_study(self, study_id, limit=50): ) return results - def get_label_groups_for_studies(self, study_ids, limit=50, labelgroup_limit=50): + def get_label_groups_for_studies(self, study_ids, limit=50): """Get label group information for all provided study IDs. Parameters @@ -965,8 +965,6 @@ def get_label_groups_for_studies(self, study_ids, limit=50, labelgroup_limit=50) study_ids : str or list of str One or more unique IDs, each identifying a study limit : int, optional - Batch size for repeated API calls - labelgroup_limit: int, optional Batch size for paginating at the label groups level. 
Returns @@ -979,11 +977,11 @@ def get_label_groups_for_studies(self, study_ids, limit=50, labelgroup_limit=50) study_ids = [study_ids] results = [] for study_id in study_ids: - _results = self.get_label_groups_for_study(study_id, limit=labelgroup_limit) + _results = self.get_label_groups_for_study(study_id, limit=limit) results.append(_results) return results - def get_label_groups_for_studies_dataframe(self, study_ids, labelgroup_limit=100, limit=50): + def get_label_groups_for_studies_dataframe(self, study_ids, limit=50): """Get label group information for all provided study IDs as a DataFrame. See `get_label_groups_for_studies()` for details. @@ -992,8 +990,6 @@ def get_label_groups_for_studies_dataframe(self, study_ids, labelgroup_limit=100 study_ids : str or list of str One or more unique IDs, each identifying a study limit : int, optional - Batch size for repeated API calls - labelgroup_limit: int, optional Batch size for paginating at the label groups level. Returns @@ -1002,9 +998,7 @@ def get_label_groups_for_studies_dataframe(self, study_ids, labelgroup_limit=100 Columns with details on name, id, type, number of labels, study ID and name """ label_groups = [] - for study in self.get_label_groups_for_studies(study_ids, - limit=limit, - labelgroup_limit=labelgroup_limit): + for study in self.get_label_groups_for_studies(study_ids, limit=limit): for label_group in study['labelGroups']: label_group['labelGroup.id'] = label_group.pop('id') label_group['labelGroup.name'] = label_group.pop('name')