Skip to content

Commit

Permalink
Merge pull request #149 from seermedical/fix-labelgroups
Browse files Browse the repository at this point in the history
FIX: only returning subset of labelgroups for study
  • Loading branch information
ronrest authored Oct 12, 2021
2 parents 2c52d8f + ac4f2ae commit d78eb10
Show file tree
Hide file tree
Showing 8 changed files with 261 additions and 33 deletions.
27 changes: 15 additions & 12 deletions seerpy/graphql.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,21 +193,24 @@ def get_string_from_list_of_dicts(list_of_dicts):
}
}"""

GET_LABEL_GROUPS_FOR_STUDY_IDS_PAGED = """
query studies($study_ids: [String],
$limit: PaginationAmount,
$offset: Int) {
studies (studyIds: $study_ids, limit: $limit, offset: $offset) {

GET_ALL_LABEL_GROUPS_FOR_STUDY_ID_PAGED = """
query getStudyLabelGroups(
$study_id: String!,
$limit: PaginationAmount,
$offset: Int
) {
study(id: $study_id) {
id
name
labelGroups {
id
name
description
labelType
numberOfLabels
}
labelGroups(limit: $limit, offset: $offset) {
id
name
description
labelType
numberOfLabels
}
}
}"""


Expand Down
54 changes: 41 additions & 13 deletions seerpy/seerpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -934,43 +934,71 @@ def get_labels_string_dataframe(self, study_id, label_group_id, from_time=0, to_
'labelString.s': 'labels.startTime'
})
return label_group

def get_label_groups_for_study(self, study_id, limit=50):
"""Given a study_id, it returns all the labelgroups.
def get_label_groups_for_studies(self, study_ids, limit=50):
Parameters
----------
study_id : str
limit : int, optional
Batch size for repeated API calls
Returns
-------
label_groups : dict
Keys included: 'id', 'labelGroups' and 'name'
"""
Get label group information for all provided study IDs.
results = self.get_paginated_response(graphql.GET_ALL_LABEL_GROUPS_FOR_STUDY_ID_PAGED,
variable_values=dict(study_id=study_id),
limit=limit,
object_path=["study"],
iteration_path=["labelGroups"],
)
return results

def get_label_groups_for_studies(self, study_ids, limit=50):
"""Get label group information for all provided study IDs.
Parameters
----------
study_ids : str or list of str
One or more unique IDs, each identifying a study
limit : int, optional
Batch size for repeated API calls
Batch size for paginating at the label groups level.
Returns
-------
label_groups : list of dict
Keys included: 'id', 'labelGroups' and 'name'
"""
if isinstance(study_ids, str):
study_ids = [study_ids]

variable_values = {'study_ids': study_ids}
return self.get_paginated_response(graphql.GET_LABEL_GROUPS_FOR_STUDY_IDS_PAGED,
variable_values, limit, ['studies'])

results = []
for study_id in study_ids:
_results = self.get_label_groups_for_study(study_id, limit=limit)
results.append(_results)
return results

def get_label_groups_for_studies_dataframe(self, study_ids, limit=50):
"""
Get label group information for all provided study IDs as a DataFrame. See
`get_label_groups_for_studies()` for details.
"""Get label group information for all provided study IDs as a DataFrame.
See `get_label_groups_for_studies()` for details.
Parameters
----------
study_ids : str or list of str
One or more unique IDs, each identifying a study
limit : int, optional
Batch size for paginating at the label groups level.
Returns
-------
label_groups_df : pd.DataFrame
Columns with details on name, id, type, number of labels, study ID and name
"""
# TODO: can we use json_normalize or pandas_flatten for this?
label_groups = []
for study in self.get_label_groups_for_studies(study_ids, limit):
for study in self.get_label_groups_for_studies(study_ids, limit=limit):
for label_group in study['labelGroups']:
label_group['labelGroup.id'] = label_group.pop('id')
label_group['labelGroup.name'] = label_group.pop('name')
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

setup(
name='seerpy',
version='0.6.2',
version='0.6.3',
description='Seer Platform SDK for Python',
long_description=open('README.md').read(),
url='https://github.com/seermedical/seer-py',
Expand Down
Empty file added tests/test_data/__init__.py
Empty file.
107 changes: 107 additions & 0 deletions tests/test_data/label_groups_for_studies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
"""
Data for mocking intermediate function calls, as well as expected return values
when testing the following functions:
- client.get_label_groups_for_studies()
- client.get_label_groups_for_studies_dataframe()
"""
import io
import pandas as pd

# Per-study payloads that the mocked client.get_label_groups_for_study()
# hands back, one list element per call (study1 first, then study2).
individual_study_responses = [
    {
        "id": "study1_id",
        "name": "study1_name",
        "labelGroups": [
            {
                "name": "study1_labelgroup1_name",
                "description": "study1_labelgroup1_description",
                "id": "study1_labelgroup1_id",
                "labelType": "default",
                "numberOfLabels": 101,
            },
            {
                "name": "study1_labelgroup2_name",
                "description": "study1_labelgroup2_description",
                "id": "study1_labelgroup2_id",
                "labelType": "default",
                "numberOfLabels": 102,
            },
        ],
    },
    {
        "id": "study2_id",
        "name": "study2_name",
        "labelGroups": [
            {
                "name": "study2_labelgroup1_name",
                "description": "study2_labelgroup1_description",
                "id": "study2_labelgroup1_id",
                "labelType": "default",
                "numberOfLabels": 201,
            },
            {
                "name": "study2_labelgroup2_name",
                "description": "study2_labelgroup2_description",
                "id": "study2_labelgroup2_id",
                "labelType": "default",
                "numberOfLabels": 202,
            },
        ],
    },
]

# What client.get_label_groups_for_studies() is expected to return: one dict
# per study, in the order the study IDs were requested. Kept as a separate
# literal (not an alias of the mocked input) so the comparison stays meaningful.
expected_seerpy_response = [
    {
        "id": "study1_id",
        "name": "study1_name",
        "labelGroups": [
            {
                "name": "study1_labelgroup1_name",
                "description": "study1_labelgroup1_description",
                "id": "study1_labelgroup1_id",
                "labelType": "default",
                "numberOfLabels": 101,
            },
            {
                "name": "study1_labelgroup2_name",
                "description": "study1_labelgroup2_description",
                "id": "study1_labelgroup2_id",
                "labelType": "default",
                "numberOfLabels": 102,
            },
        ],
    },
    {
        "id": "study2_id",
        "name": "study2_name",
        "labelGroups": [
            {
                "name": "study2_labelgroup1_name",
                "description": "study2_labelgroup1_description",
                "id": "study2_labelgroup1_id",
                "labelType": "default",
                "numberOfLabels": 201,
            },
            {
                "name": "study2_labelgroup2_name",
                "description": "study2_labelgroup2_description",
                "id": "study2_labelgroup2_id",
                "labelType": "default",
                "numberOfLabels": 202,
            },
        ],
    },
]

# CSV text for the expected DataFrame: one row per label group, with the
# label-group fields under "labelGroup.*" columns and the owning study's id
# and name in the last two columns.
# The literal begins with a newline; pd.read_csv skips blank lines by default,
# so the first non-empty line is still taken as the header row.
csv = """
labelGroup.id,labelGroup.name,labelGroup.description,labelGroup.labelType,labelGroup.numberOfLabels,id,name
study1_labelgroup1_id,study1_labelgroup1_name,study1_labelgroup1_description,default,101,study1_id,study1_name
study1_labelgroup2_id,study1_labelgroup2_name,study1_labelgroup2_description,default,102,study1_id,study1_name
study2_labelgroup1_id,study2_labelgroup1_name,study2_labelgroup1_description,default,201,study2_id,study2_name
study2_labelgroup2_id,study2_labelgroup2_name,study2_labelgroup2_description,default,202,study2_id,study2_name
"""

# The expected result from client.get_label_groups_for_studies_dataframe(),
# parsed from the CSV text above via an in-memory text buffer.
expected_seerpy_df = pd.read_csv(io.StringIO(csv))
70 changes: 70 additions & 0 deletions tests/test_data/label_groups_for_study.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"""
Data for mocking intermediate function calls, as well as expected return values
when testing the following function:
- client.get_label_groups_for_study()
"""

# Successive raw results returned by the mocked client.execute_query() while
# client.get_paginated_response() pages through the label groups on behalf of
# client.get_label_groups_for_study(). Each page carries one label group; the
# final page has an empty "labelGroups" list.
raw_paginated_responses = [
    {
        "study": {
            "id": "study1_id",
            "name": "study1_name",
            "labelGroups": [
                {
                    "name": "labelgroup1_name",
                    "description": "labelgroup1_description",
                    "id": "labelgroup1_id",
                    "labelType": "default",
                    "numberOfLabels": 2,
                },
            ],
        },
    },
    {
        "study": {
            "id": "study1_id",
            "name": "study1_name",
            "labelGroups": [
                {
                    "name": "labelgroup2_name",
                    "description": "labelgroup2_description",
                    "id": "labelgroup2_id",
                    "labelType": "default",
                    "numberOfLabels": 5,
                },
            ],
        },
    },
    {
        "study": {
            "id": "study1_id",
            "name": "study1_name",
            "labelGroups": [],
        },
    },
]

# The single study dict client.get_label_groups_for_study() should produce
# once the paginated label groups have been combined.
expected_seerpy_response = {
    "id": "study1_id",
    "name": "study1_name",
    "labelGroups": [
        {
            "name": "labelgroup1_name",
            "description": "labelgroup1_description",
            "id": "labelgroup1_id",
            "labelType": "default",
            "numberOfLabels": 2,
        },
        {
            "name": "labelgroup2_name",
            "description": "labelgroup2_description",
            "id": "labelgroup2_id",
            "labelType": "default",
            "numberOfLabels": 5,
        },
    ],
}
2 changes: 1 addition & 1 deletion tests/test_graphql.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def test_graphql_query_string():
gql(graphql.GET_STUDY_WITH_DATA)
gql(graphql.GET_LABELS_PAGED)
gql(graphql.GET_LABELS_STRING)
gql(graphql.GET_LABEL_GROUPS_FOR_STUDY_IDS_PAGED)
gql(graphql.GET_ALL_LABEL_GROUPS_FOR_STUDY_ID_PAGED)
gql(graphql.GET_STUDIES_BY_SEARCH_TERM_PAGED)
gql(graphql.GET_STUDIES_BY_STUDY_ID_PAGED)
gql(graphql.ADD_LABELS)
Expand Down
32 changes: 26 additions & 6 deletions tests/test_seerpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from seerpy.seerpy import SeerConnect
import seerpy.graphql as graphql

from tests.test_data import label_groups_for_study, label_groups_for_studies

# having a class is useful to allow patches to be shared across multiple test functions, but then
# pylint complains that the methods could be a function. this disables that warning.
# pylint:disable=no-self-use
Expand Down Expand Up @@ -187,12 +189,6 @@ def test_get_studies_by_id(self, gql_client, unused_sleep, seer_connect):
function_args=[['study-1-id', 'study-2-id']], response_file='studies.json',
empty_response={'studies': []})

def test_get_label_groups_for_studies(self, gql_client, unused_sleep, seer_connect):
# run test and check result
self.check_paginated_query_with_data_variations(
gql_client, seer_connect, function_to_test=seer_connect.get_label_groups_for_studies,
function_args=[['study-1-id', 'study-2-id']], response_file='label_groups.json',
empty_response={'studies': []})

def test_get_documents_for_studies(self, gql_client, unused_sleep, seer_connect):
# run test and check result
Expand Down Expand Up @@ -850,3 +846,27 @@ def test_empty(self, gql_client, unused_sleep, seer_connect):

# check result
assert result == expected_result


@mock.patch('time.sleep', return_value=None)
@mock.patch('seerpy.seerpy.GQLClient', autospec=True)
class TestLabelGroups:
    """Tests for the label-group getters on the client.

    The class-level patches replace the GraphQL client and ``time.sleep``; each
    test receives them as ``gql_client`` and ``unused_sleep`` respectively.

    Mocked return values are always deep copies of the shared fixture data in
    ``tests.test_data``. The code under test may modify the dicts it is given
    (the dataframe builder renames label-group keys in place), and handing out
    the module-level objects directly would let one test corrupt the fixtures
    seen by the next, making the outcome depend on test order.
    """

    def test_get_label_groups_for_study(self, gql_client, unused_sleep, seer_connect):
        """Successive paginated results are combined into one study dict."""
        import copy

        # One raw result per execute() call; the last one has no label groups.
        gql_client.return_value.execute.side_effect = copy.deepcopy(
            label_groups_for_study.raw_paginated_responses)

        response = seer_connect.get_label_groups_for_study("study1")

        assert response == label_groups_for_study.expected_seerpy_response

    def test_get_label_groups_for_studies(self, gql_client, unused_sleep, seer_connect):
        """Per-study results are returned as a list, one entry per study ID."""
        import copy

        with mock.patch.object(seer_connect,
                               "get_label_groups_for_study") as mock_study_label_groups:
            mock_study_label_groups.side_effect = copy.deepcopy(
                label_groups_for_studies.individual_study_responses)

            response = seer_connect.get_label_groups_for_studies(["study1", "study2"])

        assert label_groups_for_studies.expected_seerpy_response == response

    def test_get_label_groups_for_studies_dataframe(self, gql_client, unused_sleep,
                                                    seer_connect):
        """Per-study results are flattened into the expected DataFrame."""
        import copy

        with mock.patch.object(seer_connect,
                               "get_label_groups_for_study") as mock_study_label_groups:
            # The dataframe builder pops 'id'/'name' from each label-group dict,
            # so it must operate on copies rather than on the shared fixtures.
            mock_study_label_groups.side_effect = copy.deepcopy(
                label_groups_for_studies.individual_study_responses)

            response = seer_connect.get_label_groups_for_studies_dataframe(
                ["study1", "study2"])

        assert label_groups_for_studies.expected_seerpy_df.equals(response)

0 comments on commit d78eb10

Please sign in to comment.