Merge pull request #149 from seermedical/fix-labelgroups

FIX: only a subset of a study's label groups was being returned
seermedical · Oct 12, 2021 · d78eb10 · d78eb10
2 parents 2c52d8f + ac4f2ae
commit d78eb10
Show file tree

Hide file tree

Showing 8 changed files with 261 additions and 33 deletions.
diff --git a/seerpy/graphql.py b/seerpy/graphql.py
@@ -193,21 +193,24 @@ def get_string_from_list_of_dicts(list_of_dicts):
         }
     }"""
 
-GET_LABEL_GROUPS_FOR_STUDY_IDS_PAGED = """
-    query studies($study_ids: [String],
-                  $limit: PaginationAmount,
-                  $offset: Int) {
-        studies (studyIds: $study_ids, limit: $limit, offset: $offset) {
+
+GET_ALL_LABEL_GROUPS_FOR_STUDY_ID_PAGED = """
+    query getStudyLabelGroups(
+        $study_id: String!,
+        $limit: PaginationAmount,
+        $offset: Int
+    ) {
+        study(id: $study_id) {
             id
             name
-            labelGroups {
-                id
-                name
-                description
-                labelType
-                numberOfLabels
-            }
+            labelGroups(limit: $limit, offset: $offset) {
+            id
+            name
+            description
+            labelType
+            numberOfLabels
         }
+    }
     }"""
 
 

diff --git a/seerpy/seerpy.py b/seerpy/seerpy.py
@@ -934,43 +934,71 @@ def get_labels_string_dataframe(self, study_id, label_group_id, from_time=0, to_
                 'labelString.s': 'labels.startTime'
             })
         return label_group
+
+    def get_label_groups_for_study(self, study_id, limit=50):
+        """Return all label groups for the study with the given ID.
 
-    def get_label_groups_for_studies(self, study_ids, limit=50):
+        Parameters
+        ----------
+        study_id : str
+        limit : int, optional
+            Batch size for repeated API calls
+
+        Returns
+        -------
+        label_groups : dict
+            Keys included: 'id', 'labelGroups' and 'name'
         """
-        Get label group information for all provided study IDs.
+        results = self.get_paginated_response(graphql.GET_ALL_LABEL_GROUPS_FOR_STUDY_ID_PAGED,
+                                              variable_values=dict(study_id=study_id),
+                                              limit=limit,
+                                              object_path=["study"],
+                                              iteration_path=["labelGroups"],
+                                              )
+        return results
 
+    def get_label_groups_for_studies(self, study_ids, limit=50):
+        """Get label group information for all provided study IDs.
+ 
         Parameters
         ----------
         study_ids : str or list of str
             One or more unique IDs, each identifying a study
         limit : int, optional
-            Batch size for repeated API calls
+            Batch size for paginating at the label groups level.
 
         Returns
         -------
         label_groups : list of dict
             Keys included: 'id', 'labelGroups' and 'name'
+
         """
         if isinstance(study_ids, str):
             study_ids = [study_ids]
-
-        variable_values = {'study_ids': study_ids}
-        return self.get_paginated_response(graphql.GET_LABEL_GROUPS_FOR_STUDY_IDS_PAGED,
-                                           variable_values, limit, ['studies'])
-
+        results = []
+        for study_id in study_ids: 
+            _results = self.get_label_groups_for_study(study_id, limit=limit)
+            results.append(_results)
+        return results
+
     def get_label_groups_for_studies_dataframe(self, study_ids, limit=50):
-        """
-        Get label group information for all provided study IDs as a DataFrame. See
-        `get_label_groups_for_studies()` for details.
+        """Get label group information for all provided study IDs as a DataFrame. 
+        See `get_label_groups_for_studies()` for details.
+        
+        Parameters
+        ----------
+        study_ids : str or list of str
+            One or more unique IDs, each identifying a study
+        limit : int, optional
+            Batch size for paginating at the label groups level.
 
         Returns
         -------
         label_groups_df : pd.DataFrame
             Columns with details on name, id, type, number of labels, study ID and name
         """
-        # TODO: can we use json_normalize or pandas_flatten for this?
         label_groups = []
-        for study in self.get_label_groups_for_studies(study_ids, limit):
+        for study in self.get_label_groups_for_studies(study_ids, limit=limit):
             for label_group in study['labelGroups']:
                 label_group['labelGroup.id'] = label_group.pop('id')
                 label_group['labelGroup.name'] = label_group.pop('name')

diff --git a/setup.py b/setup.py
@@ -3,7 +3,7 @@
 
 setup(
     name='seerpy',
-    version='0.6.2',
+    version='0.6.3',
     description='Seer Platform SDK for Python',
     long_description=open('README.md').read(),
     url='https://github.com/seermedical/seer-py',

diff --git a/tests/test_data/__init__.py b/tests/test_data/__init__.py
diff --git a/tests/test_data/label_groups_for_studies.py b/tests/test_data/label_groups_for_studies.py
@@ -0,0 +1,107 @@
+"""
+Data for mocking intermediate function calls, as well as expected return values 
+when testing the following functions: 
+- client.get_label_groups_for_studies()
+- client.get_label_groups_for_studies_dataframe()
+"""
+import io
+import pandas as pd
+
+# Mocked per-study return values of successive client.get_label_groups_for_study() calls
+individual_study_responses = [
+    {
+        "id": "study1_id",
+        "name": "study1_name",
+        "labelGroups": [
+            {
+                "name": "study1_labelgroup1_name",
+                "description": "study1_labelgroup1_description",
+                "id": "study1_labelgroup1_id",
+                "labelType": "default",
+                "numberOfLabels": 101,
+            },
+            {
+                "name": "study1_labelgroup2_name",
+                "description": "study1_labelgroup2_description",
+                "id": "study1_labelgroup2_id",
+                "labelType": "default",
+                "numberOfLabels": 102,
+            }
+        ]
+    },
+    {
+        "id": "study2_id",
+        "name": "study2_name",
+        "labelGroups": [
+            {
+                "name": "study2_labelgroup1_name",
+                "description": "study2_labelgroup1_description",
+                "id": "study2_labelgroup1_id",
+                "labelType": "default",
+                "numberOfLabels": 201,
+            },
+            {
+                "name": "study2_labelgroup2_name",
+                "description": "study2_labelgroup2_description",
+                "id": "study2_labelgroup2_id",
+                "labelType": "default",
+                "numberOfLabels": 202,
+            }
+        ]
+    },
+]
+
+# The expected result from client.get_label_groups_for_studies()
+expected_seerpy_response = [
+    {
+        "id": "study1_id",
+        "name": "study1_name",
+        "labelGroups": [
+            {
+                "name": "study1_labelgroup1_name",
+                "description": "study1_labelgroup1_description",
+                "id": "study1_labelgroup1_id",
+                "labelType": "default",
+                "numberOfLabels": 101,
+            },
+            {
+                "name": "study1_labelgroup2_name",
+                "description": "study1_labelgroup2_description",
+                "id": "study1_labelgroup2_id",
+                "labelType": "default",
+                "numberOfLabels": 102,
+            }
+        ]
+    },
+    {
+        "id": "study2_id",
+        "name": "study2_name",
+        "labelGroups": [
+            {
+                "name": "study2_labelgroup1_name",
+                "description": "study2_labelgroup1_description",
+                "id": "study2_labelgroup1_id",
+                "labelType": "default",
+                "numberOfLabels": 201,
+            },
+            {
+                "name": "study2_labelgroup2_name",
+                "description": "study2_labelgroup2_description",
+                "id": "study2_labelgroup2_id",
+                "labelType": "default",
+                "numberOfLabels": 202,
+            }
+        ]
+    },
+]
+
+csv = """
+labelGroup.id,labelGroup.name,labelGroup.description,labelGroup.labelType,labelGroup.numberOfLabels,id,name
+study1_labelgroup1_id,study1_labelgroup1_name,study1_labelgroup1_description,default,101,study1_id,study1_name
+study1_labelgroup2_id,study1_labelgroup2_name,study1_labelgroup2_description,default,102,study1_id,study1_name
+study2_labelgroup1_id,study2_labelgroup1_name,study2_labelgroup1_description,default,201,study2_id,study2_name
+study2_labelgroup2_id,study2_labelgroup2_name,study2_labelgroup2_description,default,202,study2_id,study2_name
+"""
+
+# The expected result from client.get_label_groups_for_studies_dataframe()
+expected_seerpy_df = pd.read_csv(io.StringIO(csv))
diff --git a/tests/test_data/label_groups_for_study.py b/tests/test_data/label_groups_for_study.py
@@ -0,0 +1,70 @@
+"""
+Data for mocking intermediate function calls, as well as expected return values 
+when testing the following function: 
+- client.get_label_groups_for_study()
+"""
+
+# Mocked responses, one per page, returned by successive calls to client.execute_query()
+# inside client.get_paginated_response(), which client.get_label_groups_for_study() calls.
+# The last response carries an empty "labelGroups" list.
+raw_paginated_responses = [
+    {
+        "study": {
+            "id": "study1_id",
+            "name": "study1_name",
+            "labelGroups": [
+                {
+                    "name": "labelgroup1_name",
+                    "description": "labelgroup1_description",
+                    "id": "labelgroup1_id",
+                    "labelType": "default",
+                    "numberOfLabels": 2,
+                },
+            ]
+        }
+    },
+    {
+        "study": {
+            "id": "study1_id",
+            "name": "study1_name",
+            "labelGroups": [
+                {
+                    "name": "labelgroup2_name",
+                    "description": "labelgroup2_description",
+                    "id": "labelgroup2_id",
+                    "labelType": "default",
+                    "numberOfLabels": 5,
+                }
+            ]
+        }
+    },
+    {
+        "study": {
+            "id": "study1_id",
+            "name": "study1_name",
+            "labelGroups": []
+        }
+    },
+]
+
+# Expected return value when calling client.get_label_groups_for_study()
+expected_seerpy_response = {
+    "id": "study1_id",
+    "name": "study1_name",
+    "labelGroups": [
+        {
+            "name": "labelgroup1_name",
+            "description": "labelgroup1_description",
+            "id": "labelgroup1_id",
+            "labelType": "default",
+            "numberOfLabels": 2,
+        },
+        {
+            "name": "labelgroup2_name",
+            "description": "labelgroup2_description",
+            "id": "labelgroup2_id",
+            "labelType": "default",
+            "numberOfLabels": 5,
+        }
+    ]
+}
diff --git a/tests/test_graphql.py b/tests/test_graphql.py
@@ -13,7 +13,7 @@ def test_graphql_query_string():
     gql(graphql.GET_STUDY_WITH_DATA)
     gql(graphql.GET_LABELS_PAGED)
     gql(graphql.GET_LABELS_STRING)
-    gql(graphql.GET_LABEL_GROUPS_FOR_STUDY_IDS_PAGED)
+    gql(graphql.GET_ALL_LABEL_GROUPS_FOR_STUDY_ID_PAGED)
     gql(graphql.GET_STUDIES_BY_SEARCH_TERM_PAGED)
     gql(graphql.GET_STUDIES_BY_STUDY_ID_PAGED)
     gql(graphql.ADD_LABELS)

diff --git a/tests/test_seerpy.py b/tests/test_seerpy.py
@@ -11,6 +11,8 @@
 from seerpy.seerpy import SeerConnect
 import seerpy.graphql as graphql
 
+from tests.test_data import label_groups_for_study, label_groups_for_studies
+
 # having a class is useful to allow patches to be shared across mutliple test functions, but then
 # pylint complains that the methods could be a function. this disables that warning.
 # pylint:disable=no-self-use
@@ -187,12 +189,6 @@ def test_get_studies_by_id(self, gql_client, unused_sleep, seer_connect):
             function_args=[['study-1-id', 'study-2-id']], response_file='studies.json',
             empty_response={'studies': []})
 
-    def test_get_label_groups_for_studies(self, gql_client, unused_sleep, seer_connect):
-        # run test and check result
-        self.check_paginated_query_with_data_variations(
-            gql_client, seer_connect, function_to_test=seer_connect.get_label_groups_for_studies,
-            function_args=[['study-1-id', 'study-2-id']], response_file='label_groups.json',
-            empty_response={'studies': []})
 
     def test_get_documents_for_studies(self, gql_client, unused_sleep, seer_connect):
         # run test and check result
@@ -850,3 +846,27 @@ def test_empty(self, gql_client, unused_sleep, seer_connect):
 
         # check result
         assert result == expected_result
+
+
+@mock.patch('time.sleep', return_value=None)
+@mock.patch('seerpy.seerpy.GQLClient', autospec=True)
+class TestLabelGroups:
+    def test_get_label_groups_for_study(self, gql_client, unused_sleep, seer_connect):
+        raw_paginated_responses = label_groups_for_study.raw_paginated_responses
+        expected_seerpy_response = label_groups_for_study.expected_seerpy_response
+
+        gql_client.return_value.execute.side_effect = raw_paginated_responses
+        response = seer_connect.get_label_groups_for_study("study1")
+        assert response == expected_seerpy_response
+
+    def test_get_label_groups_for_studies(self, gql_client, unused_sleep, seer_connect):
+        with mock.patch.object(seer_connect, "get_label_groups_for_study") as mock_stdy_labelgroups:
+            mock_stdy_labelgroups.side_effect = label_groups_for_studies.individual_study_responses
+            response = seer_connect.get_label_groups_for_studies(["study1","study2"])
+        assert label_groups_for_studies.expected_seerpy_response == response
+
+    def test_get_label_groups_for_studies_dataframe(self, gql_client, unused_sleep, seer_connect):
+        with mock.patch.object(seer_connect, "get_label_groups_for_study") as mock_stdy_labelgroups:
+            mock_stdy_labelgroups.side_effect = label_groups_for_studies.individual_study_responses
+            response = seer_connect.get_label_groups_for_studies_dataframe(["study1","study2"])
+        assert label_groups_for_studies.expected_seerpy_df.equals(response)