diff --git a/dcicutils/ff_utils.py b/dcicutils/ff_utils.py index 96f465290..280bdc0df 100644 --- a/dcicutils/ff_utils.py +++ b/dcicutils/ff_utils.py @@ -8,7 +8,7 @@ from collections import namedtuple from elasticsearch.exceptions import AuthorizationException -from typing import Optional, Dict, List +from typing import Dict, List, Optional from urllib.parse import parse_qs, urlencode, urlparse, urlunparse from . import s3_utils, es_utils from .common import ( @@ -17,7 +17,7 @@ # S3BucketName, S3KeyName, ) from .lang_utils import disjoined_list -from .misc_utils import PRINT, to_camel_case, remove_suffix +from .misc_utils import PRINT, to_camel_case, remove_suffix, VirtualApp # TODO (C4-92, C4-102): Probably to centralize this information in env_utils. Also figure out relation to CGAP. @@ -419,7 +419,7 @@ def search_result_generator(page_generator): but where a page size of 3 is used with start position 0. That call will return A,C,E. The user may expect G,I on the second page, but before it can be done, suppose an element D is indexed and that the stored data is A,C,D,E,G,I,K,M. Requesting data from start position 0 would - now return A,C,D but we already had the first page, so we request data starting at position 3 + now return A,C,D, but we already had the first page, so we request data starting at position 3 for the second page and get E,G,I. That means our sequence of return values would be A,C,E,E,G,I,K,M, or, in other words, showing a duplication. To avoid this, we keep track of the IDs we've seen and show only the first case of each element, so A,C,E,G,I,K,M. (We won't see the D, but we weren't @@ -647,7 +647,7 @@ def get_associated_qc_metrics(uuid, key=None, ff_env=None, include_processed_fil include_raw_files=False, include_supplementary_files=False): """ - Given a uuid of an experimentSet return a dictionary of dictionaries with each dictionary + Given a UUID of an experimentSet return a dictionary of dictionaries with each dictionary representing a quality metric. Args: @@ -942,32 +942,75 @@ def _get_es_metadata(uuids, es_client, filters, sources, chunk_size, auth): yield hit['_source'] # yield individual items from ES -def get_schema(name, key=None, ff_env=None) -> Dict: +def resolve_portal_env(ff_env: Optional[str], portal_env: Optional[str], + portal_vapp: Optional[VirtualApp]) -> Optional[str]: + """ + Resolves which of ff_env and portal_env to use (after doing consistency checking). + There are two consistency checks performed, for which an error is raised on failure: + 1. If neither ff_env= and portal_env= is None, the values must be compatible. + 2. If either ff_env= or portal_env= is not None, portal_vapp= must be None. + + The intent is that callers will do: + portal_env = resolve_portal_env(ff_env=ff_env, portal_env=portal_env, portal_vapp=portal_vapp) + and then afterward not have to worry that arguments are inconsistent. + + Args: + ff_env: an environment name or None + portal_env: an environment name or None + portal_vapp: a VirtualApp or None + """ + if ff_env: + if portal_env and portal_env != ff_env: + raise ValueError("You may not supply both portal_env= and ff_env= together.") + portal_env = ff_env + if portal_env and portal_vapp: + env_arg_name = 'ff_env=' if ff_env else 'portal_env=' + raise ValueError(f"You may not supply both portal_vapp= and {env_arg_name} together.") + return portal_env + + +def get_schema(name, key=None, ff_env: Optional[str] = None, portal_env: Optional[str] = None, + portal_vapp: Optional[VirtualApp] = None) -> Dict: """ Gets the schema definition with the given name. + Only one of portal_env= (or ff_env=) or portal_vapp= can be provided. This determines how the schemas are obtained. + Args: name (str): a schema name (CamelCase or snake_case), or None key (dict): standard ff_utils authentication key - ff_env (str): standard ff environment string + ff_env (str): standard environment string (deprecated, please prefer portal_env=) + portal_env: standard environment string (compatible replacement for ff_env=) + portal_vapp: a VirtualApp or None Returns: dict: contains key schema names and value item class names """ - auth = get_authentication_with_server(key, ff_env) - url = f"profiles/{to_camel_case(name)}.json" - schema = get_metadata(url, key=auth, add_on='frame=raw') - return schema + portal_env = resolve_portal_env(ff_env=ff_env, portal_env=portal_env, portal_vapp=portal_vapp) + base_url = f"profiles/{to_camel_case(name)}.json" + add_on = 'frame=raw' + if portal_vapp: + full_url = f"{base_url}?{add_on}" + res = portal_vapp.get(full_url) + return get_response_json(res) + else: + schema = get_metadata(obj_id=base_url, key=key, ff_env=portal_env, add_on=add_on) + return schema -def get_schemas(key=None, ff_env=None, *, allow_abstract=True, require_id=False) -> Dict[str, Dict]: +def get_schemas(key=None, ff_env: Optional[str] = None, *, allow_abstract: bool = True, require_id: bool = False, + portal_env: Optional[str] = None, portal_vapp: Optional[VirtualApp] = None) -> Dict[str, Dict]: """ Gets a dictionary of all schema definitions. By default, this returns all schemas, but the allow_abstract= and require_id= keywords allow limited filtering. + Only one of portal_env= (or ff_env=) or portal_vapp= can be provided. This determines how the schemas are obtained. + Args: key (dict): standard ff_utils authentication key - ff_env (str): standard ff environment string + ff_env (str): standard environment string (deprecated, please prefer portal_env=) + portal_env: standard environment string (compatible replacement for ff_env=) + portal_vapp: a VirtualApp or None allow_abstract (boolean): controls whether abstract schemas can be returned (default True, return them) require_id (boolean): controls whether a '$id' field is required for schema to be included (default False, include even if no $id) @@ -975,8 +1018,14 @@ def get_schemas(key=None, ff_env=None, *, allow_abstract=True, require_id=False) Returns: dict: a mapping from keys that are schema names to schema definitions """ - auth = get_authentication_with_server(key, ff_env) - schemas: Dict[str, Dict] = get_metadata('profiles/', key=auth, add_on='frame=raw') + portal_env = resolve_portal_env(ff_env=ff_env, portal_env=portal_env, portal_vapp=portal_vapp) + base_url = 'profiles/' + add_on = 'frame=raw' + if portal_vapp: + full_url = f"{base_url}?{add_on}" + schemas: Dict[str, Dict] = portal_vapp.get(full_url) + else: + schemas: Dict[str, Dict] = get_metadata(obj_id=base_url, key=key, ff_env=portal_env, add_on=add_on) filtered_schemas = {} for schema_name, schema in schemas.items(): if allow_abstract or not schema.get('isAbstract'): diff --git a/test/test_ff_utils.py b/test/test_ff_utils.py index 7d4254d7a..16413a519 100644 --- a/test/test_ff_utils.py +++ b/test/test_ff_utils.py @@ -1316,6 +1316,111 @@ def get_it(): time.sleep(2) +@pytest.mark.unit +def test_get_schema_with_vapp(): + + sample_vapp = mock.MagicMock() + sample_schema_metadata = {"foo": "foo-schema", "bar": "bar-schema"} + sample_auth = mock.MagicMock() + + with pytest.raises(ValueError) as exc: + ff_utils.get_schema('User', ff_env='foo', portal_env='bar') + assert str(exc.value) == 'You may not supply both portal_env= and ff_env= together.' + + with pytest.raises(ValueError) as exc: + ff_utils.get_schema('User', ff_env='foo', portal_vapp=sample_vapp) + assert str(exc.value) == 'You may not supply both portal_vapp= and ff_env= together.' + + with pytest.raises(ValueError) as exc: + ff_utils.get_schema('User', portal_env='foo', portal_vapp=sample_vapp) + assert str(exc.value) == 'You may not supply both portal_vapp= and portal_env= together.' + + for env_args in [{}, {'portal_env': 'foo'}, {'ff_env': 'foo'}]: + + with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata: + with mock.patch.object(ff_utils, "get_authentication_with_server") as mock_get_authentication_with_server: + + expected_env = list(env_args.items())[0][1] if env_args else None + + mock_get_metadata.return_value = sample_schema_metadata + mock_get_authentication_with_server.return_value = sample_auth + + # When called with no vapp, get_metadata is consulted (after getting auth info) + assert ff_utils.get_schema('User', **env_args) == sample_schema_metadata + + mock_get_authentication_with_server.assert_not_called() + mock_get_metadata.assert_called_once_with(obj_id='profiles/User.json', key=None, ff_env=expected_env, + add_on='frame=raw') + + sample_vapp.get.assert_not_called() + + sample_vapp.get.assert_not_called() + + with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata: + with mock.patch.object(ff_utils, "get_authentication_with_server") as mock_get_authentication_with_server: + + sample_vapp.get.return_value = MockResponse(200, json=sample_schema_metadata) + + assert ff_utils.get_schema('User', portal_vapp=sample_vapp) == sample_schema_metadata + + mock_get_authentication_with_server.assert_not_called() + mock_get_metadata.assert_not_called() + + sample_vapp.get.assert_called_once_with('profiles/User.json?frame=raw') + + +@pytest.mark.unit +def test_get_schemas_with_vapp(): + + sample_vapp = mock.MagicMock() + sample_schema_metadata = {"foo": {"$id": "Foo.json"}, "bar": {"$id": "Bar.json"}} + sample_auth = mock.MagicMock() + + with pytest.raises(ValueError) as exc: + ff_utils.get_schemas(ff_env='foo', portal_env='bar') + assert str(exc.value) == 'You may not supply both portal_env= and ff_env= together.' + + with pytest.raises(ValueError) as exc: + ff_utils.get_schemas(ff_env='foo', portal_vapp=sample_vapp) + assert str(exc.value) == 'You may not supply both portal_vapp= and ff_env= together.' + + with pytest.raises(ValueError) as exc: + ff_utils.get_schemas(portal_env='foo', portal_vapp=sample_vapp) + assert str(exc.value) == 'You may not supply both portal_vapp= and portal_env= together.' + + for env_args in [{}, {'portal_env': 'foo'}, {'ff_env': 'foo'}]: + + with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata: + with mock.patch.object(ff_utils, "get_authentication_with_server") as mock_get_authentication_with_server: + + expected_env = list(env_args.items())[0][1] if env_args else None + + mock_get_metadata.return_value = sample_schema_metadata + mock_get_authentication_with_server.return_value = sample_auth + + assert ff_utils.get_schemas(**env_args) == sample_schema_metadata + + mock_get_authentication_with_server.assert_not_called() + mock_get_metadata.assert_called_once_with(obj_id='profiles/', key=None, ff_env=expected_env, + add_on='frame=raw') + + sample_vapp.get.assert_not_called() + + sample_vapp.get.assert_not_called() + + with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata: + with mock.patch.object(ff_utils, "get_authentication_with_server") as mock_get_authentication_with_server: + + sample_vapp.get.return_value = sample_schema_metadata + + assert ff_utils.get_schemas(portal_vapp=sample_vapp) == sample_schema_metadata + + mock_get_authentication_with_server.assert_not_called() + mock_get_metadata.assert_not_called() + + sample_vapp.get.assert_called_once_with('profiles/?frame=raw') + + def test_get_schemas_options(): mocked_schemas = { @@ -1350,9 +1455,10 @@ def mocked_schemas_subset(keys): with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata: - def mocked_get_metadata(url, key, add_on): - assert url == "profiles/" # this is the web API to ask for all profiles - assert key == 'some-auth' # we assume auth is tested elsewhere + def mocked_get_metadata(obj_id, key, ff_env, add_on): + assert obj_id == "profiles/" # this is the web API to ask for all profiles + assert key is None # it would get looked up + assert ff_env is None # it would get looked up, too assert add_on == "frame=raw" # we presently always send this return mocked_schemas