Skip to content

Commit

Permalink
Support multi valued facets and test filtering
Browse files Browse the repository at this point in the history
  • Loading branch information
Cito committed Aug 5, 2024
1 parent 5c5b553 commit 1415082
Show file tree
Hide file tree
Showing 4 changed files with 261 additions and 10 deletions.
53 changes: 43 additions & 10 deletions src/mass/adapters/outbound/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,18 +45,37 @@ def args_for_getfield(*, root_object_name: str, field_name: str) -> tuple[str, s
specified_field = pieces[-1]
prefix += "." + ".".join(pieces[:-1])

return (prefix, specified_field)
return prefix, specified_field


def pipeline_match_filters_stage(*, filters: list[models.Filter]) -> JsonObject:
    """Build the `$match` segment of the pipeline that applies the search filters.

    Values given for the same key are treated as alternatives (OR), while the
    conditions for different keys must all be fulfilled (AND). Every key is
    matched both as a single-valued (string) and as a multi-valued (array) field.

    Args:
        filters: The filters to apply; each item provides a `key` and a `value`.

    Returns:
        A `$match` stage. When no filters are given, `{"$match": {}}` is
        returned, which lets every document pass.
    """
    # Collect all requested values per key (keys keep their first-seen order)
    values_by_key = defaultdict(list)
    for item in filters:
        values_by_key[item.key].append(item.value)

    conditions = []
    for key, values in values_by_key.items():
        # "id_" is used as is, every other key is prefixed with "content."
        field = key if key == "id_" else "content." + key
        conditions.append(
            {
                "$or": [
                    {
                        "$and": [
                            {field: {"$type": "string"}},
                            {field: {"$in": values}},
                        ]
                    },
                    {
                        "$and": [
                            {field: {"$type": "array"}},
                            {field: {"$elemMatch": {"$in": values}}},
                        ]
                    },
                ]
            }
        )

    # MongoDB rejects an empty "$and" array, so fall back to a match-all stage
    if not conditions:
        return {"$match": {}}
    return {"$match": {"$and": conditions}}


def pipeline_facet_sort_and_paginate(
Expand All @@ -74,8 +93,16 @@ def pipeline_facet_sort_and_paginate(
prefix, specified_field = args_for_getfield(
root_object_name="content", field_name=facet.key
)

segment[facet.name] = [
name = facet.name
if not name:
name = facet.key.capitalize()
segment[name] = [
{
"$unwind": {
"path": f"{prefix}.{specified_field}",
"preserveNullAndEmptyArrays": True,
}
},
{
"$group": {
"_id": {"$getField": {"field": specified_field, "input": prefix}},
Expand Down Expand Up @@ -116,8 +143,14 @@ def pipeline_project(*, facet_fields: list[models.FieldLabel]) -> JsonObject:

# add a segment for each facet to summarize the options
for facet in facet_fields:
key = facet.key
name = facet.name or key.capitalize()
segment["facets"].append(
{"key": facet.key, "name": facet.name, "options": f"${facet.name}"}
{
"key": key,
"name": name,
"options": f"${name}",
}
)
return {"$project": segment}

Expand Down
8 changes: 8 additions & 0 deletions tests/fixtures/test_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,14 @@ searchable_classes:
- key: data
name: Data
selected_fields: []
FilteringTests:
description: Data for testing filtering on single- and multi-valued fields.
facetable_fields:
- key: species
- key: eats
name: Food
selected_fields:
- key: name
resource_change_event_topic: searchable_resources
resource_deletion_event_type: searchable_resource_deleted
resource_upsertion_event_type: searchable_resource_upserted
Expand Down
51 changes: 51 additions & 0 deletions tests/fixtures/test_data/FilteringTests.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
{
"items": [
{
"eats": [
"bananas"
],
"id_": "1",
"name": "Jack",
"species": "monkey"
},
{
"eats": [
"dog food",
"treats"
],
"id_": "2",
"name": "Bruiser",
"species": "dog"
},
{
"eats": [
"spaghetti",
"meatballs"
],
"id_": "3",
"name": "Lady",
"species": "dog"
},
{
"eats": [
"fish",
"lasagna",
"meatballs",
"spaghetti",
"treats"
],
"id_": "4",
"name": "Garfield",
"species": "cat"
},
{
"eats": [
"fish",
"shrimp"
],
"id_": "5",
"name": "Flipper",
"species": "dolphin"
}
]
}
159 changes: 159 additions & 0 deletions tests/test_filtering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
# for the German Human Genome-Phenome Archive (GHGA)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Tests concerning the filtering functionality"""

import pytest

from tests.fixtures.joint import JointFixture, QueryParams

# Name of the searchable class that every test in this module queries.
CLASS_NAME = "FilteringTests"


@pytest.mark.asyncio
async def test_facets(joint_fixture: JointFixture):
    """Check that an unfiltered search reports both facets with correct counts."""
    query: QueryParams = {"class_name": CLASS_NAME}

    response = await joint_fixture.call_search_endpoint(query)

    all_facets = response.facets
    assert len(all_facets) == 2
    species_facet, food_facet = all_facets

    # First facet: the single-valued "species" field
    assert species_facet.key == "species"
    assert species_facet.name == "Species"
    species_counts = {opt.value: opt.count for opt in species_facet.options}
    assert species_counts == {"cat": 1, "dog": 2, "dolphin": 1, "monkey": 1}

    # Second facet: the multi-valued "eats" field, counted per individual value
    assert food_facet.key == "eats"
    assert food_facet.name == "Food"
    food_counts = {opt.value: opt.count for opt in food_facet.options}
    assert food_counts == {
        "bananas": 1,
        "dog food": 1,
        "fish": 2,
        "lasagna": 1,
        "meatballs": 2,
        "shrimp": 1,
        "spaghetti": 2,
        "treats": 2,
    }


@pytest.mark.parametrize(
    "species,names",
    [("mouse", []), ("cat", ["Garfield"]), ("dog", ["Bruiser", "Lady"])],
    ids=[0, 1, 2],
)
@pytest.mark.asyncio
async def test_single_valued_with_with_single_filter(
    species: str, names: list[str], joint_fixture: JointFixture
):
    """Filter the single-valued "species" field by one value and check the result."""
    query: QueryParams = {
        "class_name": CLASS_NAME,
        "filter_by": "species",
        "value": species,
    }

    response = await joint_fixture.call_search_endpoint(query)

    # The hits must be exactly the expected resources, in the expected order
    hit_names = [hit.content["name"] for hit in response.hits]
    assert hit_names == names

    # Both facets are still reported; inspect the one for the filtered field
    all_facets = response.facets
    assert len(all_facets) == 2
    species_facet = all_facets[0]
    assert species_facet.key == "species"
    assert species_facet.name == "Species"

    facet_options = species_facet.options
    if not names:
        # Nothing matched, so there is nothing to count
        assert not facet_options
        return

    # Only the filtered value remains, counted once per hit
    assert len(facet_options) == 1
    sole_option = facet_options[0]
    assert sole_option.count == len(names)
    assert sole_option.value == species


@pytest.mark.parametrize(
    "food,names",
    [("broccoli", []), ("bananas", ["Jack"]), ("fish", ["Garfield", "Flipper"])],
    ids=[0, 1, 2],
)
@pytest.mark.asyncio
async def test_multi_valued_with_with_single_filter(
    food: str, names: list[str], joint_fixture: JointFixture
):
    """Filter the multi-valued "eats" field by one value and check the result."""
    query: QueryParams = {
        "class_name": CLASS_NAME,
        "filter_by": "eats",
        "value": food,
    }

    response = await joint_fixture.call_search_endpoint(query)

    # The hits must be exactly the expected resources, in the expected order
    hit_names = [hit.content["name"] for hit in response.hits]
    assert hit_names == names

    # Both facets are still reported; inspect the one for the filtered field
    all_facets = response.facets
    assert len(all_facets) == 2
    food_facet = all_facets[1]
    assert food_facet.key == "eats"
    assert food_facet.name == "Food"

    facet_options = food_facet.options
    if not names:
        # Nothing matched, so there is nothing to count
        assert not facet_options
        return

    counts = {opt.value: opt.count for opt in facet_options}
    if food == "fish":
        # Everything that Garfield or Flipper eat shows up, not just the fish
        expected_counts = {
            "fish": 2,
            "lasagna": 1,
            "meatballs": 1,
            "shrimp": 1,
            "spaghetti": 1,
            "treats": 1,
        }
    else:
        expected_counts = {food: 1}
    assert counts == expected_counts


@pytest.mark.asyncio
async def test_multiple_filters(joint_fixture: JointFixture):
    """Test how several filters are combined.

    Filters on different fields are combined with AND, while several values
    for the same field are combined with OR.
    """
    # Ask for cats, dogs or monkeys that eat fish or bananas
    criteria = [
        ("species", "cat"),
        ("species", "dog"),
        ("species", "monkey"),
        ("eats", "fish"),
        ("eats", "bananas"),
    ]
    query: QueryParams = {
        "class_name": CLASS_NAME,
        "filter_by": [field for field, _ in criteria],
        "value": [wanted for _, wanted in criteria],
    }

    response = await joint_fixture.call_search_endpoint(query)

    # Jack and Garfield are the only ones satisfying both conditions
    hit_names = [hit.content["name"] for hit in response.hits]
    assert hit_names == ["Jack", "Garfield"]

0 comments on commit 1415082

Please sign in to comment.