Skip to content

Commit

Permalink
Some optimization of the exists() method: use a simpler query, and …
Browse files Browse the repository at this point in the history
…allow restricting the query to specific spaces.
  • Loading branch information
apdavison committed Oct 18, 2024
1 parent 17b9de7 commit 1ac7b56
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 10 deletions.
3 changes: 3 additions & 0 deletions fairgraph/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ def query(
scope: str = "released",
id_key: str = "@id",
use_stored_query: bool = False,
restrict_to_spaces: Optional[List[str]] = None
) -> ResultPage[JsonLdDocument]:
"""
Execute a Knowledge Graph (KG) query with the given filters and query definition.
Expand Down Expand Up @@ -204,6 +205,7 @@ def _query(scope, from_index, size):
stage=STAGE_MAP[scope],
pagination=Pagination(start=from_index, size=size),
instance_id=instance_id,
restrict_to_spaces=restrict_to_spaces
)
error_context = f"_query(scope={scope} query_id={query_id} filter={filter} instance_id={instance_id} size={size} from_index={from_index})"
return self._check_response(response, error_context=error_context)
Expand All @@ -217,6 +219,7 @@ def _query(scope, from_index, size):
stage=STAGE_MAP[scope],
pagination=Pagination(start=from_index, size=size),
instance_id=instance_id,
restrict_to_spaces=restrict_to_spaces
)
error_context = f"_query(scope={scope} query_id={query_id} filter={filter} instance_id={instance_id} size={size} from_index={from_index})"
return self._check_response(response, error_context=error_context)
Expand Down
56 changes: 48 additions & 8 deletions fairgraph/kgobject.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
have_tabulate = False
from .utility import expand_uri, as_list, expand_filter, ActivityLog
from .registry import lookup_type
from .queries import Query
from .queries import Query, QueryProperty
from .errors import AuthorizationError, ResourceExistsError, CannotBuildExistenceQuery
from .caching import object_cache, save_cache, generate_cache_key
from .base import RepresentsSingleObject, ContainsMetadata, SupportsQuerying, IRI, JSONdict
Expand Down Expand Up @@ -470,7 +470,12 @@ def diff(self, other):
differences["properties"][prop.name] = (val_self, val_other)
return differences

def exists(self, client: KGClient, ignore_duplicates: bool = False) -> bool:
def exists(
self,
client: KGClient,
ignore_duplicates: bool = False,
in_spaces: Optional[List[str]] = None
) -> bool:
"""Check if this object already exists in the KnowledgeGraph"""

if self.id:
Expand Down Expand Up @@ -509,12 +514,12 @@ def exists(self, client: KGClient, ignore_duplicates: bool = False) -> bool:
self.remote_data = cached_obj.remote_data # copy or update needed?
return True

query = self.__class__.generate_query(
space=None,
query = self.__class__.generate_minimal_query(
client=client,
filters=query_filter,
)
instances = client.query(query=query, size=2, scope="any").data

instances = client.query(query=query, size=2, scope="any", restrict_to_spaces=in_spaces).data

if instances:
if len(instances) > 1 and not ignore_duplicates:
Expand All @@ -527,10 +532,10 @@ def exists(self, client: KGClient, ignore_duplicates: bool = False) -> bool:
if instance is None:
return False

self.id = instances[0]["@id"]
self.id = instance["@id"]
assert isinstance(self.id, str)
save_cache[self.__class__][query_cache_key] = self.id
self._update_empty_properties(instances[0], client) # also updates `remote_data`
self._update_empty_properties(instance, client) # also updates `remote_data`
return bool(instances)

def modified_data(self) -> JSONdict:
Expand Down Expand Up @@ -619,7 +624,7 @@ def save(
else:
space = self.space
logger.info(f"Saving a {self.__class__.__name__} in space {space}")
if self.exists(client, ignore_duplicates=ignore_duplicates):
if self.exists(client, ignore_duplicates=ignore_duplicates, in_spaces=[space]):
if not self.allow_update:
logger.info(f" - not updating {self.__class__.__name__}(id={self.id}), update not allowed by user")
if activity_log:
Expand Down Expand Up @@ -840,6 +845,41 @@ def generate_query(
# than necessary, but it makes the logic easier to understand.
return query.serialize()

@classmethod
def generate_minimal_query(
    cls,
    client: KGClient,
    filters: Optional[Dict[str, Any]] = None,
    label: Optional[str] = None,
) -> Union[Dict[str, Any], None]:
    """
    Build a stripped-down KG query definition as a JSON-LD document.

    The only property requested explicitly is ``@type``; properties needed
    to express the filters are appended afterwards. The query is intended
    for cheap existence checks, where only the identity of any matching
    instances matters rather than their full content.

    Args:
        client: KGClient object that handles the communication with the KG.
            It is not used in the body of this method.
        filters (dict, optional): A dictionary defining search parameters
            for the query. An empty or missing dictionary means that no
            filters are normalized and ``None`` is passed on instead.
        label (str, optional): A label for the query.

    Returns:
        The serialized query definition, as produced by ``Query.serialize()``.
    """
    # Filters are expanded and normalized only when some were supplied.
    normalized_filters = cls.normalize_filter(expand_filter(filters)) if filters else None

    # Step 1: the bare query skeleton, not restricted to any space.
    minimal_query = Query(
        node_type=cls.type_,
        label=label,
        space=None,
        properties=[QueryProperty("@type")],
    )

    # Step 2: attach the properties that carry the filter conditions.
    filter_properties = cls.generate_query_filter_properties(normalized_filters)
    minimal_query.properties.extend(filter_properties)

    return minimal_query.serialize()

def children(
self, client: KGClient, follow_links: Optional[Dict[str, Any]] = None
) -> List[RepresentsSingleObject]:
Expand Down
2 changes: 1 addition & 1 deletion fairgraph/openminds/core/data/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ class File(KGObject):
),
]
aliases = {"hash": "hashes"}
existence_query_properties = ("iri", "hash")
existence_query_properties = ("iri", "hashes")

def __init__(
self,
Expand Down
1 change: 1 addition & 0 deletions fairgraph/openminds/core/data/hash.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class Hash(EmbeddedMetadata):
),
]
reverse_properties = []
existence_query_properties = ("algorithm", "digest")

def __init__(
    self,
    algorithm=None,
    digest=None,
    id=None,
    data=None,
    space=None,
    scope=None,
):
    """
    Initialise the object from its ``algorithm`` and ``digest`` values.

    Only ``algorithm``, ``digest`` and ``data`` are forwarded to the parent
    initialiser; ``id``, ``space`` and ``scope`` are accepted in the
    signature but are not passed on.
    """
    return super().__init__(algorithm=algorithm, digest=digest, data=data)
2 changes: 1 addition & 1 deletion test/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def instance_from_full_uri(
else:
raise NotImplementedError

def query(self, query, filter=None, space=None, size=100, from_index=0, scope="released"):
def query(self, query, filter=None, space=None, size=100, from_index=0, scope="released", restrict_to_spaces=None):
for prop in query["structure"]:
if prop.get("propertyName", "") in ("Qname", "Qfull_name"):
filter_value = prop["filter"]["value"]
Expand Down

0 comments on commit 1ac7b56

Please sign in to comment.