From db89e19c48b6e4eefbebd2f0e8b96fe4a2420c41 Mon Sep 17 00:00:00 2001 From: James McLaughlin Date: Wed, 15 Nov 2023 09:59:34 +0000 Subject: [PATCH 1/2] Implement searchableAnnotationValues field --- .../controller/api/v1/SolrFieldMapper.java | 4 +- .../controller/api/v1/V1SearchController.java | 4 +- .../v2/helpers/V2SearchFieldsParser.java | 1 + .../src/main/java/OntologyWriter.java | 4 +- .../uk/ac/ebi/rdf2json/OntologyGraph.java | 2 + .../SearchableAnnotationValuesAnnotator.java | 61 +++++++++++++++++++ .../ols4_entities/conf/managed-schema.xml | 2 +- 7 files changed, 72 insertions(+), 6 deletions(-) create mode 100644 dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/SearchableAnnotationValuesAnnotator.java diff --git a/backend/src/main/java/uk/ac/ebi/spot/ols/controller/api/v1/SolrFieldMapper.java b/backend/src/main/java/uk/ac/ebi/spot/ols/controller/api/v1/SolrFieldMapper.java index dc2521a7e..74c6dc10b 100644 --- a/backend/src/main/java/uk/ac/ebi/spot/ols/controller/api/v1/SolrFieldMapper.java +++ b/backend/src/main/java/uk/ac/ebi/spot/ols/controller/api/v1/SolrFieldMapper.java @@ -76,8 +76,8 @@ public static List mapFieldsList(Collection ols3FieldNames) { continue; } - if (legacyFieldName.equals("_json")) { - newFields.add("_json" + suffix); + if (legacyFieldName.equals("annotations_trimmed")) { + newFields.add(prefix + "searchableAnnotationValues" + suffix); continue; } } diff --git a/backend/src/main/java/uk/ac/ebi/spot/ols/controller/api/v1/V1SearchController.java b/backend/src/main/java/uk/ac/ebi/spot/ols/controller/api/v1/V1SearchController.java index e2cb35e64..5c56f8df5 100644 --- a/backend/src/main/java/uk/ac/ebi/spot/ols/controller/api/v1/V1SearchController.java +++ b/backend/src/main/java/uk/ac/ebi/spot/ols/controller/api/v1/V1SearchController.java @@ -73,7 +73,7 @@ public void search( if (queryFields == null) { // if exact just search the supplied fields for exact matches if (exact) { - String[] fields = {"label_s", "synonym_s", 
"short_form_s", "obo_id_s", "iri_s", "_json"}; + String[] fields = {"label_s", "synonym_s", "short_form_s", "obo_id_s", "iri_s", "annotations_trimmed"}; solrQuery.setQuery( "((" + createUnionQuery(query.toLowerCase(), SolrFieldMapper.mapFieldsList(List.of(fields)).toArray(new String[0]), true) @@ -85,7 +85,7 @@ public void search( solrQuery.set("defType", "edismax"); solrQuery.setQuery(query); - String[] fields = {"label^5", "synonym^3", "definition", "short_form^2", "obo_id^2", "iri", "_json"}; + String[] fields = {"label^5", "synonym^3", "definition", "short_form^2", "obo_id^2", "iri", "annotations_trimmed"}; solrQuery.set("qf", String.join(" ", SolrFieldMapper.mapFieldsList(List.of(fields)))); diff --git a/backend/src/main/java/uk/ac/ebi/spot/ols/repository/v2/helpers/V2SearchFieldsParser.java b/backend/src/main/java/uk/ac/ebi/spot/ols/repository/v2/helpers/V2SearchFieldsParser.java index 6a2cff813..55a150bf3 100644 --- a/backend/src/main/java/uk/ac/ebi/spot/ols/repository/v2/helpers/V2SearchFieldsParser.java +++ b/backend/src/main/java/uk/ac/ebi/spot/ols/repository/v2/helpers/V2SearchFieldsParser.java @@ -20,6 +20,7 @@ public static void addSearchFieldsToQuery(OlsSolrQuery query, String searchField query.addSearchField("id", 1, SearchType.WHITESPACE_EDGES); query.addSearchField("oboId", 1, SearchType.WHITESPACE_EDGES); query.addSearchField("synonym", 1, SearchType.WHITESPACE_EDGES); + query.addSearchField("searchableAnnotationValues", 1, SearchType.WHITESPACE_EDGES); } else { for (ParsedField field : parseFieldsString(searchFields)) { query.addSearchField(field.property, field.weight, SearchType.CASE_INSENSITIVE_TOKENS); diff --git a/dataload/json2neo/src/main/java/OntologyWriter.java b/dataload/json2neo/src/main/java/OntologyWriter.java index bb6d649aa..30c1a4fd5 100644 --- a/dataload/json2neo/src/main/java/OntologyWriter.java +++ b/dataload/json2neo/src/main/java/OntologyWriter.java @@ -23,7 +23,9 @@ public class OntologyWriter { public static final Set 
PROPERTY_BLACKLIST = Set.of( // large and doesn't get queried - "appearsIn" + "appearsIn", + // all property values together, this is for solr and not useful in neo4j + "searchableAnnotationValues" ); public static final Set EDGE_BLACKLIST = Set.of( diff --git a/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/OntologyGraph.java b/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/OntologyGraph.java index 052849e5e..436b99f2b 100644 --- a/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/OntologyGraph.java +++ b/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/OntologyGraph.java @@ -247,6 +247,7 @@ private String urlToFilename(String url) { long endTime = System.nanoTime(); System.out.println("load ontology: " + ((endTime - startTime) / 1000 / 1000 / 1000)); + SearchableAnnotationValuesAnnotator.annotateSearchableAnnotationValues(this); InverseOfAnnotator.annotateInverseOf(this); NegativePropertyAssertionAnnotator.annotateNegativePropertyAssertions(this); OboSynonymTypeNameAnnotator.annotateOboSynonymTypeNames(this); // n.b. 
this one labels axioms so must run before the ReifiedPropertyAnnotator @@ -268,6 +269,7 @@ private String urlToFilename(String url) { DisjointWithAnnotator.annotateDisjointWith(this); HasIndividualsAnnotator.annotateHasIndividuals(this); EquivalenceAnnotator.annotateEquivalance(this); + SearchableAnnotationValuesAnnotator.annotateSearchableAnnotationValues(this); } diff --git a/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/SearchableAnnotationValuesAnnotator.java b/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/SearchableAnnotationValuesAnnotator.java new file mode 100644 index 000000000..6e8d83276 --- /dev/null +++ b/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/SearchableAnnotationValuesAnnotator.java @@ -0,0 +1,61 @@ +package uk.ac.ebi.rdf2json.annotators; + +import uk.ac.ebi.rdf2json.OntologyGraph; +import uk.ac.ebi.rdf2json.OntologyNode; +import uk.ac.ebi.rdf2json.properties.PropertyValue; + +import java.util.ArrayList; +import java.util.List; + +import static uk.ac.ebi.rdf2json.properties.PropertyValue.Type.LITERAL; + +public class SearchableAnnotationValuesAnnotator { + + // Roughly equivalent to "annotations_trimmed" in OLS3. + // + // A field that contains a list of just the values (no predicates) of all of the "annotations" (which is not a well + // defined term, so we have to make it up) of an entity. + // + // This field is used for solr searching, so that you can search for the value of any property (regardless of how + // important OLS thinks it is), and still expect a result. 
+ // + public static void annotateSearchableAnnotationValues(OntologyGraph graph) { + + long startTime3 = System.nanoTime(); + for(String id : graph.nodes.keySet()) { + OntologyNode c = graph.nodes.get(id); + if(c.types.contains(OntologyNode.NodeType.CLASS) || + c.types.contains(OntologyNode.NodeType.PROPERTY) || + c.types.contains(OntologyNode.NodeType.INDIVIDUAL) || + c.types.contains(OntologyNode.NodeType.ONTOLOGY)) { + + List values = new ArrayList<>(); + + for(var predicate : c.properties.getPropertyPredicates()) { + + // namespaces that are NOT considered annotations for this exercise... + // + if(predicate.startsWith("http://www.w3.org/1999/02/22-rdf-syntax-ns#") + || predicate.startsWith("http://www.w3.org/2000/01/rdf-schema#") + || predicate.startsWith("http://www.w3.org/2002/07/owl#")) { + + continue; + } + + for(var value : c.properties.getPropertyValues(predicate)) { + if(value.getType().equals(LITERAL)) { + values.add(value); + } + } + } + + for(var value : values) { + c.properties.addProperty("searchableAnnotationValues", value); + } + } + } + + long endTime3 = System.nanoTime(); + System.out.println("annotate searchable annotation values: " + ((endTime3 - startTime3) / 1000 / 1000 / 1000)); + } +} diff --git a/dataload/solr_config/ols4_entities/conf/managed-schema.xml b/dataload/solr_config/ols4_entities/conf/managed-schema.xml index f538162e8..bed3f4ab2 100644 --- a/dataload/solr_config/ols4_entities/conf/managed-schema.xml +++ b/dataload/solr_config/ols4_entities/conf/managed-schema.xml @@ -135,7 +135,7 @@ - + From 4c59f2465ef38513d775aa1b499feb00e03dfb79 Mon Sep 17 00:00:00 2001 From: James McLaughlin Date: Sun, 19 Nov 2023 17:55:05 +0000 Subject: [PATCH 2/2] fix searchableAnnotationValues being added twice --- .../rdf2json/src/main/java/uk/ac/ebi/rdf2json/OntologyGraph.java | 1 - 1 file changed, 1 deletion(-) diff --git a/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/OntologyGraph.java 
b/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/OntologyGraph.java index 436b99f2b..275e6a511 100644 --- a/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/OntologyGraph.java +++ b/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/OntologyGraph.java @@ -269,7 +269,6 @@ private String urlToFilename(String url) { DisjointWithAnnotator.annotateDisjointWith(this); HasIndividualsAnnotator.annotateHasIndividuals(this); EquivalenceAnnotator.annotateEquivalance(this); - SearchableAnnotationValuesAnnotator.annotateSearchableAnnotationValues(this); }