Skip to content

Commit

Permalink
Merge pull request #587 from EBISPOT/searchable-anno-values2
Browse files Browse the repository at this point in the history
Implement searchable anno values
  • Loading branch information
henrietteharmse authored Nov 21, 2023
2 parents d92d04d + 4c59f24 commit 840114c
Show file tree
Hide file tree
Showing 7 changed files with 71 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ public static List<String> mapFieldsList(Collection<String> ols3FieldNames) {
continue;
}

if (legacyFieldName.equals("_json")) {
newFields.add("_json" + suffix);
if (legacyFieldName.equals("annotations_trimmed")) {
newFields.add(prefix + "searchableAnnotationValues" + suffix);
continue;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ public void search(
if (queryFields == null) {
// if exact just search the supplied fields for exact matches
if (exact) {
String[] fields = {"label_s", "synonym_s", "short_form_s", "obo_id_s", "iri_s", "_json"};
String[] fields = {"label_s", "synonym_s", "short_form_s", "obo_id_s", "iri_s", "annotations_trimmed"};
solrQuery.setQuery(
"((" +
createUnionQuery(query.toLowerCase(), SolrFieldMapper.mapFieldsList(List.of(fields)).toArray(new String[0]), true)
Expand All @@ -85,7 +85,7 @@ public void search(
solrQuery.set("defType", "edismax");
solrQuery.setQuery(query);

String[] fields = {"label^5", "synonym^3", "definition", "short_form^2", "obo_id^2", "iri", "_json"};
String[] fields = {"label^5", "synonym^3", "definition", "short_form^2", "obo_id^2", "iri", "annotations_trimmed"};

solrQuery.set("qf", String.join(" ", SolrFieldMapper.mapFieldsList(List.of(fields))));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ public static void addSearchFieldsToQuery(OlsSolrQuery query, String searchField
query.addSearchField("id", 1, SearchType.WHITESPACE_EDGES);
query.addSearchField("oboId", 1, SearchType.WHITESPACE_EDGES);
query.addSearchField("synonym", 1, SearchType.WHITESPACE_EDGES);
query.addSearchField("searchableAnnotationValues", 1, SearchType.WHITESPACE_EDGES);
} else {
for (ParsedField field : parseFieldsString(searchFields)) {
query.addSearchField(field.property, field.weight, SearchType.CASE_INSENSITIVE_TOKENS);
Expand Down
4 changes: 3 additions & 1 deletion dataload/json2neo/src/main/java/OntologyWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ public class OntologyWriter {

public static final Set<String> PROPERTY_BLACKLIST = Set.of(
// large and doesn't get queried
"appearsIn"
"appearsIn",
// all property values together, this is for solr and not useful in neo4j
"searchableAnnotationValues"
);

public static final Set<String> EDGE_BLACKLIST = Set.of(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ private String urlToFilename(String url) {
long endTime = System.nanoTime();
System.out.println("load ontology: " + ((endTime - startTime) / 1000 / 1000 / 1000));

SearchableAnnotationValuesAnnotator.annotateSearchableAnnotationValues(this);
InverseOfAnnotator.annotateInverseOf(this);
NegativePropertyAssertionAnnotator.annotateNegativePropertyAssertions(this);
OboSynonymTypeNameAnnotator.annotateOboSynonymTypeNames(this); // n.b. this one labels axioms so must run before the ReifiedPropertyAnnotator
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package uk.ac.ebi.rdf2json.annotators;

import uk.ac.ebi.rdf2json.OntologyGraph;
import uk.ac.ebi.rdf2json.OntologyNode;
import uk.ac.ebi.rdf2json.properties.PropertyValue;

import java.util.ArrayList;
import java.util.List;

import static uk.ac.ebi.rdf2json.properties.PropertyValue.Type.LITERAL;

public class SearchableAnnotationValuesAnnotator {

    // Predicate namespaces that are NOT considered annotations for this exercise.
    private static final String[] NON_ANNOTATION_NAMESPACES = {
        "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
        "http://www.w3.org/2000/01/rdf-schema#",
        "http://www.w3.org/2002/07/owl#"
    };

    /**
     * Adds a "searchableAnnotationValues" property to every class, property, individual and
     * ontology node in the graph.
     *
     * Roughly equivalent to "annotations_trimmed" in OLS3: the property holds just the values
     * (no predicates) of all of the "annotations" of an entity. "Annotation" is not a well
     * defined term, so here it means every literal value whose predicate lies outside the RDF,
     * RDFS and OWL namespaces.
     *
     * The field is used for solr searching, so that the value of any property can be searched
     * for (regardless of how important OLS thinks it is) and still yield a result.
     *
     * The elapsed time, in whole seconds, is printed to standard output when the pass finishes.
     *
     * @param graph the ontology graph whose nodes are annotated in place
     */
    public static void annotateSearchableAnnotationValues(OntologyGraph graph) {

        long began = System.nanoTime();

        for (String nodeId : graph.nodes.keySet()) {
            OntologyNode node = graph.nodes.get(nodeId);
            if (!isSearchableEntity(node)) {
                continue;
            }

            // Gather the literals first and attach them afterwards (presumably so the property
            // set is not modified while its predicates are still being iterated).
            List<PropertyValue> literals = new ArrayList<>();

            for (var predicate : node.properties.getPropertyPredicates()) {
                if (isNonAnnotationPredicate(predicate)) {
                    continue;
                }
                for (var candidate : node.properties.getPropertyValues(predicate)) {
                    if (candidate.getType().equals(LITERAL)) {
                        literals.add(candidate);
                    }
                }
            }

            for (PropertyValue literal : literals) {
                node.properties.addProperty("searchableAnnotationValues", literal);
            }
        }

        long finished = System.nanoTime();
        // Integer division: nanoseconds -> whole seconds.
        long elapsedSeconds = (finished - began) / 1000 / 1000 / 1000;
        System.out.println("annotate searchable annotation values: " + elapsedSeconds);
    }

    // True when the node is typed as a class, property, individual or ontology.
    private static boolean isSearchableEntity(OntologyNode node) {
        return node.types.contains(OntologyNode.NodeType.CLASS)
                || node.types.contains(OntologyNode.NodeType.PROPERTY)
                || node.types.contains(OntologyNode.NodeType.INDIVIDUAL)
                || node.types.contains(OntologyNode.NodeType.ONTOLOGY);
    }

    // True when the predicate IRI starts with one of the excluded (non-annotation) namespaces.
    private static boolean isNonAnnotationPredicate(String predicate) {
        for (String namespace : NON_ANNOTATION_NAMESPACES) {
            if (predicate.startsWith(namespace)) {
                return true;
            }
        }
        return false;
    }
}
2 changes: 1 addition & 1 deletion dataload/solr_config/ols4_entities/conf/managed-schema.xml
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@
<!-- OLS4: we are using dynamic fields for everything, so there is no need to specify much of a schema.
However, we do need to specify the fields we DON'T want to be indexed.
-->
<field name="_json" type="ols4json" indexed="true" stored="true" multiValued="false" />
<field name="_json" type="ols4json" indexed="false" stored="true" multiValued="false" />

<!-- Type used for data-driven schema, to add a string copy for each text field -->
<dynamicField name="str_*" type="strings" stored="false" docValues="true" indexed="false" useDocValuesAsStored="false"/>
Expand Down

0 comments on commit 840114c

Please sign in to comment.