From f026c2b4e06d48e53a64319d4e9b4d477e9effe5 Mon Sep 17 00:00:00 2001 From: James McLaughlin Date: Fri, 8 Nov 2024 00:55:33 +0000 Subject: [PATCH] simplified curie logic --- .../linker/src/main/java/LinkerPass2.java | 73 +++++-------------- .../annotators/ShortFormAnnotator.java | 39 +--------- 2 files changed, 22 insertions(+), 90 deletions(-) diff --git a/dataload/linker/src/main/java/LinkerPass2.java b/dataload/linker/src/main/java/LinkerPass2.java index 6c511429f..68dc92576 100644 --- a/dataload/linker/src/main/java/LinkerPass2.java +++ b/dataload/linker/src/main/java/LinkerPass2.java @@ -144,6 +144,8 @@ private static void writeEntityArray(JsonReader jsonReader, JsonWriter jsonWrite Set stringsInEntity = new HashSet(); String entityIri = null; + EntityDefinitionSet defOfThisEntity = pass1Result.iriToDefinitions.get(entityIri); + while(jsonReader.peek() != JsonToken.END_OBJECT) { String name = jsonReader.nextName(); @@ -153,17 +155,26 @@ private static void writeEntityArray(JsonReader jsonReader, JsonWriter jsonWrite if(name.equals("iri")) { entityIri = jsonReader.nextString(); jsonWriter.value(entityIri); - } else if (name.equalsIgnoreCase("curie")) { - processCurieObject(jsonReader, jsonWriter, pass1Result, entityIri); - } else if (name.equalsIgnoreCase("shortForm")) { - processShortFormObject(jsonReader, jsonWriter, pass1Result, entityIri); - } else { - CopyJsonGatheringStrings.copyJsonGatheringStrings(jsonReader, jsonWriter, stringsInEntity); + continue; } - } + if(name == "curie") { + if(defOfThisEntity.definingDefinitions.size() > 0) { + // always use the defining ontology's curie, as the defining + // ontology knows the base URI and we might not + // + com.google.gson.internal.Streams.write( + defOfThisEntity.definingDefinitions.iterator().next().curie, + jsonWriter); + continue; + } else { + // fallback to using the curie we already have + } + } + + CopyJsonGatheringStrings.copyJsonGatheringStrings(jsonReader, jsonWriter, stringsInEntity); + } - EntityDefinitionSet defOfThisEntity = pass1Result.iriToDefinitions.get(entityIri); if(defOfThisEntity != null) { jsonWriter.name(IS_DEFINING_ONTOLOGY.getText()); @@ -476,50 +487,4 @@ private static void processShortFormObject(JsonReader jsonReader, JsonWriter jso jsonWriter.name("value").value(shortFormObject.get("value").getAsString()); jsonWriter.endObject(); } - - private static void processCurieObject(JsonReader jsonReader, JsonWriter jsonWriter, LinkerPass1.LinkerPass1Result pass1Result, String entityIri) throws IOException { - jsonReader.beginObject(); - JsonObject curieObject = new JsonObject(); - - while (jsonReader.peek() != JsonToken.END_OBJECT) { - String curieFieldName = jsonReader.nextName(); - if (curieFieldName.equals("type")) { - JsonArray typeArray = new JsonArray(); - jsonReader.beginArray(); - while (jsonReader.peek() != JsonToken.END_ARRAY) { - typeArray.add(jsonReader.nextString()); - } - jsonReader.endArray(); - curieObject.add("type", typeArray); - } else if (curieFieldName.equals("value")) { - String curieValue = jsonReader.nextString(); - // Modify the value attribute - curieValue = getProcessedCurieValue(pass1Result, entityIri); - curieObject.addProperty("value", curieValue); - } - } - jsonReader.endObject(); - - // Write the modified curie object - jsonWriter.beginObject(); - jsonWriter.name("type"); - jsonWriter.beginArray(); - for (JsonElement typeElement : curieObject.getAsJsonArray("type")) { - jsonWriter.value(typeElement.getAsString()); - } - jsonWriter.endArray(); - jsonWriter.name("value").value(curieObject.get("value").getAsString()); - jsonWriter.endObject(); - } - - private static String getProcessedCurieValue(LinkerPass1.LinkerPass1Result pass1Result, String entityIri) { - var def = pass1Result.iriToDefinitions.get(entityIri); - if (def.definitions.iterator().hasNext()) { - JsonObject defCurieObject = def.definitions.iterator().next().curie.getAsJsonObject(); - if (defCurieObject.has("value")) { - return defCurieObject.get("value").getAsString(); - } - } - return ""; - } } diff --git a/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/ShortFormAnnotator.java b/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/ShortFormAnnotator.java index fa4f2002f..60c2400fd 100644 --- a/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/ShortFormAnnotator.java +++ b/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/ShortFormAnnotator.java @@ -36,36 +36,7 @@ public static void annotateShortForms(OntologyGraph graph) { preferredPrefix = graph.config.get("id").toString().toUpperCase(); } - String shortForm = extractShortForm(graph, ontologyBaseUris, preferredPrefix, c.uri); - - /* - CURIEs are formed by following rules: - If there is only one underscore "_" AND the characters before the underscore are PreferredPrefix then replace the underscore with colon ":" - If there is only one underscore "_" AND the characters after the underscore are numbers then replace the underscore with colon ":" - If there is only one underscore "_" and the characters after the underscore are not just numbers then just keep the curie same as shortform - If there are multiple underscore but has only digits after the last underscore then the code replaces the last underscore with a colon - */ - String curie; - // Pattern for: single underscore, prefix matches preferredPrefix - String preferredPrefixPattern = "^(?:" + Pattern.quote(preferredPrefix) + ")_([^_]+)$"; - // Pattern for: single underscore, suffix is all digits - String singleUnderscoreDigitsPattern = "^[^_]+_(\\d+)$"; - // Pattern for: multiple underscores, suffix is all digits - String multipleUnderscoresDigitsPattern = "^(.*)_(\\d+)$"; - if (shortForm.matches(preferredPrefixPattern)) { - curie = shortForm.replaceFirst("_", ":"); - } else if (shortForm.matches(singleUnderscoreDigitsPattern)) { - curie = shortForm.replaceFirst("_", ":"); - } else if (shortForm.matches(multipleUnderscoresDigitsPattern)) { - // Multiple underscores, suffix is digits - // Replace the last underscore with a colon - curie = shortForm.replaceFirst("_(?=\\d+$)", ":"); - } else { - // No transformation needed - curie = shortForm; - } - - c.properties.addProperty("shortForm", PropertyValueLiteral.fromString(shortForm)); + String curie = extractCurie(graph, ontologyBaseUris, preferredPrefix, c.uri); c.properties.addProperty("curie", PropertyValueLiteral.fromString(curie)); } } @@ -75,20 +46,16 @@ public static void annotateShortForms(OntologyGraph graph) { } - private static String extractShortForm(OntologyGraph graph, Set ontologyBaseUris, String preferredPrefix, + private static String extractCurie(OntologyGraph graph, Set ontologyBaseUris, String preferredPrefix, String uri) { if (uri.startsWith("urn:")) { return uri.substring(4); } - // if(uri.startsWith("http://purl.obolibrary.org/obo/")) { - // return uri.substring("http://purl.obolibrary.org/obo/".length()); - // } - for (String baseUri : ontologyBaseUris) { if (uri.startsWith(baseUri) && preferredPrefix != null) { - return preferredPrefix + "_" + uri.substring(baseUri.length()); + return preferredPrefix + ":" + uri.substring(baseUri.length()); } }