Skip to content

Commit

Permalink
simplified curie logic
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesamcl committed Nov 8, 2024
1 parent 3783d98 commit f026c2b
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 90 deletions.
73 changes: 19 additions & 54 deletions dataload/linker/src/main/java/LinkerPass2.java
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,8 @@ private static void writeEntityArray(JsonReader jsonReader, JsonWriter jsonWrite
Set<String> stringsInEntity = new HashSet<String>();
String entityIri = null;

EntityDefinitionSet defOfThisEntity = pass1Result.iriToDefinitions.get(entityIri);

while(jsonReader.peek() != JsonToken.END_OBJECT) {

String name = jsonReader.nextName();
Expand All @@ -153,17 +155,26 @@ private static void writeEntityArray(JsonReader jsonReader, JsonWriter jsonWrite
if(name.equals("iri")) {
entityIri = jsonReader.nextString();
jsonWriter.value(entityIri);
} else if (name.equalsIgnoreCase("curie")) {
processCurieObject(jsonReader, jsonWriter, pass1Result, entityIri);
} else if (name.equalsIgnoreCase("shortForm")) {
processShortFormObject(jsonReader, jsonWriter, pass1Result, entityIri);
} else {
CopyJsonGatheringStrings.copyJsonGatheringStrings(jsonReader, jsonWriter, stringsInEntity);
continue;
}
}

if(name == "curie") {
if(defOfThisEntity.definingDefinitions.size() > 0) {
// always use the defining ontology's curie, as the defining
// ontology knows the base URI and we might not
//
com.google.gson.internal.Streams.write(
defOfThisEntity.definingDefinitions.iterator().next().curie,
jsonWriter);
continue;
} else {
// fallback to using the curie we already have
}
}

CopyJsonGatheringStrings.copyJsonGatheringStrings(jsonReader, jsonWriter, stringsInEntity);
}

EntityDefinitionSet defOfThisEntity = pass1Result.iriToDefinitions.get(entityIri);
if(defOfThisEntity != null) {

jsonWriter.name(IS_DEFINING_ONTOLOGY.getText());
Expand Down Expand Up @@ -476,50 +487,4 @@ private static void processShortFormObject(JsonReader jsonReader, JsonWriter jso
jsonWriter.name("value").value(shortFormObject.get("value").getAsString());
jsonWriter.endObject();
}

/**
 * Copies one "curie" JSON object from the reader to the writer, substituting its "value".
 *
 * <p>The input object's "type" array (of strings) and "value" string are buffered, then an
 * object is written with "type" first and "value" second regardless of the input order.
 * The written value is whatever {@code getProcessedCurieValue} returns for
 * {@code entityIri}; if that lookup yields an empty string, the value read from the input
 * is kept rather than being overwritten with "".
 *
 * <p>Fields other than "type" and "value" are skipped and not copied. A field that is
 * absent from the input is also absent from the output.
 *
 * @param jsonReader  reader positioned at the start of the curie object
 * @param jsonWriter  writer positioned where the curie object's value is expected
 * @param pass1Result pass 1 result used to look up the entity's definitions
 * @param entityIri   IRI of the entity being written; may still be null if the "iri"
 *                    field has not been read yet
 * @throws IOException if reading from or writing to the underlying streams fails
 */
private static void processCurieObject(JsonReader jsonReader, JsonWriter jsonWriter, LinkerPass1.LinkerPass1Result pass1Result, String entityIri) throws IOException {
    JsonArray typeArray = null;
    String originalValue = null;

    jsonReader.beginObject();
    while (jsonReader.peek() != JsonToken.END_OBJECT) {
        String curieFieldName = jsonReader.nextName();
        if (curieFieldName.equals("type")) {
            typeArray = new JsonArray();
            jsonReader.beginArray();
            while (jsonReader.peek() != JsonToken.END_ARRAY) {
                typeArray.add(jsonReader.nextString());
            }
            jsonReader.endArray();
        } else if (curieFieldName.equals("value")) {
            originalValue = jsonReader.nextString();
        } else {
            // The name has been consumed, so its value must be consumed too;
            // otherwise the next nextName() call fails on the dangling value.
            jsonReader.skipValue();
        }
    }
    jsonReader.endObject();

    // Write the modified curie object
    jsonWriter.beginObject();
    if (typeArray != null) {
        jsonWriter.name("type");
        jsonWriter.beginArray();
        for (JsonElement typeElement : typeArray) {
            jsonWriter.value(typeElement.getAsString());
        }
        jsonWriter.endArray();
    }
    if (originalValue != null) {
        String processedValue = getProcessedCurieValue(pass1Result, entityIri);
        // Fall back to the curie we already have when no definition supplies one.
        jsonWriter.name("value").value(processedValue.isEmpty() ? originalValue : processedValue);
    }
    jsonWriter.endObject();
}

/**
 * Looks up the curie value recorded for an entity in the pass 1 result.
 *
 * <p>Takes the first element of the entity's {@code definitions} and returns the string
 * stored under "value" in that element's {@code curie} object.
 *
 * @param pass1Result pass 1 result holding the IRI-to-definitions map
 * @param entityIri   IRI of the entity to look up
 * @return the curie value, or an empty string when the IRI has no entry, the entry has no
 *         definitions, the first definition has no curie, or the curie has no "value"
 */
private static String getProcessedCurieValue(LinkerPass1.LinkerPass1Result pass1Result, String entityIri) {
    var def = pass1Result.iriToDefinitions.get(entityIri);
    if (def == null) {
        // The lookup can miss: other code in this file null-checks the same lookup.
        return "";
    }

    var definitionIterator = def.definitions.iterator();
    if (!definitionIterator.hasNext()) {
        return "";
    }

    var curie = definitionIterator.next().curie;
    if (curie == null) {
        return "";
    }

    JsonObject defCurieObject = curie.getAsJsonObject();
    if (defCurieObject.has("value")) {
        return defCurieObject.get("value").getAsString();
    }
    return "";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,36 +36,7 @@ public static void annotateShortForms(OntologyGraph graph) {
preferredPrefix = graph.config.get("id").toString().toUpperCase();
}

String shortForm = extractShortForm(graph, ontologyBaseUris, preferredPrefix, c.uri);

/*
CURIEs are formed by following rules:
If there is only one underscore "_" AND the characters before the underscore are PreferredPrefix then replace the underscore with colon ":"
If there is only one underscore "_" AND the characters after the underscore are numbers then replace the underscore with colon ":"
If there is only one underscore "_" and the characters after the underscore are not just numbers then just keep the curie same as shortform
If there are multiple underscore but has only digits after the last underscore then the code replaces the last underscore with a colon
*/
String curie;
// Pattern for: single underscore, prefix matches preferredPrefix
String preferredPrefixPattern = "^(?:" + Pattern.quote(preferredPrefix) + ")_([^_]+)$";
// Pattern for: single underscore, suffix is all digits
String singleUnderscoreDigitsPattern = "^[^_]+_(\\d+)$";
// Pattern for: multiple underscores, suffix is all digits
String multipleUnderscoresDigitsPattern = "^(.*)_(\\d+)$";
if (shortForm.matches(preferredPrefixPattern)) {
curie = shortForm.replaceFirst("_", ":");
} else if (shortForm.matches(singleUnderscoreDigitsPattern)) {
curie = shortForm.replaceFirst("_", ":");
} else if (shortForm.matches(multipleUnderscoresDigitsPattern)) {
// Multiple underscores, suffix is digits
// Replace the last underscore with a colon
curie = shortForm.replaceFirst("_(?=\\d+$)", ":");
} else {
// No transformation needed
curie = shortForm;
}

c.properties.addProperty("shortForm", PropertyValueLiteral.fromString(shortForm));
String curie = extractCurie(graph, ontologyBaseUris, preferredPrefix, c.uri);
c.properties.addProperty("curie", PropertyValueLiteral.fromString(curie));
}
}
Expand All @@ -75,20 +46,16 @@ public static void annotateShortForms(OntologyGraph graph) {

}

private static String extractShortForm(OntologyGraph graph, Set<String> ontologyBaseUris, String preferredPrefix,
private static String extractCurie(OntologyGraph graph, Set<String> ontologyBaseUris, String preferredPrefix,
String uri) {

if (uri.startsWith("urn:")) {
return uri.substring(4);
}

// if(uri.startsWith("http://purl.obolibrary.org/obo/")) {
// return uri.substring("http://purl.obolibrary.org/obo/".length());
// }

for (String baseUri : ontologyBaseUris) {
if (uri.startsWith(baseUri) && preferredPrefix != null) {
return preferredPrefix + "_" + uri.substring(baseUri.length());
return preferredPrefix + ":" + uri.substring(baseUri.length());
}
}

Expand Down

0 comments on commit f026c2b

Please sign in to comment.