From 1150b1ab45ccba26fb9e96622fbf13b0d419a1ad Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Mon, 23 Sep 2024 17:44:33 +0200 Subject: [PATCH 01/21] Add QueryData class --- .../org/aksw/iguana/cc/query/QueryData.java | 50 +++++++++++++++++++ .../cc/query/list/FileBasedQueryList.java | 11 ++++ .../aksw/iguana/cc/query/list/QueryList.java | 3 ++ .../query/list/impl/FileCachingQueryList.java | 2 + .../query/list/impl/FileReadingQueryList.java | 6 +++ .../query/list/impl/StringListQueryList.java | 8 +++ 6 files changed, 80 insertions(+) create mode 100644 src/main/java/org/aksw/iguana/cc/query/QueryData.java diff --git a/src/main/java/org/aksw/iguana/cc/query/QueryData.java b/src/main/java/org/aksw/iguana/cc/query/QueryData.java new file mode 100644 index 00000000..22c870ce --- /dev/null +++ b/src/main/java/org/aksw/iguana/cc/query/QueryData.java @@ -0,0 +1,50 @@ +package org.aksw.iguana.cc.query; + +import org.aksw.iguana.cc.query.source.QuerySource; +import org.apache.jena.update.UpdateFactory; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +/** + * This class stores extra information about a query. + * At the moment, it only stores if the query is an update query or not. 
+ * + * @param queryId The id of the query + * @param update If the query is an update query + */ +public record QueryData(int queryId, boolean update) { + public static List generate(Collection queries) { + final var queryData = new ArrayList(); + int i = 0; + for (InputStream query : queries) { + boolean update = true; + try { + UpdateFactory.read(query); // Throws an exception if the query is not an update query + } catch (Exception e) { + update = false; + } + queryData.add(new QueryData(i++, update)); + } + return queryData; + } + + public static List generate(QuerySource queries) throws IOException { + final var streams = new ArrayList(); + int bound = queries.size(); + for (int i = 0; i < bound; i++) { + InputStream queryStream = queries.getQueryStream(i); + streams.add(queryStream); + } + return generate(streams); + } + + public static List generate(List queries) { + final var streams = queries.stream().map(s -> (InputStream) new ByteArrayInputStream(s.getBytes())).toList(); + return generate(streams); + } +} diff --git a/src/main/java/org/aksw/iguana/cc/query/list/FileBasedQueryList.java b/src/main/java/org/aksw/iguana/cc/query/list/FileBasedQueryList.java index 0256fee5..f4412e85 100644 --- a/src/main/java/org/aksw/iguana/cc/query/list/FileBasedQueryList.java +++ b/src/main/java/org/aksw/iguana/cc/query/list/FileBasedQueryList.java @@ -1,9 +1,11 @@ package org.aksw.iguana.cc.query.list; +import org.aksw.iguana.cc.query.QueryData; import org.aksw.iguana.cc.query.source.QuerySource; import java.io.IOException; import java.io.InputStream; +import java.util.List; /** * The abstract class for a QueryList. A query list provides the queries to the QueryHandler. @@ -16,6 +18,7 @@ public abstract class FileBasedQueryList implements QueryList{ * This is the QuerySource from which the queries should be retrieved. 
*/ final protected QuerySource querySource; + protected List queryData; public FileBasedQueryList(QuerySource querySource) { if (querySource == null) { @@ -24,6 +27,10 @@ public FileBasedQueryList(QuerySource querySource) { this.querySource = querySource; } + protected void setQueryData(List queryData) { + this.queryData = queryData; + } + /** * This method returns the amount of queries in the query list. * @@ -52,4 +59,8 @@ public int hashCode() { public abstract String getQuery(int index) throws IOException; public abstract InputStream getQueryStream(int index) throws IOException; + + public QueryData getQueryData(int index) { + return queryData.get(index); + } } diff --git a/src/main/java/org/aksw/iguana/cc/query/list/QueryList.java b/src/main/java/org/aksw/iguana/cc/query/list/QueryList.java index 623a8c67..877a03f9 100644 --- a/src/main/java/org/aksw/iguana/cc/query/list/QueryList.java +++ b/src/main/java/org/aksw/iguana/cc/query/list/QueryList.java @@ -1,5 +1,6 @@ package org.aksw.iguana.cc.query.list; +import org.aksw.iguana.cc.query.QueryData; import org.aksw.iguana.cc.query.source.QuerySource; import java.io.IOException; @@ -35,4 +36,6 @@ public interface QueryList { String getQuery(int index) throws IOException; InputStream getQueryStream(int index) throws IOException; + + QueryData getQueryData(int index); } diff --git a/src/main/java/org/aksw/iguana/cc/query/list/impl/FileCachingQueryList.java b/src/main/java/org/aksw/iguana/cc/query/list/impl/FileCachingQueryList.java index 8f6c3a38..78dbb349 100644 --- a/src/main/java/org/aksw/iguana/cc/query/list/impl/FileCachingQueryList.java +++ b/src/main/java/org/aksw/iguana/cc/query/list/impl/FileCachingQueryList.java @@ -1,5 +1,6 @@ package org.aksw.iguana.cc.query.list.impl; +import org.aksw.iguana.cc.query.QueryData; import org.aksw.iguana.cc.query.list.FileBasedQueryList; import org.aksw.iguana.cc.query.source.QuerySource; import org.aksw.iguana.commons.io.ByteArrayListInputStream; @@ -40,6 +41,7 @@ public 
FileCachingQueryList(QuerySource querySource) throws IOException { queries.add(balos); } } + setQueryData(QueryData.generate(queries.stream().map(ByteArrayListOutputStream::toInputStream).toList())); } @Override diff --git a/src/main/java/org/aksw/iguana/cc/query/list/impl/FileReadingQueryList.java b/src/main/java/org/aksw/iguana/cc/query/list/impl/FileReadingQueryList.java index e3074be0..045a326d 100644 --- a/src/main/java/org/aksw/iguana/cc/query/list/impl/FileReadingQueryList.java +++ b/src/main/java/org/aksw/iguana/cc/query/list/impl/FileReadingQueryList.java @@ -1,5 +1,6 @@ package org.aksw.iguana.cc.query.list.impl; +import org.aksw.iguana.cc.query.QueryData; import org.aksw.iguana.cc.query.list.FileBasedQueryList; import org.aksw.iguana.cc.query.source.QuerySource; @@ -15,6 +16,11 @@ public class FileReadingQueryList extends FileBasedQueryList { public FileReadingQueryList(QuerySource querySource) { super(querySource); + try { + setQueryData(QueryData.generate(querySource)); + } catch (IOException e) { + throw new RuntimeException("Could not read queries from the source.", e); + } } @Override diff --git a/src/main/java/org/aksw/iguana/cc/query/list/impl/StringListQueryList.java b/src/main/java/org/aksw/iguana/cc/query/list/impl/StringListQueryList.java index 1d448940..8c3dcfc6 100644 --- a/src/main/java/org/aksw/iguana/cc/query/list/impl/StringListQueryList.java +++ b/src/main/java/org/aksw/iguana/cc/query/list/impl/StringListQueryList.java @@ -1,5 +1,6 @@ package org.aksw.iguana.cc.query.list.impl; +import org.aksw.iguana.cc.query.QueryData; import org.aksw.iguana.cc.query.list.QueryList; import java.io.ByteArrayInputStream; @@ -10,9 +11,11 @@ public class StringListQueryList implements QueryList { private final List queries; + private final List queryData; public StringListQueryList(List queries) { this.queries = queries; + this.queryData = QueryData.generate(queries); } @Override @@ -34,4 +37,9 @@ public int size() { public int hashCode() { return 
queries.hashCode(); } + + @Override + public QueryData getQueryData(int index) { + return queryData.get(index); + } } From 5bdf321dc7064132bba6addcd65f7e42dfd02c5b Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Mon, 23 Sep 2024 17:53:47 +0200 Subject: [PATCH 02/21] Add test --- .../aksw/iguana/cc/query/QueryDataTest.java | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 src/test/java/org/aksw/iguana/cc/query/QueryDataTest.java diff --git a/src/test/java/org/aksw/iguana/cc/query/QueryDataTest.java b/src/test/java/org/aksw/iguana/cc/query/QueryDataTest.java new file mode 100644 index 00000000..19d6a748 --- /dev/null +++ b/src/test/java/org/aksw/iguana/cc/query/QueryDataTest.java @@ -0,0 +1,68 @@ +package org.aksw.iguana.cc.query; + +import org.aksw.iguana.cc.query.source.QuerySource; +import org.aksw.iguana.cc.query.source.impl.FileSeparatorQuerySource; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.*; + +class QueryDataTest { + + private static Path tempFile = null; + + @BeforeAll + public static void setup() throws IOException { + tempFile = Files.createTempFile("test", "txt"); + Files.writeString(tempFile, """ + SELECT ?s ?p ?o WHERE { + ?s ?p ?o + } + + INSERT DATA { + + } + + DELETE DATA { + + } + + SELECT ?s ?p ?o WHERE { + ?s ?p ?o + } + """); + } + + @AfterAll + public static void teardown() throws IOException { + Files.deleteIfExists(tempFile); + } + + @Test + void testGeneration() throws IOException { + final QuerySource querySource = new FileSeparatorQuerySource(tempFile, ""); + final var testStrings = querySource.getAllQueries(); + + List> generations = 
List.of( + QueryData.generate(testStrings), + QueryData.generate(testStrings.stream().map(s -> (InputStream) new ByteArrayInputStream(s.getBytes())).toList()), + QueryData.generate(querySource) + ); + for (List generation : generations) { + assertEquals(4, generation.size()); + assertFalse(generation.get(0).update()); + assertTrue(generation.get(1).update()); + assertTrue(generation.get(2).update()); + assertFalse(generation.get(3).update()); + } + } +} \ No newline at end of file From bdef045e8b4de1c9f8cc22be96cbcf27207ea5b7 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Mon, 23 Sep 2024 18:14:32 +0200 Subject: [PATCH 03/21] Check for update queries --- .../iguana/cc/query/handler/QueryHandler.java | 9 +++++---- .../iguana/cc/utils/http/RequestFactory.java | 16 +++++++++++++--- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index 6930d3f1..03a1f656 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -145,8 +145,9 @@ public Template(URI endpoint, Long limit, Boolean save) { } } - public record QueryStringWrapper(int index, String query) {} - public record QueryStreamWrapper(int index, boolean cached, Supplier queryInputStreamSupplier) {} + public record QueryStringWrapper(int index, String query, boolean update) {} + + public record QueryStreamWrapper(int index, boolean cached, Supplier queryInputStreamSupplier, boolean update) {} protected static final Logger LOGGER = LoggerFactory.getLogger(QueryHandler.class); @@ -247,7 +248,7 @@ public QuerySelector getQuerySelectorInstance() { public QueryStringWrapper getNextQuery(QuerySelector querySelector) throws IOException { final var queryIndex = querySelector.getNextIndex(); - return new QueryStringWrapper(queryIndex, 
queryList.getQuery(queryIndex)); + return new QueryStringWrapper(queryIndex, queryList.getQuery(queryIndex), queryList.getQueryData(queryIndex).update()); } public QueryStreamWrapper getNextQueryStream(QuerySelector querySelector) { @@ -258,7 +259,7 @@ public QueryStreamWrapper getNextQueryStream(QuerySelector querySelector) { } catch (IOException e) { throw new RuntimeException(e); } - }); + }, queryList.getQueryData(queryIndex).update()); } @Override diff --git a/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java b/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java index e29fc533..3392c31b 100644 --- a/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java +++ b/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java @@ -17,6 +17,7 @@ import java.io.IOException; import java.io.InputStream; +import java.net.URI; import java.net.URISyntaxException; import java.net.URLEncoder; import java.nio.charset.StandardCharsets; @@ -113,9 +114,18 @@ public AsyncRequestProducer buildHttpRequest(QueryHandler.QueryStreamWrapper que throw new IOException(e); } + // check if the query is an update query, if yes, change the request type to similar update request type + RequestType actualRequestType = requestType; + if (requestType == RequestType.GET_QUERY || requestType == RequestType.POST_QUERY) + actualRequestType = queryHandle.update() ? RequestType.POST_UPDATE : requestType; + if (requestType == RequestType.POST_URL_ENC_QUERY) + actualRequestType = queryHandle.update() ? RequestType.POST_URL_ENC_UPDATE : requestType; + // if only one endpoint is set, use it for both queries and updates + URI updateEndpoint = connectionConfig.updateEndpoint() != null ? connectionConfig.updateEndpoint() : connectionConfig.endpoint(); + // If the query is bigger than 2^31 bytes (2GB) and the request type is set to GET_QUERY, POST_URL_ENC_QUERY or // POST_URL_ENC_UPDATE, the following code will throw an exception. 
- switch (requestType) { + switch (actualRequestType) { case GET_QUERY -> asyncRequestBuilder = AsyncRequestBuilder.get(new URIBuilder(connectionConfig.endpoint()) .addParameter("query", new String(queryStream.readAllBytes(), StandardCharsets.UTF_8)) .build() @@ -127,10 +137,10 @@ public AsyncRequestProducer buildHttpRequest(QueryHandler.QueryStreamWrapper que .setEntity(new BasicAsyncEntityProducer(urlEncode("query", new String(queryStream.readAllBytes(), StandardCharsets.UTF_8)), null, false)); case POST_QUERY -> asyncRequestBuilder = AsyncRequestBuilder.post(connectionConfig.endpoint()) .setEntity(new StreamEntityProducer(queryStreamSupplier, !caching, "application/sparql-query")); - case POST_URL_ENC_UPDATE -> asyncRequestBuilder = AsyncRequestBuilder.post(connectionConfig.endpoint()) + case POST_URL_ENC_UPDATE -> asyncRequestBuilder = AsyncRequestBuilder.post(updateEndpoint) .setHeader(HttpHeaders.CONTENT_TYPE, "application/x-www-form-urlencoded") .setEntity(new BasicAsyncEntityProducer(urlEncode("update", new String(queryStream.readAllBytes(), StandardCharsets.UTF_8)), null, false)); - case POST_UPDATE -> asyncRequestBuilder = AsyncRequestBuilder.post(connectionConfig.endpoint()) + case POST_UPDATE -> asyncRequestBuilder = AsyncRequestBuilder.post(updateEndpoint) .setEntity(new StreamEntityProducer(queryStreamSupplier, !caching, "application/sparql-update")); default -> throw new IllegalStateException("Unexpected value: " + requestType); } From 984cd18591e58e39c1e67a80dc5a9bb1e198f1c7 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Tue, 24 Sep 2024 13:50:19 +0200 Subject: [PATCH 04/21] Move responsibility of QueryData to QueryHandler --- .../aksw/iguana/cc/query/handler/QueryHandler.java | 14 ++++++++++++-- .../iguana/cc/query/list/FileBasedQueryList.java | 9 --------- .../org/aksw/iguana/cc/query/list/QueryList.java | 2 -- .../cc/query/list/impl/FileCachingQueryList.java | 1 - 
.../cc/query/list/impl/FileReadingQueryList.java | 5 ----- .../cc/query/list/impl/StringListQueryList.java | 7 ------- 6 files changed, 12 insertions(+), 26 deletions(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index 03a1f656..be30268f 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -5,6 +5,7 @@ import com.fasterxml.jackson.databind.DeserializationContext; import com.fasterxml.jackson.databind.annotation.JsonDeserialize; import com.fasterxml.jackson.databind.deser.std.StdDeserializer; +import org.aksw.iguana.cc.query.QueryData; import org.aksw.iguana.cc.query.list.impl.StringListQueryList; import org.aksw.iguana.cc.query.selector.QuerySelector; import org.aksw.iguana.cc.query.selector.impl.LinearQuerySelector; @@ -156,6 +157,7 @@ public record QueryStreamWrapper(int index, boolean cached, Supplier queryData; private int workerCount = 0; // give every worker inside the same worker config an offset seed @@ -169,6 +171,7 @@ protected QueryHandler() { config = null; queryList = null; hashCode = 0; + queryData = null; } @JsonCreator @@ -185,6 +188,13 @@ public QueryHandler(Config config) throws IOException { new FileReadingQueryList(querySource); } this.hashCode = queryList.hashCode(); + this.queryData = QueryData.generate(IntStream.range(0, queryList.size()).mapToObj(i -> { + try { + return queryList.getQueryStream(i); + } catch (IOException e) { + throw new RuntimeException("Couldn't read query stream", e); + } + }).collect(Collectors.toList())); } private QueryList initializeTemplateQueryHandler(QuerySource templateSource) throws IOException { @@ -248,7 +258,7 @@ public QuerySelector getQuerySelectorInstance() { public QueryStringWrapper getNextQuery(QuerySelector querySelector) throws IOException { final var queryIndex = querySelector.getNextIndex(); - return 
new QueryStringWrapper(queryIndex, queryList.getQuery(queryIndex), queryList.getQueryData(queryIndex).update()); + return new QueryStringWrapper(queryIndex, queryList.getQuery(queryIndex), queryData.get(queryIndex).update()); } public QueryStreamWrapper getNextQueryStream(QuerySelector querySelector) { @@ -259,7 +269,7 @@ public QueryStreamWrapper getNextQueryStream(QuerySelector querySelector) { } catch (IOException e) { throw new RuntimeException(e); } - }, queryList.getQueryData(queryIndex).update()); + }, queryData.get(queryIndex).update()); } @Override diff --git a/src/main/java/org/aksw/iguana/cc/query/list/FileBasedQueryList.java b/src/main/java/org/aksw/iguana/cc/query/list/FileBasedQueryList.java index f4412e85..013093fe 100644 --- a/src/main/java/org/aksw/iguana/cc/query/list/FileBasedQueryList.java +++ b/src/main/java/org/aksw/iguana/cc/query/list/FileBasedQueryList.java @@ -18,7 +18,6 @@ public abstract class FileBasedQueryList implements QueryList{ * This is the QuerySource from which the queries should be retrieved. */ final protected QuerySource querySource; - protected List queryData; public FileBasedQueryList(QuerySource querySource) { if (querySource == null) { @@ -27,10 +26,6 @@ public FileBasedQueryList(QuerySource querySource) { this.querySource = querySource; } - protected void setQueryData(List queryData) { - this.queryData = queryData; - } - /** * This method returns the amount of queries in the query list. 
* @@ -59,8 +54,4 @@ public int hashCode() { public abstract String getQuery(int index) throws IOException; public abstract InputStream getQueryStream(int index) throws IOException; - - public QueryData getQueryData(int index) { - return queryData.get(index); - } } diff --git a/src/main/java/org/aksw/iguana/cc/query/list/QueryList.java b/src/main/java/org/aksw/iguana/cc/query/list/QueryList.java index 877a03f9..7df4dd33 100644 --- a/src/main/java/org/aksw/iguana/cc/query/list/QueryList.java +++ b/src/main/java/org/aksw/iguana/cc/query/list/QueryList.java @@ -36,6 +36,4 @@ public interface QueryList { String getQuery(int index) throws IOException; InputStream getQueryStream(int index) throws IOException; - - QueryData getQueryData(int index); } diff --git a/src/main/java/org/aksw/iguana/cc/query/list/impl/FileCachingQueryList.java b/src/main/java/org/aksw/iguana/cc/query/list/impl/FileCachingQueryList.java index 78dbb349..a804702b 100644 --- a/src/main/java/org/aksw/iguana/cc/query/list/impl/FileCachingQueryList.java +++ b/src/main/java/org/aksw/iguana/cc/query/list/impl/FileCachingQueryList.java @@ -41,7 +41,6 @@ public FileCachingQueryList(QuerySource querySource) throws IOException { queries.add(balos); } } - setQueryData(QueryData.generate(queries.stream().map(ByteArrayListOutputStream::toInputStream).toList())); } @Override diff --git a/src/main/java/org/aksw/iguana/cc/query/list/impl/FileReadingQueryList.java b/src/main/java/org/aksw/iguana/cc/query/list/impl/FileReadingQueryList.java index 045a326d..0999deba 100644 --- a/src/main/java/org/aksw/iguana/cc/query/list/impl/FileReadingQueryList.java +++ b/src/main/java/org/aksw/iguana/cc/query/list/impl/FileReadingQueryList.java @@ -16,11 +16,6 @@ public class FileReadingQueryList extends FileBasedQueryList { public FileReadingQueryList(QuerySource querySource) { super(querySource); - try { - setQueryData(QueryData.generate(querySource)); - } catch (IOException e) { - throw new RuntimeException("Could not read 
queries from the source.", e); - } } @Override diff --git a/src/main/java/org/aksw/iguana/cc/query/list/impl/StringListQueryList.java b/src/main/java/org/aksw/iguana/cc/query/list/impl/StringListQueryList.java index 8c3dcfc6..71f93d74 100644 --- a/src/main/java/org/aksw/iguana/cc/query/list/impl/StringListQueryList.java +++ b/src/main/java/org/aksw/iguana/cc/query/list/impl/StringListQueryList.java @@ -11,11 +11,9 @@ public class StringListQueryList implements QueryList { private final List queries; - private final List queryData; public StringListQueryList(List queries) { this.queries = queries; - this.queryData = QueryData.generate(queries); } @Override @@ -37,9 +35,4 @@ public int size() { public int hashCode() { return queries.hashCode(); } - - @Override - public QueryData getQueryData(int index) { - return queryData.get(index); - } } From d63e4fb8016ab4edf4805d0c1b27da07b3ef9268 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Tue, 24 Sep 2024 13:51:25 +0200 Subject: [PATCH 05/21] Remove unused methods --- .../org/aksw/iguana/cc/query/QueryData.java | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/QueryData.java b/src/main/java/org/aksw/iguana/cc/query/QueryData.java index 22c870ce..02858cb8 100644 --- a/src/main/java/org/aksw/iguana/cc/query/QueryData.java +++ b/src/main/java/org/aksw/iguana/cc/query/QueryData.java @@ -1,10 +1,7 @@ package org.aksw.iguana.cc.query; -import org.aksw.iguana.cc.query.source.QuerySource; import org.apache.jena.update.UpdateFactory; -import java.io.ByteArrayInputStream; -import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Collection; @@ -32,19 +29,4 @@ public static List generate(Collection queries) { } return queryData; } - - public static List generate(QuerySource queries) throws IOException { - final var streams = new ArrayList(); - int bound = queries.size(); - for (int i = 
0; i < bound; i++) { - InputStream queryStream = queries.getQueryStream(i); - streams.add(queryStream); - } - return generate(streams); - } - - public static List generate(List queries) { - final var streams = queries.stream().map(s -> (InputStream) new ByteArrayInputStream(s.getBytes())).toList(); - return generate(streams); - } } From a382d3e51b191c7985cbeaa3827a816b7db38e5d Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Tue, 24 Sep 2024 18:05:40 +0200 Subject: [PATCH 06/21] Add tests --- .../aksw/iguana/cc/query/QueryDataTest.java | 4 +- .../worker/impl/SPARQLProtocolWorkerTest.java | 83 +++++++++++++++++++ 2 files changed, 84 insertions(+), 3 deletions(-) diff --git a/src/test/java/org/aksw/iguana/cc/query/QueryDataTest.java b/src/test/java/org/aksw/iguana/cc/query/QueryDataTest.java index 19d6a748..259dd9aa 100644 --- a/src/test/java/org/aksw/iguana/cc/query/QueryDataTest.java +++ b/src/test/java/org/aksw/iguana/cc/query/QueryDataTest.java @@ -53,9 +53,7 @@ void testGeneration() throws IOException { final var testStrings = querySource.getAllQueries(); List> generations = List.of( - QueryData.generate(testStrings), - QueryData.generate(testStrings.stream().map(s -> (InputStream) new ByteArrayInputStream(s.getBytes())).toList()), - QueryData.generate(querySource) + QueryData.generate(testStrings.stream().map(s -> (InputStream) new ByteArrayInputStream(s.getBytes())).toList()) ); for (List generation : generations) { assertEquals(4, generation.size()); diff --git a/src/test/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorkerTest.java b/src/test/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorkerTest.java index b7d4daf7..d6c5911f 100644 --- a/src/test/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorkerTest.java +++ b/src/test/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorkerTest.java @@ -57,15 +57,19 @@ public class SPARQLProtocolWorkerTest { .build(); private final static String QUERY = 
"SELECT * WHERE { ?s ?p ?o }"; + private final static String UPDATE_QUERY = "INSERT DATA { }"; private final static int QUERY_MIXES = 1; private static Path queryFile; + private static Path updateFile; private static final Logger LOGGER = LoggerFactory.getLogger(SPARQLProtocolWorker.class); @BeforeAll public static void setup() throws IOException { queryFile = Files.createTempFile("iguana-test-queries", ".tmp"); + updateFile = Files.createTempFile("iguana-test-updates", ".tmp"); Files.writeString(queryFile, QUERY, StandardCharsets.UTF_8); + Files.writeString(updateFile, QUERY + "\n\n" + UPDATE_QUERY, StandardCharsets.UTF_8); } @BeforeEach @@ -77,6 +81,7 @@ public void reset() { @AfterAll public static void cleanup() throws IOException { Files.deleteIfExists(queryFile); + Files.deleteIfExists(updateFile); SPARQLProtocolWorker.closeHttpClient(); } @@ -120,6 +125,31 @@ public static Stream requestFactoryData() throws URISyntaxException { return workers.stream(); } + public static Stream updateWorkerData() throws IOException { + final var normalEndpoint = URI.create("http://localhost:" + wm.getPort() + "/ds/query"); + final var updateEndpoint = URI.create("http://localhost:" + wm.getPort() + "/ds/update"); + final var processor = new ResponseBodyProcessor("application/sparql-results+json"); + final var format = QueryHandler.Config.Format.SEPARATOR; + final var queryHandler = new QueryHandler(new QueryHandler.Config(updateFile.toAbsolutePath().toString(), format, null, true, QueryHandler.Config.Order.LINEAR, 0L, QueryHandler.Config.Language.SPARQL)); + final var datasetConfig = new DatasetConfig("TestDS", null); + final var connection = new ConnectionConfig("TestConn", "1", datasetConfig, normalEndpoint, new ConnectionConfig.Authentication("testUser", "password"), updateEndpoint, new ConnectionConfig.Authentication("updateUser", "password")); + final var workers = new ArrayDeque(); + for (var requestType : List.of(RequestFactory.RequestType.GET_QUERY, 
RequestFactory.RequestType.POST_URL_ENC_QUERY, RequestFactory.RequestType.POST_QUERY)) { + final var config = new SPARQLProtocolWorker.Config( + 1, + queryHandler, + new HttpWorker.QueryMixes(QUERY_MIXES), + connection, + Duration.parse("PT6S"), + "application/sparql-results+json", + requestType, + true + ); + workers.add(Arguments.of(Named.of(requestType.name(), new SPARQLProtocolWorker(0, processor, config)))); + } + return workers.stream(); + } + public static List completionTargets() { final var out = new ArrayList(); final var queryMixesAmount = List.of(1, 2, 5, 10, 100, 200); @@ -204,6 +234,59 @@ public void testRequestFactory(SPARQLProtocolWorker worker, boolean cached) { assertNotEquals(Duration.ZERO, result.executionStats().get(0).duration(), "Worker returned zero duration"); } + @ParameterizedTest + @MethodSource("updateWorkerData") + public void testSeparateUpdateEndpoint(SPARQLProtocolWorker worker) { + final var workerConfig = worker.config(); + switch (workerConfig.requestType()) { + case GET_QUERY -> { + wm.stubFor(get(urlPathEqualTo("/ds/query")) + .withQueryParam("query", equalTo(QUERY)) + .withBasicAuth("testUser", "password") + .willReturn(aResponse().withStatus(200).withBody("Non-Empty-Body"))); + wm.stubFor(post(urlPathEqualTo("/ds/update")) + .withHeader("Content-Type", equalTo("application/sparql-update")) + .withBasicAuth("updateUser", "password") + .withRequestBody(equalTo(UPDATE_QUERY)) + .willReturn(aResponse().withStatus(200).withBody("Non-Empty-Body"))); + } + case POST_URL_ENC_QUERY -> { + wm.stubFor(post(urlPathEqualTo("/ds/query")) + .withHeader("Content-Type", equalTo("application/x-www-form-urlencoded")) + .withBasicAuth("testUser", "password") + .withRequestBody(equalTo("query=" + URLEncoder.encode(QUERY, StandardCharsets.UTF_8))) + .willReturn(aResponse().withStatus(200).withBody("Non-Empty-Body"))); + wm.stubFor(post(urlPathEqualTo("/ds/update")) + .withHeader("Content-Type", equalTo("application/x-www-form-urlencoded")) + 
.withBasicAuth("updateUser", "password") + .withRequestBody(equalTo("update=" + URLEncoder.encode(UPDATE_QUERY, StandardCharsets.UTF_8))) + .willReturn(aResponse().withStatus(200).withBody("Non-Empty-Body"))); + } + case POST_QUERY -> { + wm.stubFor(post(urlPathEqualTo("/ds/query")) + .withHeader("Content-Type", equalTo("application/sparql-query")) + .withBasicAuth("testUser", "password") + .withRequestBody(equalTo(QUERY)) + .willReturn(aResponse().withStatus(200).withBody("Non-Empty-Body"))); + wm.stubFor(post(urlPathEqualTo("/ds/update")) + .withHeader("Content-Type", equalTo("application/sparql-update")) + .withBasicAuth("updateUser", "password") + .withRequestBody(equalTo(UPDATE_QUERY)) + .willReturn(aResponse().withStatus(200).withBody("Non-Empty-Body"))); + } + } + final HttpWorker.Result result = worker.start().join(); + assertEquals(result.executionStats().size(), QUERY_MIXES * 2, "Worker should have executed only 1 query"); + for (var res : result.executionStats()) { + assertNull(res.error().orElse(null), "Worker threw an exception, during execution"); + assertEquals(200, res.httpStatusCode().get(), "Worker returned wrong status code"); + assertNotEquals(0, res.responseBodyHash().getAsLong(), "Worker didn't return a response body hash"); + assertEquals("Non-Empty-Body".getBytes(StandardCharsets.UTF_8).length, res.contentLength().getAsLong(), "Worker returned wrong content length"); + assertNotEquals(Duration.ZERO, res.duration(), "Worker returned zero duration"); + } + + } + @DisplayName("Test Malformed Response Processing") @ParameterizedTest(name = "[{index}] fault = {0}") @EnumSource(Fault.class) From 21cc30410a208322329fc913151534cb4f22de94 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Tue, 24 Sep 2024 18:05:46 +0200 Subject: [PATCH 07/21] Fix authentication --- .../org/aksw/iguana/cc/utils/http/RequestFactory.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git 
a/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java b/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java index 3392c31b..e0853166 100644 --- a/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java +++ b/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java @@ -148,10 +148,15 @@ public AsyncRequestProducer buildHttpRequest(QueryHandler.QueryStreamWrapper que // set additional headers if (acceptHeader != null) asyncRequestBuilder.addHeader("Accept", acceptHeader); - if (connectionConfig.authentication() != null && connectionConfig.authentication().user() != null) + if (queryHandle.update() && connectionConfig.updateAuthentication() != null && connectionConfig.updateAuthentication().user() != null) { asyncRequestBuilder.addHeader("Authorization", - HttpWorker.basicAuth(connectionConfig.authentication().user(), - Optional.ofNullable(connectionConfig.authentication().password()).orElse(""))); + HttpWorker.basicAuth(connectionConfig.updateAuthentication().user(), + Optional.ofNullable(connectionConfig.updateAuthentication().password()).orElse(""))); + } else if (connectionConfig.authentication() != null && connectionConfig.authentication().user() != null) { + asyncRequestBuilder.addHeader("Authorization", + HttpWorker.basicAuth(connectionConfig.authentication().user(), + Optional.ofNullable(connectionConfig.authentication().password()).orElse(""))); + } // cache request if (caching) From 870d9d96a73aace5a1eb4470ff6c633c96643f9a Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Tue, 24 Sep 2024 18:06:20 +0200 Subject: [PATCH 08/21] Cleanup --- src/test/java/org/aksw/iguana/cc/query/QueryDataTest.java | 1 - .../aksw/iguana/cc/worker/impl/SPARQLProtocolWorkerTest.java | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/test/java/org/aksw/iguana/cc/query/QueryDataTest.java b/src/test/java/org/aksw/iguana/cc/query/QueryDataTest.java index 259dd9aa..3db404fc 
100644 --- a/src/test/java/org/aksw/iguana/cc/query/QueryDataTest.java +++ b/src/test/java/org/aksw/iguana/cc/query/QueryDataTest.java @@ -11,7 +11,6 @@ import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import java.util.List; import static org.junit.jupiter.api.Assertions.*; diff --git a/src/test/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorkerTest.java b/src/test/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorkerTest.java index d6c5911f..6d9842fa 100644 --- a/src/test/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorkerTest.java +++ b/src/test/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorkerTest.java @@ -290,7 +290,7 @@ public void testSeparateUpdateEndpoint(SPARQLProtocolWorker worker) { @DisplayName("Test Malformed Response Processing") @ParameterizedTest(name = "[{index}] fault = {0}") @EnumSource(Fault.class) - public void testMalformedResponseProcessing(Fault fault) throws IOException, URISyntaxException { + public void testMalformedResponseProcessing(Fault fault) throws URISyntaxException { SPARQLProtocolWorker worker = (SPARQLProtocolWorker) ((Named)requestFactoryData().toList().get(0).get()[0]).getPayload(); wm.stubFor(get(urlPathEqualTo("/ds/query")) .willReturn(aResponse().withFault(fault))); @@ -300,7 +300,7 @@ public void testMalformedResponseProcessing(Fault fault) throws IOException, URI } @Test - public void testBadHttpCodeResponse() throws IOException, URISyntaxException { + public void testBadHttpCodeResponse() throws URISyntaxException { SPARQLProtocolWorker worker = (SPARQLProtocolWorker) ((Named)requestFactoryData().toList().get(0).get()[0]).getPayload(); wm.stubFor(get(urlPathEqualTo("/ds/query")) .willReturn(aResponse().withStatus(404))); From 3aa46ed31cbc769d6b1ee090383d7692211b44ed Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Thu, 26 Sep 2024 15:16:05 +0200 Subject: [PATCH 09/21] Fix StringListQueryList 
--- .../aksw/iguana/cc/query/list/impl/StringListQueryList.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/list/impl/StringListQueryList.java b/src/main/java/org/aksw/iguana/cc/query/list/impl/StringListQueryList.java index 71f93d74..809ff6ff 100644 --- a/src/main/java/org/aksw/iguana/cc/query/list/impl/StringListQueryList.java +++ b/src/main/java/org/aksw/iguana/cc/query/list/impl/StringListQueryList.java @@ -1,9 +1,8 @@ package org.aksw.iguana.cc.query.list.impl; -import org.aksw.iguana.cc.query.QueryData; import org.aksw.iguana.cc.query.list.QueryList; +import org.aksw.iguana.commons.io.ByteArrayListInputStream; -import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.util.List; @@ -23,7 +22,7 @@ public String getQuery(int index) throws IOException { @Override public InputStream getQueryStream(int index) throws IOException { - return new ByteArrayInputStream(queries.get(index).getBytes()); + return new ByteArrayListInputStream(List.of(queries.get(index).getBytes())); } @Override From df06fda9af53764b5a112300c7c6dbbe219c648b Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Thu, 26 Sep 2024 15:20:01 +0200 Subject: [PATCH 10/21] Modify QueryHandler and QueryData --- .../org/aksw/iguana/cc/query/QueryData.java | 29 ++++- .../iguana/cc/query/handler/QueryHandler.java | 122 ++++++++++++++---- .../cc/worker/impl/SPARQLProtocolWorker.java | 15 ++- .../query/handler/QueryHandlerConfigTest.java | 2 +- .../cc/query/handler/TemplateQueriesTest.java | 4 +- 5 files changed, 132 insertions(+), 40 deletions(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/QueryData.java b/src/main/java/org/aksw/iguana/cc/query/QueryData.java index 02858cb8..3f71c0b9 100644 --- a/src/main/java/org/aksw/iguana/cc/query/QueryData.java +++ b/src/main/java/org/aksw/iguana/cc/query/QueryData.java @@ -2,6 +2,7 @@ import 
org.apache.jena.update.UpdateFactory; +import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Collection; @@ -12,9 +13,15 @@ * At the moment, it only stores if the query is an update query or not. * * @param queryId The id of the query - * @param update If the query is an update query */ -public record QueryData(int queryId, boolean update) { +public record QueryData(int queryId, QueryType type, Integer templateId) { + public enum QueryType { + DEFAULT, + UPDATE, + TEMPLATE, + TEMPLATE_INSTANCE + } + public static List generate(Collection queries) { final var queryData = new ArrayList(); int i = 0; @@ -25,8 +32,24 @@ public static List generate(Collection queries) { } catch (Exception e) { update = false; } - queryData.add(new QueryData(i++, update)); + queryData.add(new QueryData(i++, update ? QueryType.UPDATE : QueryType.DEFAULT, null)); + try { + query.close(); + } catch (IOException ignored) {} } return queryData; } + + public static boolean checkUpdate(InputStream query) { + try { + UpdateFactory.read(query); // Throws an exception if the query is not an update query + return true; + } catch (Exception e) { + return false; + } + } + + public boolean update() { + return type == QueryType.UPDATE; + } } diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index be30268f..a685dd33 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -21,17 +21,20 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.net.URI; import java.nio.file.Files; import java.nio.file.Path; import java.util.*; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Supplier; import java.util.regex.Matcher; import 
java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.IntStream; +import java.util.stream.Stream; /** * The QueryHandler is used by every worker that extends the AbstractWorker. @@ -137,18 +140,18 @@ public String value() { } } - public record Template(@JsonProperty(required = true) URI endpoint, Long limit, Boolean save) { - public Template(URI endpoint, Long limit, Boolean save) { + public record Template(@JsonProperty(required = true) URI endpoint, Long limit, Boolean save, Boolean individualResults) { + public Template(URI endpoint, Long limit, Boolean save, Boolean individualResults) { this.endpoint = endpoint; this.limit = limit == null ? 2000 : limit; this.save = save == null || save; + this.individualResults = individualResults != null; } } } - public record QueryStringWrapper(int index, String query, boolean update) {} - - public record QueryStreamWrapper(int index, boolean cached, Supplier queryInputStreamSupplier, boolean update) {} + public record QueryStringWrapper(int index, String query, boolean update, Integer resultId) {} + public record QueryStreamWrapper(int index, boolean cached, Supplier queryInputStreamSupplier, boolean update, Integer resultId) {} protected static final Logger LOGGER = LoggerFactory.getLogger(QueryHandler.class); @@ -157,7 +160,7 @@ public record QueryStreamWrapper(int index, boolean cached, Supplier queryData; + protected List queryData; private int workerCount = 0; // give every worker inside the same worker config an offset seed @@ -186,17 +189,20 @@ public QueryHandler(Config config) throws IOException { queryList = (config.caching()) ? 
new FileCachingQueryList(querySource) : new FileReadingQueryList(querySource); + queryData = QueryData.generate(IntStream.range(0, queryList.size()).mapToObj(i -> { + try { + return queryList.getQueryStream(i); + } catch (IOException e) { + throw new RuntimeException("Couldn't read query stream", e); + } + }).collect(Collectors.toList())); } this.hashCode = queryList.hashCode(); - this.queryData = QueryData.generate(IntStream.range(0, queryList.size()).mapToObj(i -> { - try { - return queryList.getQueryStream(i); - } catch (IOException e) { - throw new RuntimeException("Couldn't read query stream", e); - } - }).collect(Collectors.toList())); } + private record TemplateData(List queries, int templates, int[] indices, int[] instanceNumber, int instanceStart) {} + + // TODO: storing and reading of instance file private QueryList initializeTemplateQueryHandler(QuerySource templateSource) throws IOException { QuerySource querySource = templateSource; final var originalPath = templateSource.getPath(); @@ -205,16 +211,22 @@ private QueryList initializeTemplateQueryHandler(QuerySource templateSource) thr final Path instancePath = Files.isDirectory(originalPath) ? originalPath.resolveSibling(originalPath.getFileName() + postfix) : // if the source of the query templates is a folder, the instances will be saved in a file with the same name as the folder originalPath.resolveSibling(originalPath.getFileName().toString().split("\\.")[0] + postfix); // if the source of the query templates is a file, the instances will be saved in a file with the same name as the file + TemplateData templateData; + if (Files.exists(instancePath)) { LOGGER.info("Already existing query template instances have been found and will be reused. 
Delete the following file to regenerate them: {}", instancePath.toAbsolutePath()); + + // TODO: change this querySource = createQuerySource(instancePath); // if the instances already exist, use them } else { - final List instances = instantiateTemplateQueries(querySource, config.template); + templateData = instantiateTemplateQueries(querySource, config.template); + + // TODO: change this if (config.template.save) { // save the instances to a file Files.createFile(instancePath); try (var writer = Files.newBufferedWriter(instancePath)) { - for (String instance : instances) { + for (String instance : templateData.queries()) { writer.write(instance); writer.newLine(); } @@ -224,8 +236,40 @@ private QueryList initializeTemplateQueryHandler(QuerySource templateSource) thr } else { // query source isn't necessary, because queries aren't stored in a file, // directly return a list of the instances instead - return new StringListQueryList(instances); + // return new StringListQueryList(templateData.queries()); } + + AtomicInteger templateIndex = new AtomicInteger(0); // index of the next template + AtomicInteger index = new AtomicInteger(0); // index of the current query + AtomicInteger instanceId = new AtomicInteger(0); // id of the current instance for the current template + queryData = templateData.queries.stream().map( + query -> { + // once the template instances start, the template index is reset and reused for the instances + // to track to which template the instances belong + if (index.get() == templateData.instanceStart) templateIndex.set(0); + + if (index.get() >= templateData.instanceStart) { + // query is an instance of a template + + // if the instance id is equal to the number of instances for the current template, + // the next template is used + if (instanceId.get() == templateData.instanceNumber[templateIndex.get()]) { + templateIndex.getAndIncrement(); + instanceId.set(0); + } + return new QueryData(index.getAndIncrement(), 
QueryData.QueryType.TEMPLATE_INSTANCE, templateIndex.get()); + } else if (templateIndex.get() < templateData.templates && index.get() == templateData.indices[templateIndex.get()]) { + // query is a template + templateIndex.getAndIncrement(); + return new QueryData(index.getAndIncrement(), QueryData.QueryType.TEMPLATE, null); + } else { + // query is neither a template nor an instance + final var update = QueryData.checkUpdate(new ByteArrayInputStream(query.getBytes())); + return new QueryData(index.getAndIncrement(), update ? QueryData.QueryType.UPDATE : QueryData.QueryType.DEFAULT, null); + } + } + ).toList(); + return new StringListQueryList(templateData.queries); } return (config.caching()) ? new FileCachingQueryList(querySource) : // if caching is enabled, cache the instances @@ -257,19 +301,33 @@ public QuerySelector getQuerySelectorInstance() { } public QueryStringWrapper getNextQuery(QuerySelector querySelector) throws IOException { - final var queryIndex = querySelector.getNextIndex(); - return new QueryStringWrapper(queryIndex, queryList.getQuery(queryIndex), queryData.get(queryIndex).update()); + final var queryIndex = getNextQueryIndex(querySelector); + return new QueryStringWrapper(queryIndex[0], queryList.getQuery(queryIndex[0]), queryData.get(queryIndex[0]).update(), queryIndex[1]); } public QueryStreamWrapper getNextQueryStream(QuerySelector querySelector) { - final var queryIndex = querySelector.getNextIndex(); - return new QueryStreamWrapper(queryIndex, config.caching(), () -> { + final var queryIndex = getNextQueryIndex(querySelector); + return new QueryStreamWrapper(queryIndex[0], config.caching(), () -> { try { - return this.queryList.getQueryStream(queryIndex); + return this.queryList.getQueryStream(queryIndex[0]); } catch (IOException e) { throw new RuntimeException(e); } - }, queryData.get(queryIndex).update()); + }, queryData.get(queryIndex[0]).update(), queryIndex[1]); + } + + private Integer[] getNextQueryIndex(QuerySelector querySelector) 
{ + int queryIndex; + do { + queryIndex = querySelector.getNextIndex(); + } while (queryData.get(queryIndex).type() == QueryData.QueryType.TEMPLATE); // query templates can't be executed directly + + // if individual results are disabled, the query instance will represent the template, by using its id + Integer resultId = null; + if (queryData.get(queryIndex).type() == QueryData.QueryType.TEMPLATE_INSTANCE && !config.template().individualResults) { + resultId = queryData.get(queryIndex).templateId(); + } + return new Integer[]{ queryIndex, resultId }; } @Override @@ -327,15 +385,22 @@ public Config getConfig() { * SELECT * WHERE {?s <http://prop/1> ?o . ?o <http://exa.com> "12"}
* SELECT * WHERE {?s <http://prop/2> ?o . ?o <http://exa.com> "1234"}
*/ - private static List instantiateTemplateQueries(QuerySource querySource, Config.Template config) throws IOException { - // charset for generating random varia ble names + private static TemplateData instantiateTemplateQueries(QuerySource querySource, Config.Template config) throws IOException { + // charset for generating random variable names final String charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; final Random random = new Random(); final var templateQueries = new FileCachingQueryList(querySource); final Pattern template = Pattern.compile("%%[a-zA-Z0-9_]+%%"); + final var oldQueries = new ArrayList(); final var instances = new ArrayList(); + + int templateNumber = 0; + final var indices = new ArrayList(); + final var instanceNumber = new ArrayList(); + for (int i = 0; i < templateQueries.size(); i++) { + oldQueries.add(templateQueries.getQuery(i)); // replace all variables in the query template with SPARQL variables // and store the variable names var templateQueryString = templateQueries.getQuery(i); @@ -355,7 +420,6 @@ private static List instantiateTemplateQueries(QuerySource querySource, // if no placeholders are found, the query is already a valid SPARQL query if (variables.isEmpty()) { - instances.add(templateQueryString); continue; } @@ -368,13 +432,13 @@ private static List instantiateTemplateQueries(QuerySource querySource, selectQueryString.append(" LIMIT " + config.limit()); selectQueryString.setNsPrefixes(templateQuery.getPrefixMapping()); + int count = 0; // send request to SPARQL endpoint and instantiate the template based on results try (QueryExecution exec = QueryExecutionFactory.createServiceRequest(config.endpoint().toString(), selectQueryString.asQuery())) { ResultSet resultSet = exec.execSelect(); if (!resultSet.hasNext()) { LOGGER.warn("No results for query template: {}", templateQueryString); } - int count = 0; while (resultSet.hasNext() && count++ < config.limit()) { var instance = new 
ParameterizedSparqlString(templateQueryString); QuerySolution solution = resultSet.next(); @@ -385,7 +449,11 @@ private static List instantiateTemplateQueries(QuerySource querySource, instances.add(instance.toString()); } } + // store the number of instances and the index of the template query + templateNumber++; + indices.add(i); + instanceNumber.add(count); } - return instances; + return new TemplateData(Stream.concat(oldQueries.stream(), instances.stream()).toList(), templateNumber, indices.stream().mapToInt(Integer::intValue).toArray(), instanceNumber.stream().mapToInt(Integer::intValue).toArray(), oldQueries.size()); } } diff --git a/src/main/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorker.java b/src/main/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorker.java index 565763f6..ef0ffffb 100644 --- a/src/main/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorker.java +++ b/src/main/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorker.java @@ -265,6 +265,7 @@ private HttpExecutionResult executeHttpRequest(Duration timeout) { // get the next query and request final var queryHandle = config().queries().getNextQueryStream(querySelector); final int queryIndex = queryHandle.index(); + final int resultIndex = queryHandle.resultId() == null ? queryIndex : queryHandle.resultId(); final AsyncRequestProducer request; try { @@ -358,7 +359,7 @@ protected HttpExecutionResult buildResult() { // check for http error if (response.getCode() / 100 != 2) { - return createFailedResultDuringResponse(queryIndex, response, timeStamp, duration, null); + return createFailedResultDuringResponse(resultIndex, response, timeStamp, duration, null); } // check content length @@ -370,18 +371,18 @@ protected HttpExecutionResult buildResult() { if (responseSize != responseBody.size()) LOGGER.error("Error during copying the response data. 
(expected written data size = {}, actual written data size = {}, Content-Length-Header = {})", responseSize, responseBody.size(), contentLengthHeader.getValue()); final var exception = new HttpException(String.format("Content-Length header value doesn't match actual content length. (Content-Length-Header = %s, written data size = %s)", contentLength, config.parseResults() ? responseBody.size() : responseSize)); - return createFailedResultDuringResponse(queryIndex, response, timeStamp, duration, exception); + return createFailedResultDuringResponse(resultIndex, response, timeStamp, duration, exception); } } // check timeout if (duration.compareTo(timeout) > 0) { - return createFailedResultDuringResponse(queryIndex, response, timeStamp, duration, new TimeoutException()); + return createFailedResultDuringResponse(resultIndex, response, timeStamp, duration, new TimeoutException()); } // return successful result return new HttpExecutionResult( - queryIndex, + resultIndex, Optional.of(response), timeStamp, Duration.ofNanos(responseEnd - requestStart), @@ -402,18 +403,18 @@ protected HttpExecutionResult buildResult() { } catch (InterruptedException | ExecutionException e) { // This will close the connection and cancel the request if it's still running. 
future.cancel(true); - return createFailedResultBeforeRequest(queryIndex, e); + return createFailedResultBeforeRequest(resultIndex, e); } catch (TimeoutException e) { if (future.isDone()) { LOGGER.warn("Request finished immediately after timeout but will still be counted as timed out."); try { return future.get(); } catch (InterruptedException | ExecutionException ex) { - return createFailedResultBeforeRequest(queryIndex, ex); + return createFailedResultBeforeRequest(resultIndex, ex); } } else { future.cancel(true); - return createFailedResultBeforeRequest(queryIndex, e); + return createFailedResultBeforeRequest(resultIndex, e); } } } diff --git a/src/test/java/org/aksw/iguana/cc/query/handler/QueryHandlerConfigTest.java b/src/test/java/org/aksw/iguana/cc/query/handler/QueryHandlerConfigTest.java index 71e25605..243fade7 100644 --- a/src/test/java/org/aksw/iguana/cc/query/handler/QueryHandlerConfigTest.java +++ b/src/test/java/org/aksw/iguana/cc/query/handler/QueryHandlerConfigTest.java @@ -73,7 +73,7 @@ private static Stream testDeserializationData() { QueryHandler.Config.Order.RANDOM, 42L, QueryHandler.Config.Language.SPARQL, - new QueryHandler.Config.Template(URI.create("http://example.org/sparql"), 2000L, true) + new QueryHandler.Config.Template(URI.create("http://example.org/sparql"), 2000L, true, null) ), """ {"path":"some.queries","format":"separator", "separator": "\\n", "caching":true,"order":"random","seed":42,"lang":"SPARQL","template": {"endpoint": "http://example.org/sparql"}} diff --git a/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java b/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java index 2beb8bf1..2c552bdf 100644 --- a/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java +++ b/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java @@ -69,7 +69,7 @@ public void testTemplateQueries() throws IOException { QueryHandler.Config.Order.LINEAR, null, 
QueryHandler.Config.Language.SPARQL, - new QueryHandler.Config.Template(URI.create("http://localhost:" + wm.getPort()), 2000L, false) + new QueryHandler.Config.Template(URI.create("http://localhost:" + wm.getPort()), 2000L, false, true) ); wm.stubFor(get(anyUrl()) .withQueryParam("query", matching("PREFIX\\s+rdf:\\s+\\s+SELECT\\s+DISTINCT\\s+\\?var0\\s+\\?var1\\s+\\?var2\\s+WHERE\\s+\\{\\s*\\?s\\s+rdf:type\\s+\\?var0\\s*;\\s*\\?var1\\s+\\?var2\\s*\\.\\s*\\?var2\\s+\\?p\\s+\\s*}\\s+LIMIT\\s+2000\\s*")) @@ -99,7 +99,7 @@ public void testMalformedTemplateQuery() throws IOException { QueryHandler.Config.Order.LINEAR, null, QueryHandler.Config.Language.SPARQL, - new QueryHandler.Config.Template(URI.create("http://localhost:" + wm.getPort()), 2000L, false) + new QueryHandler.Config.Template(URI.create("http://localhost:" + wm.getPort()), 2000L, false, true) ); Assertions.assertThrows(QueryParseException.class, () -> new QueryHandler(queryHandlerConfig)); } From 5c50c6903708a2c840b64db0eec5581430218c52 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Thu, 26 Sep 2024 15:20:59 +0200 Subject: [PATCH 11/21] Add executable query count and representative query count to QueryHandler --- .../impl/AggregatedExecutionStatistics.java | 2 +- .../metrics/impl/EachExecutionStatistic.java | 2 +- .../org/aksw/iguana/cc/metrics/impl/QMPH.java | 2 +- .../iguana/cc/query/handler/QueryHandler.java | 21 +++++++++++++++---- .../tasks/impl/StresstestResultProcessor.java | 11 +++++----- .../iguana/cc/utils/http/RequestFactory.java | 4 ++-- .../cc/worker/impl/SPARQLProtocolWorker.java | 4 ++-- .../iguana/cc/mockup/MockupQueryHandler.java | 7 ++++++- .../aksw/iguana/cc/mockup/MockupWorker.java | 2 +- .../cc/query/handler/QueryHandlerTest.java | 17 +++++++-------- .../cc/query/handler/TemplateQueriesTest.java | 2 +- 11 files changed, 46 insertions(+), 28 deletions(-) diff --git 
a/src/main/java/org/aksw/iguana/cc/metrics/impl/AggregatedExecutionStatistics.java b/src/main/java/org/aksw/iguana/cc/metrics/impl/AggregatedExecutionStatistics.java index 8582f202..84416804 100644 --- a/src/main/java/org/aksw/iguana/cc/metrics/impl/AggregatedExecutionStatistics.java +++ b/src/main/java/org/aksw/iguana/cc/metrics/impl/AggregatedExecutionStatistics.java @@ -29,7 +29,7 @@ public AggregatedExecutionStatistics() { public Model createMetricModel(List workers, List[][] data, IRES.Factory iresFactory) { Model m = ModelFactory.createDefaultModel(); for (var worker : workers) { - for (int i = 0; i < worker.config().queries().getQueryCount(); i++) { + for (int i = 0; i < worker.config().queries().getRepresentativeQueryCount(); i++) { Resource queryRes = iresFactory.getWorkerQueryResource(worker, i); m.add(createAggregatedModel(data[(int) worker.getWorkerID()][i], queryRes)); } diff --git a/src/main/java/org/aksw/iguana/cc/metrics/impl/EachExecutionStatistic.java b/src/main/java/org/aksw/iguana/cc/metrics/impl/EachExecutionStatistic.java index c6e1bf95..b3c3f1de 100644 --- a/src/main/java/org/aksw/iguana/cc/metrics/impl/EachExecutionStatistic.java +++ b/src/main/java/org/aksw/iguana/cc/metrics/impl/EachExecutionStatistic.java @@ -26,7 +26,7 @@ public EachExecutionStatistic() { public Model createMetricModel(List workers, List[][] data, IRES.Factory iresFactory) { Model m = ModelFactory.createDefaultModel(); for (var worker : workers) { - for (int i = 0; i < worker.config().queries().getQueryCount(); i++) { + for (int i = 0; i < worker.config().queries().getRepresentativeQueryCount(); i++) { Resource workerQueryResource = iresFactory.getWorkerQueryResource(worker, i); Resource queryRes = IRES.getResource(worker.config().queries().getQueryId(i)); BigInteger run = BigInteger.ONE; diff --git a/src/main/java/org/aksw/iguana/cc/metrics/impl/QMPH.java b/src/main/java/org/aksw/iguana/cc/metrics/impl/QMPH.java index d2ae1914..fe0232ba 100644 --- 
a/src/main/java/org/aksw/iguana/cc/metrics/impl/QMPH.java +++ b/src/main/java/org/aksw/iguana/cc/metrics/impl/QMPH.java @@ -28,7 +28,7 @@ public Number calculateTaskMetric(List workers, List[] data) { BigDecimal successes = BigDecimal.ZERO; - BigDecimal noq = BigDecimal.valueOf(worker.queries().getQueryCount()); + BigDecimal noq = BigDecimal.valueOf(worker.queries().getExecutableQueryCount()); Duration totalTime = Duration.ZERO; for (List datum : data) { for (HttpWorker.ExecutionStats exec : datum) { diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index a685dd33..8ef2aa71 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -162,6 +162,9 @@ public record QueryStreamWrapper(int index, boolean cached, Supplier queryData; + int executableQueryCount = 0; + int representativeQueryCount = 0; + private int workerCount = 0; // give every worker inside the same worker config an offset seed final protected int hashCode; @@ -196,6 +199,8 @@ public QueryHandler(Config config) throws IOException { throw new RuntimeException("Couldn't read query stream", e); } }).collect(Collectors.toList())); + executableQueryCount = queryList.size(); + representativeQueryCount = queryList.size(); } this.hashCode = queryList.hashCode(); } @@ -269,6 +274,10 @@ private QueryList initializeTemplateQueryHandler(QuerySource templateSource) thr } } ).toList(); + this.executableQueryCount = templateData.queries.size() - templateData.templates; + this.representativeQueryCount = config.template.individualResults ? + templateData.queries.size() - templateData.templates : + templateData.queries.size() - templateData.instanceStart; return new StringListQueryList(templateData.queries); } return (config.caching()) ? 
@@ -335,8 +344,12 @@ public int hashCode() { return hashCode; } - public int getQueryCount() { - return this.queryList.size(); + public int getExecutableQueryCount() { + return executableQueryCount; + } + + public int getRepresentativeQueryCount() { + return representativeQueryCount; } public String getQueryId(int i) { @@ -350,8 +363,8 @@ public String getQueryId(int i) { * @return String[] of query ids */ public String[] getAllQueryIds() { - String[] out = new String[queryList.size()]; - for (int i = 0; i < queryList.size(); i++) { + String[] out = new String[getRepresentativeQueryCount()]; + for (int i = 0; i < getRepresentativeQueryCount(); i++) { out[i] = getQueryId(i); } return out; diff --git a/src/main/java/org/aksw/iguana/cc/tasks/impl/StresstestResultProcessor.java b/src/main/java/org/aksw/iguana/cc/tasks/impl/StresstestResultProcessor.java index c748f324..e23fd459 100644 --- a/src/main/java/org/aksw/iguana/cc/tasks/impl/StresstestResultProcessor.java +++ b/src/main/java/org/aksw/iguana/cc/tasks/impl/StresstestResultProcessor.java @@ -63,8 +63,8 @@ public StresstestResultProcessor(String suiteID, this.workerQueryExecutions = new ArrayList[workers.size()][]; for (int i = 0; i < workers.size(); i++) { - this.workerQueryExecutions[i] = new ArrayList[workers.get(i).config().queries().getQueryCount()]; - for (int j = 0; j < workers.get(i).config().queries().getQueryCount(); j++) { + this.workerQueryExecutions[i] = new ArrayList[workers.get(i).config().queries().getRepresentativeQueryCount()]; + for (int j = 0; j < workers.get(i).config().queries().getRepresentativeQueryCount(); j++) { this.workerQueryExecutions[i][j] = new ArrayList<>(); } } @@ -128,7 +128,8 @@ public void calculateAndSaveMetrics(Calendar start, Calendar end) { m.add(workerRes, RDF.type, IONT.worker); m.add(workerRes, IPROP.workerID, toInfinitePrecisionIntegerLiteral(worker.getWorkerID())); m.add(workerRes, IPROP.workerType, 
ResourceFactory.createTypedLiteral(worker.getClass().getSimpleName())); - m.add(workerRes, IPROP.noOfQueries, toInfinitePrecisionIntegerLiteral(config.queries().getQueryCount())); + // TODO: is it executable or representative? + m.add(workerRes, IPROP.noOfQueries, toInfinitePrecisionIntegerLiteral(config.queries().getExecutableQueryCount())); m.add(workerRes, IPROP.timeOut, TimeUtils.createTypedDurationLiteral(config.timeout())); if (config.completionTarget() instanceof HttpWorker.QueryMixes) m.add(workerRes, IPROP.noOfQueryMixes, toInfinitePrecisionIntegerLiteral(((HttpWorker.QueryMixes) config.completionTarget()).number())); @@ -155,7 +156,7 @@ public void calculateAndSaveMetrics(Calendar start, Calendar end) { for (var worker : workers) { var config = worker.config(); var workerQueryIDs = config.queries().getAllQueryIds(); - for (int i = 0; i < config.queries().getQueryCount(); i++) { + for (int i = 0; i < config.queries().getRepresentativeQueryCount(); i++) { Resource workerQueryRes = iresFactory.getWorkerQueryResource(worker, i); Resource queryRes = IRES.getResource(workerQueryIDs[i]); m.add(workerQueryRes, IPROP.queryID, queryRes); @@ -257,7 +258,7 @@ private Model createMetricModel(Metric metric) { if (metric instanceof QueryMetric) { // queries grouped by worker for (var worker : workers) { - for (int i = 0; i < worker.config().queries().getQueryCount(); i++) { + for (int i = 0; i < worker.config().queries().getRepresentativeQueryCount(); i++) { Number metricValue = ((QueryMetric) metric).calculateQueryMetric(workerQueryExecutions[(int) worker.getWorkerID()][i]); if (metricValue != null) { Literal lit = ResourceFactory.createTypedLiteral(metricValue); diff --git a/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java b/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java index e0853166..ca305a0d 100644 --- a/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java +++ 
b/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java @@ -173,8 +173,8 @@ public AsyncRequestProducer buildHttpRequest(QueryHandler.QueryStreamWrapper que * @param queryHandler the query handler to preload requests for */ public void preloadRequests(QueryHandler queryHandler) { - final var selector = new LinearQuerySelector(queryHandler.getQueryCount()); - for (int i = 0; i < queryHandler.getQueryCount(); i++) { + final var selector = new LinearQuerySelector(queryHandler.getExecutableQueryCount()); + for (int i = 0; i < queryHandler.getExecutableQueryCount(); i++) { try { // build request and discard it buildHttpRequest(queryHandler.getNextQueryStream(selector)); diff --git a/src/main/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorker.java b/src/main/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorker.java index ef0ffffb..82f5da16 100644 --- a/src/main/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorker.java +++ b/src/main/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorker.java @@ -174,7 +174,7 @@ public CompletableFuture start() { List executionStats = new ArrayList<>(); if (config().completionTarget() instanceof QueryMixes queryMixes) { for (int i = 0; i < queryMixes.number(); i++) { - for (int j = 0; j < config().queries().getQueryCount(); j++) { + for (int j = 0; j < config().queries().getExecutableQueryCount(); j++) { ExecutionStats execution = executeQuery(config().timeout(), false); if (execution == null) throw new RuntimeException("Execution returned null at a place, where it should have never been null."); logExecution(execution); @@ -186,7 +186,7 @@ public CompletableFuture start() { final var startNanos = System.nanoTime(); long queryExecutionCount = 0; int queryMixExecutionCount = 0; - int queryMixSize = config().queries().getQueryCount(); + int queryMixSize = config().queries().getExecutableQueryCount(); long now; while ((now = System.nanoTime()) - startNanos < ((TimeLimit) 
config.completionTarget()).duration().toNanos()) { final var timeLeft = ((TimeLimit) config.completionTarget()).duration().toNanos() - (now - startNanos); diff --git a/src/test/java/org/aksw/iguana/cc/mockup/MockupQueryHandler.java b/src/test/java/org/aksw/iguana/cc/mockup/MockupQueryHandler.java index 6988f0ab..6ba0989d 100644 --- a/src/test/java/org/aksw/iguana/cc/mockup/MockupQueryHandler.java +++ b/src/test/java/org/aksw/iguana/cc/mockup/MockupQueryHandler.java @@ -30,7 +30,12 @@ public String[] getAllQueryIds() { } @Override - public int getQueryCount() { + public int getExecutableQueryCount() { + return queryNumber; + } + + @Override + public int getRepresentativeQueryCount() { return queryNumber; } diff --git a/src/test/java/org/aksw/iguana/cc/mockup/MockupWorker.java b/src/test/java/org/aksw/iguana/cc/mockup/MockupWorker.java index 9950c9f9..7c6d171b 100644 --- a/src/test/java/org/aksw/iguana/cc/mockup/MockupWorker.java +++ b/src/test/java/org/aksw/iguana/cc/mockup/MockupWorker.java @@ -68,7 +68,7 @@ public static List createWorkerResults(QueryHandler queries, List sourceType) thro QueryHandler queryHandler = assertDoesNotThrow(() -> mapper.readValue(json, QueryHandler.class)); final var selector = queryHandler.getQuerySelectorInstance(); assertInstanceOf(LinearQuerySelector.class, selector); - assertEquals(queries.size(), queryHandler.getQueryCount()); + assertEquals(queries.size(), queryHandler.getExecutableQueryCount()); assertNotEquals(0, queryHandler.hashCode()); - for (int i = 0; i < queryHandler.getQueryCount(); i++) { + for (int i = 0; i < queryHandler.getExecutableQueryCount(); i++) { final var wrapper = queryHandler.getNextQuery(selector); assertEquals(i, selector.getCurrentIndex()); if (FolderQuerySource.class.isAssignableFrom(sourceType)) @@ -110,9 +109,9 @@ public void testQueryStreamWrapper(String json, Class sourceType) t QueryHandler queryHandler = assertDoesNotThrow(() -> mapper.readValue(json, QueryHandler.class)); final var selector = 
queryHandler.getQuerySelectorInstance(); assertTrue(selector instanceof LinearQuerySelector); - assertEquals(queries.size(), queryHandler.getQueryCount()); + assertEquals(queries.size(), queryHandler.getExecutableQueryCount()); assertNotEquals(0, queryHandler.hashCode()); - for (int i = 0; i < queryHandler.getQueryCount(); i++) { + for (int i = 0; i < queryHandler.getExecutableQueryCount(); i++) { final var wrapper = queryHandler.getNextQueryStream(selector); assertEquals(i, selector.getCurrentIndex()); final var acutalQuery = new String(wrapper.queryInputStreamSupplier().get().readAllBytes(), StandardCharsets.UTF_8); @@ -131,9 +130,9 @@ public void testQueryStringWrapper(String json, Class sourceType) t QueryHandler queryHandler = assertDoesNotThrow(() -> mapper.readValue(json, QueryHandler.class)); final var selector = queryHandler.getQuerySelectorInstance(); assertInstanceOf(LinearQuerySelector.class, selector); - assertEquals(queries.size(), queryHandler.getQueryCount()); + assertEquals(queries.size(), queryHandler.getExecutableQueryCount()); assertNotEquals(0, queryHandler.hashCode()); - for (int i = 0; i < queryHandler.getQueryCount(); i++) { + for (int i = 0; i < queryHandler.getExecutableQueryCount(); i++) { final var wrapper = queryHandler.getNextQuery(selector); assertEquals(i, selector.getCurrentIndex()); if (FolderQuerySource.class.isAssignableFrom(sourceType)) @@ -151,10 +150,10 @@ public void testQueryIDs(String json, Class sourceType) { QueryHandler queryHandler = assertDoesNotThrow(() -> mapper.readValue(json, QueryHandler.class)); final var selector = queryHandler.getQuerySelectorInstance(); assertInstanceOf(LinearQuerySelector.class, selector); - assertEquals(queries.size(), queryHandler.getQueryCount()); + assertEquals(queries.size(), queryHandler.getExecutableQueryCount()); assertNotEquals(0, queryHandler.hashCode()); final var allQueryIDs = queryHandler.getAllQueryIds(); - for (int i = 0; i < queryHandler.getQueryCount(); i++) { + for (int i = 
0; i < queryHandler.getExecutableQueryCount(); i++) { assertEquals(queryHandler.hashCode() + ":" + i, allQueryIDs[i]); assertEquals(allQueryIDs[i], queryHandler.getQueryId(i)); } diff --git a/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java b/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java index 2c552bdf..d68d5abd 100644 --- a/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java +++ b/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java @@ -79,7 +79,7 @@ public void testTemplateQueries() throws IOException { .withBody(RESPONSE_JSON))); final var queryHandler = new QueryHandler(queryHandlerConfig); final var selector = queryHandler.getQuerySelectorInstance(); - Assertions.assertEquals(2, queryHandler.getQueryCount()); + Assertions.assertEquals(2, queryHandler.getExecutableQueryCount()); for (int i = 0; i < 2; i++) { final var query = queryHandler.getNextQuery(selector); Assertions.assertEquals("PREFIX rdf: SELECT * WHERE {?s rdf:type ; . 
?p }", query.query()); From 5458233537bc9a3d080961bfbb40972481661c36 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Thu, 26 Sep 2024 16:35:47 +0200 Subject: [PATCH 12/21] Update the saving template instances --- .../iguana/cc/query/handler/QueryHandler.java | 114 ++++++++++-------- 1 file changed, 65 insertions(+), 49 deletions(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index 8ef2aa71..9012ea83 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -207,9 +207,7 @@ public QueryHandler(Config config) throws IOException { private record TemplateData(List queries, int templates, int[] indices, int[] instanceNumber, int instanceStart) {} - // TODO: storing and reading of instance file private QueryList initializeTemplateQueryHandler(QuerySource templateSource) throws IOException { - QuerySource querySource = templateSource; final var originalPath = templateSource.getPath(); final var postfix = String.format("_instances_f%s_l%s.txt", Integer.toUnsignedString(this.config.template.endpoint.hashCode()), Integer.toUnsignedString((int) this.config.template.limit.longValue())); @@ -221,68 +219,86 @@ private QueryList initializeTemplateQueryHandler(QuerySource templateSource) thr if (Files.exists(instancePath)) { LOGGER.info("Already existing query template instances have been found and will be reused. 
Delete the following file to regenerate them: {}", instancePath.toAbsolutePath()); - // TODO: change this - querySource = createQuerySource(instancePath); // if the instances already exist, use them + // read in the template data + // the header contains the number of templates and the index (index doesn't count headers) of the first instance + // afterward for each template the index of the template and the number of instances are stored + String header; + try (var reader = Files.newBufferedReader(instancePath)) { + header = reader.readLine(); + Pattern digitRegex = Pattern.compile("\\d+"); + Matcher matcher = digitRegex.matcher(header); + if (!matcher.find()) throw new IOException("Invalid instance file header"); + int templates = Integer.parseInt(matcher.group()); + if (!matcher.find()) throw new IOException("Invalid instance file header"); + int instanceStart = Integer.parseInt(matcher.group()); + final var indices = new int[templates]; + final var instanceNumber = new int[templates]; + for (int i = 0; i < templates; i++) { + if (!matcher.find()) throw new IOException("Invalid instance file header"); + indices[i] = Integer.parseInt(matcher.group()); + if (!matcher.find()) throw new IOException("Invalid instance file header"); + instanceNumber[i] = Integer.parseInt(matcher.group()); + } + templateData = new TemplateData(reader.lines().toList(), templates, indices, instanceNumber, instanceStart); + } } else { - templateData = instantiateTemplateQueries(querySource, config.template); + templateData = instantiateTemplateQueries(templateSource, config.template); - // TODO: change this if (config.template.save) { // save the instances to a file Files.createFile(instancePath); + try (var writer = Files.newBufferedWriter(instancePath)) { - for (String instance : templateData.queries()) { + // write header line + writer.write(String.format("templates: %d instances_start: %d ", templateData.templates, templateData.instanceStart)); + writer.write(String.format("%s", 
IntStream.range(0, templateData.templates) + .mapToObj(i -> "index: " + templateData.indices[i] + " instances_count: " + templateData.instanceNumber[i]) + .collect(Collectors.joining(" ")))); + writer.newLine(); + // write queries and instances + for (String instance : templateData.queries) { writer.write(instance); writer.newLine(); } } - // create a new query source based on the new instance file - querySource = createQuerySource(instancePath); - } else { - // query source isn't necessary, because queries aren't stored in a file, - // directly return a list of the instances instead - // return new StringListQueryList(templateData.queries()); } + } - AtomicInteger templateIndex = new AtomicInteger(0); // index of the next template - AtomicInteger index = new AtomicInteger(0); // index of the current query - AtomicInteger instanceId = new AtomicInteger(0); // id of the current instance for the current template - queryData = templateData.queries.stream().map( - query -> { - // once the template instances start, the template index is reset and reused for the instances - // to track to which template the instances belong - if (index.get() == templateData.instanceStart) templateIndex.set(0); - - if (index.get() >= templateData.instanceStart) { - // query is an instance of a template - - // if the instance id is equal to the number of instances for the current template, - // the next template is used - if (instanceId.get() == templateData.instanceNumber[templateIndex.get()]) { - templateIndex.getAndIncrement(); - instanceId.set(0); - } - return new QueryData(index.getAndIncrement(), QueryData.QueryType.TEMPLATE_INSTANCE, templateIndex.get()); - } else if (templateIndex.get() < templateData.templates && index.get() == templateData.indices[templateIndex.get()]) { - // query is a template + AtomicInteger templateIndex = new AtomicInteger(0); // index of the next template + AtomicInteger index = new AtomicInteger(0); // index of the current query + AtomicInteger instanceId 
= new AtomicInteger(0); // id of the current instance for the current template + queryData = templateData.queries.stream().map( + query -> { + // once the template instances start, the template index is reset and reused for the instances + // to track to which template the instances belong + if (index.get() == templateData.instanceStart) templateIndex.set(0); + + if (index.get() >= templateData.instanceStart) { + // query is an instance of a template + + // if the instance id is equal to the number of instances for the current template, + // the next template is used + if (instanceId.get() == templateData.instanceNumber[templateIndex.get()]) { templateIndex.getAndIncrement(); - return new QueryData(index.getAndIncrement(), QueryData.QueryType.TEMPLATE, null); - } else { - // query is neither a template nor an instance - final var update = QueryData.checkUpdate(new ByteArrayInputStream(query.getBytes())); - return new QueryData(index.getAndIncrement(), update ? QueryData.QueryType.UPDATE : QueryData.QueryType.DEFAULT, null); + instanceId.set(0); } + return new QueryData(index.getAndIncrement(), QueryData.QueryType.TEMPLATE_INSTANCE, templateIndex.get()); + } else if (templateIndex.get() < templateData.templates && index.get() == templateData.indices[templateIndex.get()]) { + // query is a template + templateIndex.getAndIncrement(); + return new QueryData(index.getAndIncrement(), QueryData.QueryType.TEMPLATE, null); + } else { + // query is neither a template nor an instance + final var update = QueryData.checkUpdate(new ByteArrayInputStream(query.getBytes())); + return new QueryData(index.getAndIncrement(), update ? QueryData.QueryType.UPDATE : QueryData.QueryType.DEFAULT, null); } - ).toList(); - this.executableQueryCount = templateData.queries.size() - templateData.templates; - this.representativeQueryCount = config.template.individualResults ? 
- templateData.queries.size() - templateData.templates : - templateData.queries.size() - templateData.instanceStart; - return new StringListQueryList(templateData.queries); - } - return (config.caching()) ? - new FileCachingQueryList(querySource) : // if caching is enabled, cache the instances - new FileReadingQueryList(querySource); // if caching is disabled, read the instances from the file every time + } + ).toList(); + this.executableQueryCount = templateData.queries.size() - templateData.templates; + this.representativeQueryCount = config.template.individualResults ? + templateData.queries.size() - templateData.templates : + templateData.instanceStart; + return new StringListQueryList(templateData.queries); } /** From bfa61c4a2844c29575d680eab93dfff73d7e0143 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:03:05 +0200 Subject: [PATCH 13/21] Fix individual template instances results --- .../iguana/cc/query/handler/QueryHandler.java | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index 9012ea83..6bb275aa 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -145,7 +145,7 @@ public Template(URI endpoint, Long limit, Boolean save, Boolean individualResult this.endpoint = endpoint; this.limit = limit == null ? 
2000 : limit; this.save = save == null || save; - this.individualResults = individualResults != null; + this.individualResults = individualResults == null; } } } @@ -269,6 +269,9 @@ private QueryList initializeTemplateQueryHandler(QuerySource templateSource) thr AtomicInteger instanceId = new AtomicInteger(0); // id of the current instance for the current template queryData = templateData.queries.stream().map( query -> { + // If "individualResults" is turned on, give the query templates to last ids, so that there aren't + // any gaps in the ids and results. + // once the template instances start, the template index is reset and reused for the instances // to track to which template the instances belong if (index.get() == templateData.instanceStart) templateIndex.set(0); @@ -282,14 +285,26 @@ private QueryList initializeTemplateQueryHandler(QuerySource templateSource) thr templateIndex.getAndIncrement(); instanceId.set(0); } + + if (config.template.individualResults) { + return new QueryData(index.getAndIncrement() - templateData.templates, QueryData.QueryType.TEMPLATE_INSTANCE, templateData.queries.size() - templateData.templates + templateIndex.get()); + } return new QueryData(index.getAndIncrement(), QueryData.QueryType.TEMPLATE_INSTANCE, templateIndex.get()); } else if (templateIndex.get() < templateData.templates && index.get() == templateData.indices[templateIndex.get()]) { // query is a template + if (config.template.individualResults) { + // give the templates the last ids, so that there aren't any gaps in the ids and results + index.incrementAndGet(); + return new QueryData(templateData.queries.size() - templateData.templates + templateIndex.getAndIncrement(), QueryData.QueryType.TEMPLATE, null); + } templateIndex.getAndIncrement(); return new QueryData(index.getAndIncrement(), QueryData.QueryType.TEMPLATE, null); } else { // query is neither a template nor an instance final var update = QueryData.checkUpdate(new ByteArrayInputStream(query.getBytes())); + 
if (config.template.individualResults) { + return new QueryData(index.getAndIncrement() - templateIndex.get(), update ? QueryData.QueryType.UPDATE : QueryData.QueryType.DEFAULT, null); + } return new QueryData(index.getAndIncrement(), update ? QueryData.QueryType.UPDATE : QueryData.QueryType.DEFAULT, null); } } @@ -327,12 +342,12 @@ public QuerySelector getQuerySelectorInstance() { public QueryStringWrapper getNextQuery(QuerySelector querySelector) throws IOException { final var queryIndex = getNextQueryIndex(querySelector); - return new QueryStringWrapper(queryIndex[0], queryList.getQuery(queryIndex[0]), queryData.get(queryIndex[0]).update(), queryIndex[1]); + return new QueryStringWrapper(queryData.get(queryIndex[0]).queryId(), queryList.getQuery(queryIndex[0]), queryData.get(queryIndex[0]).update(), queryIndex[1]); } public QueryStreamWrapper getNextQueryStream(QuerySelector querySelector) { final var queryIndex = getNextQueryIndex(querySelector); - return new QueryStreamWrapper(queryIndex[0], config.caching(), () -> { + return new QueryStreamWrapper(queryData.get(queryIndex[0]).queryId(), config.caching(), () -> { try { return this.queryList.getQueryStream(queryIndex[0]); } catch (IOException e) { From 9ee14ed05a2b394eec5e758a88229b6d4e1add0e Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:11:22 +0200 Subject: [PATCH 14/21] Add some comments --- .../org/aksw/iguana/cc/query/handler/QueryHandler.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index 6bb275aa..e0f4f053 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -162,8 +162,8 @@ public record QueryStreamWrapper(int index, boolean cached, Supplier queryData; - int 
executableQueryCount = 0; - int representativeQueryCount = 0; + int executableQueryCount = 0; // stores the number of queries that can be executed + int representativeQueryCount = 0; // stores the number of queries that are represented in the results private int workerCount = 0; // give every worker inside the same worker config an offset seed @@ -264,6 +264,7 @@ private QueryList initializeTemplateQueryHandler(QuerySource templateSource) thr } } + // initialize queryData based on the template data AtomicInteger templateIndex = new AtomicInteger(0); // index of the next template AtomicInteger index = new AtomicInteger(0); // index of the current query AtomicInteger instanceId = new AtomicInteger(0); // id of the current instance for the current template @@ -309,6 +310,9 @@ private QueryList initializeTemplateQueryHandler(QuerySource templateSource) thr } } ).toList(); + + // set the number of queries that can be executed and the number of queries + // that are represented in the results this.executableQueryCount = templateData.queries.size() - templateData.templates; this.representativeQueryCount = config.template.individualResults ? 
templateData.queries.size() - templateData.templates : From d0c99c0edebbe772db4dfc86bb58693c11c82a7b Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:15:54 +0200 Subject: [PATCH 15/21] Update schema --- schema/iguana-schema.json | 3 +++ src/main/resources/iguana-schema.json | 3 +++ 2 files changed, 6 insertions(+) diff --git a/schema/iguana-schema.json b/schema/iguana-schema.json index 0ad54c1b..cf1539f9 100644 --- a/schema/iguana-schema.json +++ b/schema/iguana-schema.json @@ -351,6 +351,9 @@ }, "save": { "type": "boolean" + }, + "individualResults": { + "type": "boolean" } }, "required": [ diff --git a/src/main/resources/iguana-schema.json b/src/main/resources/iguana-schema.json index d92fb8d6..621bc3e1 100644 --- a/src/main/resources/iguana-schema.json +++ b/src/main/resources/iguana-schema.json @@ -351,6 +351,9 @@ }, "save": { "type": "boolean" + }, + "individualResults": { + "type": "boolean" } }, "required": [ From 2c78c062c76505266c69d9c602198504d25f4359 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:16:42 +0200 Subject: [PATCH 16/21] Change default behavior of query templates --- .../java/org/aksw/iguana/cc/query/handler/QueryHandler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index e0f4f053..9de16c26 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -145,7 +145,7 @@ public Template(URI endpoint, Long limit, Boolean save, Boolean individualResult this.endpoint = endpoint; this.limit = limit == null ? 
2000 : limit; this.save = save == null || save; - this.individualResults = individualResults == null; + this.individualResults = individualResults != null; } } } From f868736355f64336fbf67d56b9b5427a113f3f56 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Fri, 27 Sep 2024 12:04:54 +0200 Subject: [PATCH 17/21] Update tests --- .../query/handler/QueryHandlerConfigTest.java | 15 +++++++- .../cc/query/handler/TemplateQueriesTest.java | 35 +++++++++++++++++++ .../suite-configs/valid/config-full.yaml | 1 + 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/test/java/org/aksw/iguana/cc/query/handler/QueryHandlerConfigTest.java b/src/test/java/org/aksw/iguana/cc/query/handler/QueryHandlerConfigTest.java index 243fade7..19421477 100644 --- a/src/test/java/org/aksw/iguana/cc/query/handler/QueryHandlerConfigTest.java +++ b/src/test/java/org/aksw/iguana/cc/query/handler/QueryHandlerConfigTest.java @@ -73,11 +73,24 @@ private static Stream testDeserializationData() { QueryHandler.Config.Order.RANDOM, 42L, QueryHandler.Config.Language.SPARQL, - new QueryHandler.Config.Template(URI.create("http://example.org/sparql"), 2000L, true, null) + new QueryHandler.Config.Template(URI.create("http://example.org/sparql"), 2000L, true, false) ), """ {"path":"some.queries","format":"separator", "separator": "\\n", "caching":true,"order":"random","seed":42,"lang":"SPARQL","template": {"endpoint": "http://example.org/sparql"}} """ + ), + Arguments.of(new QueryHandler.Config("some.queries", + QueryHandler.Config.Format.SEPARATOR, + "\n", + true, + QueryHandler.Config.Order.RANDOM, + 42L, + QueryHandler.Config.Language.SPARQL, + new QueryHandler.Config.Template(URI.create("http://example.org/sparql"), 10L, false, true) + ), + """ + {"path":"some.queries","format":"separator", "separator": "\\n", "caching":true,"order":"random","seed":42,"lang":"SPARQL","template": {"endpoint": "http://example.org/sparql", "limit": 10, "save": false, 
"individualResults": true }} + """ ) ); } diff --git a/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java b/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java index d68d5abd..63d3ae4c 100644 --- a/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java +++ b/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java @@ -104,4 +104,39 @@ public void testMalformedTemplateQuery() throws IOException { Assertions.assertThrows(QueryParseException.class, () -> new QueryHandler(queryHandlerConfig)); } + @Test + public void testSubsumedQueries() throws IOException { + String templateQuery = "PREFIX rdf: SELECT * WHERE {?s rdf:type %%var0%% ; %%var1%% %%var2%%. %%var2%% ?p }"; + tempTemplateFile = Files.createTempFile(parentFolder, "Template", ".txt"); + Files.writeString(tempTemplateFile, templateQuery, StandardCharsets.UTF_8); + final var queryHandlerConfig = new QueryHandler.Config( + tempTemplateFile.toString(), + QueryHandler.Config.Format.ONE_PER_LINE, + null, + true, + QueryHandler.Config.Order.LINEAR, + null, + QueryHandler.Config.Language.SPARQL, + new QueryHandler.Config.Template(URI.create("http://localhost:" + wm.getPort()), 2000L, false, false) + ); + wm.stubFor(get(anyUrl()) + .withQueryParam("query", matching("PREFIX\\s+rdf:\\s+\\s+SELECT\\s+DISTINCT\\s+\\?var0\\s+\\?var1\\s+\\?var2\\s+WHERE\\s+\\{\\s*\\?s\\s+rdf:type\\s+\\?var0\\s*;\\s*\\?var1\\s+\\?var2\\s*\\.\\s*\\?var2\\s+\\?p\\s+\\s*}\\s+LIMIT\\s+2000\\s*")) + .willReturn(aResponse() + .withStatus(200) + .withHeader("Content-Type", "application/sparql-results+json") + .withBody(RESPONSE_JSON))); + final var queryHandler = new QueryHandler(queryHandlerConfig); + final var selector = queryHandler.getQuerySelectorInstance(); + Assertions.assertEquals(2, queryHandler.getExecutableQueryCount()); + Assertions.assertEquals(1, queryHandler.getRepresentativeQueryCount()); + var query = queryHandler.getNextQuery(selector); + 
Assertions.assertEquals(0, query.resultId()); + Assertions.assertEquals(1, query.index()); + Assertions.assertFalse(query.update()); + query = queryHandler.getNextQuery(selector); + Assertions.assertEquals(0, query.resultId()); + Assertions.assertEquals(2, query.index()); + Assertions.assertFalse(query.update()); + } + } diff --git a/src/test/resources/suite-configs/valid/config-full.yaml b/src/test/resources/suite-configs/valid/config-full.yaml index ad228cc2..260add8e 100644 --- a/src/test/resources/suite-configs/valid/config-full.yaml +++ b/src/test/resources/suite-configs/valid/config-full.yaml @@ -57,6 +57,7 @@ tasks: endpoint: "http://localhost:3030/sp2b" limit: 2000 save: false + individualResults: false timeout: 2S connection: fuseki completionTarget: From bbcae532665ff6d7007d30cceb9fe4bcd5d86dea Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Fri, 27 Sep 2024 12:05:07 +0200 Subject: [PATCH 18/21] Fix configuration --- .../java/org/aksw/iguana/cc/query/handler/QueryHandler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index 9de16c26..b487f045 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -145,7 +145,7 @@ public Template(URI endpoint, Long limit, Boolean save, Boolean individualResult this.endpoint = endpoint; this.limit = limit == null ? 
2000 : limit; this.save = save == null || save; - this.individualResults = individualResults != null; + this.individualResults = individualResults != null && individualResults; } } } From 779bd341c6d7f70d90c3f4b9eb8442a9f3c28cb5 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Fri, 27 Sep 2024 12:05:15 +0200 Subject: [PATCH 19/21] Update documentation --- docs/configuration/queries.md | 16 +++++++++++----- example-suite.yml | 3 ++- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/docs/configuration/queries.md b/docs/configuration/queries.md index ffb3bf90..25758d0b 100644 --- a/docs/configuration/queries.md +++ b/docs/configuration/queries.md @@ -130,16 +130,21 @@ The results may look like the following: ### Configuration The `template` attribute has the following properties: -| property | required | default | description | example | -|----------|----------|---------|---------------------------------------------------------------------|-----------------------------| -| endpoint | yes | | The endpoint to query. | `http://dbpedia.org/sparql` | -| limit | no | `2000` | The maximum number of instances per query template. | `100` | -| save | no | `true` | If set to `true`, query instances will be saved in a separate file. | `false` | +| property | required | default | description | example | +|-------------------|----------|---------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------| +| endpoint | yes | | The endpoint to query. | `http://dbpedia.org/sparql` | +| limit | no | `2000` | The maximum number of instances per query template. | `100` | +| save | no | `true` | If set to `true`, query instances will be saved in a separate file. 
| `false` | +| individualResults | no | `false` | If set to `true`, the results of each individual template instance will be reported separately; if set to `false`, their results will be subsumed under the query template. | `true` | If the `save` attribute is set to `true`, the instances will be saved in a separate file in the same directory as the query templates. If the query templates are stored in a folder, the instances will be saved in the parent directory. +If the `individualResults` attribute is set to `false`, +the results of the instances will be subsumed under the query template. +The query template will then be reported in the results as if it were an actual query. + Example of query configuration with query templates: ```yaml queries: @@ -149,4 +154,5 @@ queries: endpoint: "http://dbpedia.org/sparql" limit: 100 save: true + individualResults: true ``` diff --git a/example-suite.yml b/example-suite.yml index c4b3d280..50d65cef 100644 --- a/example-suite.yml +++ b/example-suite.yml @@ -75,10 +75,11 @@ tasks: requestType: post query queries: path: "./example/query_pattern.txt" - pattern: + template: endpoint: "https://dbpedia.org/sparql" limit: 1000 save: false + individualResults: false timeout: 180s completionTarget: duration: 1000s From b3d980fb905f35565f50f7cb8168413500117b3d Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Fri, 27 Sep 2024 12:19:44 +0200 Subject: [PATCH 20/21] Add some comments (to trigger GitHub actions) --- .../aksw/iguana/cc/query/handler/QueryHandler.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index b487f045..efce866b 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -270,8 +270,11 @@ private QueryList
initializeTemplateQueryHandler(QuerySource templateSource) thr AtomicInteger instanceId = new AtomicInteger(0); // id of the current instance for the current template queryData = templateData.queries.stream().map( query -> { - // If "individualResults" is turned on, give the query templates to last ids, so that there aren't - // any gaps in the ids and results. + // If "individualResults" is turned on, move the query templates outside the range of + // "representativeQueryCount" to avoid them being represented in the results. + // Otherwise, if "individualResults" is turned off, the instances need to be moved outside the range + // of "representativeQueryCount", but because "instantiateTemplateQueries" already appends the + // instances to the end of the original queries, this will already be done. // once the template instances start, the template index is reset and reused for the instances // to track to which template the instances belong @@ -432,6 +435,10 @@ public Config getConfig() { * SELECT * WHERE {?s <http://prop/1> ?o . ?o <http://exa.com> "123"}
* SELECT * WHERE {?s <http://prop/1> ?o . ?o <http://exa.com> "12"}
* SELECT * WHERE {?s <http://prop/2> ?o . ?o <http://exa.com> "1234"}
+ * + * The template data that this method returns will contain a list of all queries, + * where the first queries are the original queries including the query templates. + * The query instances will be appended to the original queries. */ private static TemplateData instantiateTemplateQueries(QuerySource querySource, Config.Template config) throws IOException { // charset for generating random variable names From 1c9ec738b7b045ffdd7df9940b17d124bdef0e97 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Fri, 27 Sep 2024 15:27:08 +0200 Subject: [PATCH 21/21] Fix minor bug that caused an infinite loop --- graalvm/suite.yml | 9 +++++---- .../org/aksw/iguana/cc/query/handler/QueryHandler.java | 9 +++++++++ .../org/aksw/iguana/cc/utils/http/RequestFactory.java | 3 +-- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/graalvm/suite.yml b/graalvm/suite.yml index 75c9bd4c..c3ee89d3 100644 --- a/graalvm/suite.yml +++ b/graalvm/suite.yml @@ -6,7 +6,7 @@ connections: - name: "Blazegraph" version: "1.1.1" dataset: "DatasetName" - endpoint: "http://localhost:9999/blazegraph/sparql" + endpoint: "https://dbpedia.org/sparql" authentication: user: "user" password: "test" @@ -60,13 +60,14 @@ tasks: seed: 123 lang: "SPARQL" template: - endpoint: "http://dbpedia.org/sparql" + endpoint: "https://dbpedia.org/sparql" limit: 1 save: false + individualResults: false timeout: 2s connection: Blazegraph completionTarget: - duration: 1s + duration: 0.5s acceptHeader: "application/sparql-results+json" requestType: get query parseResults: true @@ -78,7 +79,7 @@ tasks: timeout: 3m connection: Blazegraph completionTarget: - duration: 1s + duration: 0.5s requestType: get query acceptHeader: "application/sparql-results+json" - number: 1 diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index efce866b..f1ead708 100644 --- 
a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -347,6 +347,15 @@ public QuerySelector getQuerySelectorInstance() { throw new IllegalStateException("Unknown query selection order: " + config.order()); } + public QuerySelector getQuerySelectorInstance(Config.Order type) { + switch (type) { + case LINEAR -> { return new LinearQuerySelector(queryList.size()); } + case RANDOM -> { return new RandomQuerySelector(queryList.size(), config.seed() + workerCount++); } + } + + throw new IllegalStateException("Unknown query selection order: " + type); + } + public QueryStringWrapper getNextQuery(QuerySelector querySelector) throws IOException { final var queryIndex = getNextQueryIndex(querySelector); return new QueryStringWrapper(queryData.get(queryIndex[0]).queryId(), queryList.getQuery(queryIndex[0]), queryData.get(queryIndex[0]).update(), queryIndex[1]); diff --git a/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java b/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java index ca305a0d..9d44cb95 100644 --- a/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java +++ b/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java @@ -4,7 +4,6 @@ import com.fasterxml.jackson.annotation.JsonValue; import org.aksw.iguana.cc.config.elements.ConnectionConfig; import org.aksw.iguana.cc.query.handler.QueryHandler; -import org.aksw.iguana.cc.query.selector.impl.LinearQuerySelector; import org.aksw.iguana.cc.worker.HttpWorker; import org.aksw.iguana.cc.worker.impl.SPARQLProtocolWorker; import org.apache.hc.core5.http.HttpHeaders; @@ -173,7 +172,7 @@ public AsyncRequestProducer buildHttpRequest(QueryHandler.QueryStreamWrapper que * @param queryHandler the query handler to preload requests for */ public void preloadRequests(QueryHandler queryHandler) { - final var selector = new LinearQuerySelector(queryHandler.getExecutableQueryCount()); + final var selector 
= queryHandler.getQuerySelectorInstance(QueryHandler.Config.Order.LINEAR); for (int i = 0; i < queryHandler.getExecutableQueryCount(); i++) { try { // build request and discard it