From ba811b9f6a9fec15dddb1457fefb1281fb54f335 Mon Sep 17 00:00:00 2001 From: lingbin Date: Wed, 11 Sep 2024 02:48:43 +0800 Subject: [PATCH 01/86] [native] Fix recording of SUM type in PrometheusStatsReporter For SUM type worker metrics, the corresponding type in `PrometheusStatsReporter` is `prometheus::Gauge`. For these metrics, each time they are recorded (via `RECORD_METRIC_VALUE`), a "delta" is passed in, so `Gauge::Increment()` should be used in `PrometheusStatsReporter` instead of `Gauge::Set()` (which overwrites the old value). --- .../PrometheusStatsReporter.cpp | 20 ++++++++++++------- .../tests/PrometheusReporterTest.cpp | 15 ++++++++++---- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/presto-native-execution/presto_cpp/main/runtime-metrics/PrometheusStatsReporter.cpp b/presto-native-execution/presto_cpp/main/runtime-metrics/PrometheusStatsReporter.cpp index 2cedf64fb86f..30e361a6bc4d 100644 --- a/presto-native-execution/presto_cpp/main/runtime-metrics/PrometheusStatsReporter.cpp +++ b/presto-native-execution/presto_cpp/main/runtime-metrics/PrometheusStatsReporter.cpp @@ -169,17 +169,23 @@ void PrometheusStatsReporter::addMetricValue(const char* key, size_t value) auto statsInfo = metricIterator->second; switch (statsInfo.statType) { case velox::StatType::COUNT: { - auto counter = + auto* counter = reinterpret_cast<::prometheus::Counter*>(statsInfo.metricPtr); - counter->Increment(value); - } break; - case velox::StatType::SUM: + counter->Increment(static_cast(value)); + break; + } + case velox::StatType::SUM: { + auto* gauge = reinterpret_cast<::prometheus::Gauge*>(statsInfo.metricPtr); + gauge->Increment(static_cast(value)); + break; + } case velox::StatType::AVG: case velox::StatType::RATE: { // Overrides the existing state. 
- auto gauge = reinterpret_cast<::prometheus::Gauge*>(statsInfo.metricPtr); - gauge->Set(value); - } break; + auto* gauge = reinterpret_cast<::prometheus::Gauge*>(statsInfo.metricPtr); + gauge->Set(static_cast(value)); + break; + } default: VELOX_UNSUPPORTED( "Unsupported metric type {}", diff --git a/presto-native-execution/presto_cpp/main/runtime-metrics/tests/PrometheusReporterTest.cpp b/presto-native-execution/presto_cpp/main/runtime-metrics/tests/PrometheusReporterTest.cpp index e2b7ffa178e8..c6b6d635d1a9 100644 --- a/presto-native-execution/presto_cpp/main/runtime-metrics/tests/PrometheusReporterTest.cpp +++ b/presto-native-execution/presto_cpp/main/runtime-metrics/tests/PrometheusReporterTest.cpp @@ -22,6 +22,7 @@ class PrometheusReporterTest : public testing::Test { void SetUp() override { reporter = std::make_shared(testLabels); } + void verifySerializedResult( const std::string& fullSerializedResult, std::vector& expected) { @@ -32,6 +33,7 @@ class PrometheusReporterTest : public testing::Test { EXPECT_EQ(line, expected[i++]); } } + const std::map testLabels = { {"cluster", "test_cluster"}, {"worker", "test_worker_pod"}}; @@ -62,24 +64,29 @@ TEST_F(PrometheusReporterTest, testCountAndGauge) { facebook::velox::StatType::RATE, reporter->registeredMetricsMap_.find("test.key4")->second.statType); - std::vector testData = {10, 11, 15}; + std::vector testData = {10, 12, 14}; for (auto i : testData) { reporter->addMetricValue("test.key1", i); reporter->addMetricValue("test.key2", i + 1000); + reporter->addMetricValue("test.key3", i + 2000); + reporter->addMetricValue("test.key4", i + 3000); } + // Uses default value of 1 for second parameter. 
reporter->addMetricValue("test.key1"); + reporter->addMetricValue("test.key3"); + auto fullSerializedResult = reporter->fetchMetrics(); std::vector expected = { "# TYPE test_key1 counter", "test_key1{" + labelsSerialized + "} 37", "# TYPE test_key2 gauge", - "test_key2{" + labelsSerialized + "} 1015", + "test_key2{" + labelsSerialized + "} 1014", "# TYPE test_key3 gauge", - "test_key3{" + labelsSerialized + "} 0", + "test_key3{" + labelsSerialized + "} 6037", "# TYPE test_key4 gauge", - "test_key4{" + labelsSerialized + "} 0"}; + "test_key4{" + labelsSerialized + "} 3014"}; verifySerializedResult(fullSerializedResult, expected); }; From 26fd8dbd976e2b4341558c953e54bcfbad0a6198 Mon Sep 17 00:00:00 2001 From: Elliotte Rusty Harold Date: Wed, 18 Sep 2024 13:25:33 -0400 Subject: [PATCH 02/86] broadcasted --> broadcast broadcasted isn't totally wrong, but broadcast is usually preferred as the past tense --- presto-docs/src/main/sphinx/sql/explain.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-docs/src/main/sphinx/sql/explain.rst b/presto-docs/src/main/sphinx/sql/explain.rst index b5dc695cec1d..e4c9144726e4 100644 --- a/presto-docs/src/main/sphinx/sql/explain.rst +++ b/presto-docs/src/main/sphinx/sql/explain.rst @@ -38,7 +38,7 @@ distributed between fragments: ``BROADCAST`` Fragment is executed on a fixed number of nodes with the input data - broadcasted to all nodes. + broadcast to all nodes. ``SOURCE`` Fragment is executed on nodes where input splits are accessed. 
From 0b10149ee03f571ba8bab13a4e3d67b03defcd21 Mon Sep 17 00:00:00 2001 From: Tim Meehan Date: Fri, 30 Aug 2024 15:21:57 -0400 Subject: [PATCH 03/86] Acknowledge results in HEAD request to output buffer --- .../src/main/java/com/facebook/presto/server/TaskResource.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/presto-main/src/main/java/com/facebook/presto/server/TaskResource.java b/presto-main/src/main/java/com/facebook/presto/server/TaskResource.java index 37450b850aa6..372326ad4ce4 100644 --- a/presto-main/src/main/java/com/facebook/presto/server/TaskResource.java +++ b/presto-main/src/main/java/com/facebook/presto/server/TaskResource.java @@ -322,8 +322,9 @@ public Response taskResultsHeaders( public Response taskResultsHeaders( @PathParam("taskId") TaskId taskId, @PathParam("bufferId") OutputBufferId bufferId, - @PathParam("token") final long unused) + @PathParam("token") final long token) { + taskManager.acknowledgeTaskResults(taskId, bufferId, token); return taskResultsHeaders(taskId, bufferId); } From 46eacc3888e1d4e5605015a52cd0a04262c5f183 Mon Sep 17 00:00:00 2001 From: Amit Dutta Date: Fri, 13 Sep 2024 13:25:06 -0700 Subject: [PATCH 04/86] [native] Removing status message from native worker to support http/2 --- .../presto_cpp/main/http/HttpClient.cpp | 4 ++-- .../presto_cpp/main/http/HttpServer.cpp | 21 ++++--------------- 2 files changed, 6 insertions(+), 19 deletions(-) diff --git a/presto-native-execution/presto_cpp/main/http/HttpClient.cpp b/presto-native-execution/presto_cpp/main/http/HttpClient.cpp index 59e8144e59f0..4504d9ae83b1 100644 --- a/presto-native-execution/presto_cpp/main/http/HttpClient.cpp +++ b/presto-native-execution/presto_cpp/main/http/HttpClient.cpp @@ -145,7 +145,7 @@ std::unique_ptr HttpResponse::consumeBody( void HttpResponse::freeBuffers() { if (pool_ != nullptr) { - for (auto& iobuf : bodyChain_) { + for (const auto& iobuf : bodyChain_) { if (iobuf != nullptr) { pool_->free(iobuf->writableData(), 
iobuf->capacity()); } @@ -170,7 +170,7 @@ std::string HttpResponse::dumpBodyChain() const { std::string responseBody; if (!bodyChain_.empty()) { std::ostringstream oss; - for (auto& buf : bodyChain_) { + for (const auto& buf : bodyChain_) { oss << std::string((const char*)buf->data(), buf->length()); } responseBody = oss.str(); diff --git a/presto-native-execution/presto_cpp/main/http/HttpServer.cpp b/presto-native-execution/presto_cpp/main/http/HttpServer.cpp index 205d95b480ab..8d22b3fb0e2d 100644 --- a/presto-native-execution/presto_cpp/main/http/HttpServer.cpp +++ b/presto-native-execution/presto_cpp/main/http/HttpServer.cpp @@ -21,9 +21,7 @@ namespace facebook::presto::http { void sendOkResponse(proxygen::ResponseHandler* downstream) { - proxygen::ResponseBuilder(downstream) - .status(http::kHttpOk, "OK") - .sendWithEOM(); + proxygen::ResponseBuilder(downstream).status(http::kHttpOk, "").sendWithEOM(); } void sendOkResponse(proxygen::ResponseHandler* downstream, const json& body) { @@ -49,7 +47,7 @@ void sendOkResponse( proxygen::ResponseHandler* downstream, const std::string& body) { proxygen::ResponseBuilder(downstream) - .status(http::kHttpOk, "OK") + .status(http::kHttpOk, "") .header( proxygen::HTTP_HEADER_CONTENT_TYPE, http::kMimeTypeApplicationJson) .body(body) @@ -60,7 +58,7 @@ void sendOkThriftResponse( proxygen::ResponseHandler* downstream, const std::string& body) { proxygen::ResponseBuilder(downstream) - .status(http::kHttpOk, "OK") + .status(http::kHttpOk, "") .header( proxygen::HTTP_HEADER_CONTENT_TYPE, http::kMimeTypeApplicationThrift) .body(body) @@ -71,19 +69,8 @@ void sendErrorResponse( proxygen::ResponseHandler* downstream, const std::string& error, uint16_t status) { - static const size_t kMaxStatusSize = 1024; - - // Use a prefix of the 'error' as status message. Make sure it doesn't include - // new lines. 
See https://www.w3.org/Protocols/rfc2616/rfc2616-sec6.html - - size_t statusSize = kMaxStatusSize; - auto pos = error.find('\n'); - if (pos != std::string::npos && pos < statusSize) { - statusSize = pos; - } - proxygen::ResponseBuilder(downstream) - .status(status, error.substr(0, statusSize)) + .status(status, "") .body(error) .sendWithEOM(); } From c00f8af3595bc36eef10cdc79c66008cf0871a77 Mon Sep 17 00:00:00 2001 From: Tim Meehan Date: Tue, 17 Sep 2024 10:02:52 -0400 Subject: [PATCH 05/86] Use Provisio plugin for packaging --- pom.xml | 32 +- .../conf/docker/common/compose-commons.sh | 2 +- presto-server/pom.xml | 443 ++---------------- presto-server/src/main/assembly/presto.xml | 220 --------- .../main/java/com/facebook/presto/Dummy.java | 21 - presto-server/src/main/provisio/presto.xml | 274 +++++++++++ 6 files changed, 336 insertions(+), 656 deletions(-) delete mode 100644 presto-server/src/main/assembly/presto.xml delete mode 100644 presto-server/src/main/java/com/facebook/presto/Dummy.java create mode 100644 presto-server/src/main/provisio/presto.xml diff --git a/pom.xml b/pom.xml index e53fa8717e35..3985475d962a 100644 --- a/pom.xml +++ b/pom.xml @@ -2368,7 +2368,8 @@ org.sonatype.plugins nexus-staging-maven-plugin ${dep.nexus-staging-plugin.version} - true + ossrh https://oss.sonatype.org/ @@ -2397,6 +2398,13 @@ presto-maven-plugin + + ca.vanzyl.provisio.maven.plugins + provisio-maven-plugin + 1.0.18 + true + + org.apache.maven.plugins maven-compiler-plugin @@ -2576,9 +2584,31 @@ deploy-to-ossrh + + org.apache.maven.plugins + maven-deploy-plugin + + true + + org.sonatype.plugins nexus-staging-maven-plugin + + + + default-deploy + deploy + + deploy + + + + + ossrh + https://oss.sonatype.org/ + org.apache.maven.plugins diff --git a/presto-product-tests/conf/docker/common/compose-commons.sh b/presto-product-tests/conf/docker/common/compose-commons.sh index 0b1119bf2546..5a87e5e1bc79 100644 --- a/presto-product-tests/conf/docker/common/compose-commons.sh +++ 
b/presto-product-tests/conf/docker/common/compose-commons.sh @@ -33,7 +33,7 @@ export HADOOP_BASE_IMAGE=${HADOOP_BASE_IMAGE:-"prestodb/hdp2.6-hive"} if [[ -z "${PRESTO_SERVER_DIR:-}" ]]; then source "${PRODUCT_TESTS_ROOT}/target/classes/presto.env" - PRESTO_SERVER_DIR="${PROJECT_ROOT}/presto-server/target/presto-server-${PRESTO_VERSION}/presto-server-${PRESTO_VERSION}/" + PRESTO_SERVER_DIR="${PROJECT_ROOT}/presto-server/target/presto-server-${PRESTO_VERSION}/" fi export_canonical_path PRESTO_SERVER_DIR diff --git a/presto-server/pom.xml b/presto-server/pom.xml index a638792efa73..deeff2d162f3 100644 --- a/presto-server/pom.xml +++ b/presto-server/pom.xml @@ -10,6 +10,7 @@ presto-server presto-server + provisio ${project.parent.basedir} @@ -22,425 +23,41 @@ com.facebook.presto.server.PrestoServer ${project.artifactId} - - - - - - com.facebook.presto - presto-main - - - - - - com.facebook.airlift - launcher - ${dep.packaging.version} - bin - tar.gz - provided - - - - com.facebook.airlift - launcher - ${dep.packaging.version} - properties - tar.gz - provided - - - - - com.facebook.presto - presto-resource-group-managers - ${project.version} - zip - provided - - - - com.facebook.presto - presto-password-authenticators - ${project.version} - zip - provided - - - - com.facebook.presto - presto-session-property-managers - ${project.version} - zip - provided - - - - com.facebook.presto - presto-node-ttl-fetchers - ${project.version} - zip - provided - - - - com.facebook.presto - presto-cluster-ttl-providers - ${project.version} - zip - provided - - - - com.facebook.presto - presto-jmx - ${project.version} - zip - provided - - - - com.facebook.presto - presto-cassandra - ${project.version} - zip - provided - - - - com.facebook.presto - presto-pinot - ${project.version} - zip - provided - - - - com.facebook.presto - presto-example-http - ${project.version} - zip - provided - - - - com.facebook.presto - presto-hive-hadoop2 - ${project.version} - zip - provided - - - - 
com.facebook.presto - presto-memory - ${project.version} - zip - provided - - - - com.facebook.presto - presto-blackhole - ${project.version} - zip - provided - - - - com.facebook.presto - presto-kafka - ${project.version} - zip - provided - - - - com.facebook.presto - presto-kudu - ${project.version} - zip - provided - - - - com.facebook.presto - presto-atop - ${project.version} - zip - provided - - - - com.facebook.presto - presto-ml - ${project.version} - zip - provided - - - - com.facebook.presto - presto-mysql - ${project.version} - zip - provided - - - - com.facebook.presto - presto-singlestore - ${project.version} - zip - provided - - - - com.facebook.presto - presto-hana - ${project.version} - zip - provided - - - - com.facebook.presto - presto-oracle - ${project.version} - zip - provided - - - - com.facebook.presto - presto-bigquery - ${project.version} - zip - provided - - - com.facebook.presto - presto-prometheus - ${project.version} - zip - provided - - - - com.facebook.presto - presto-postgresql - ${project.version} - zip - provided - - - - com.facebook.presto - presto-redshift - ${project.version} - zip - provided - - - - com.facebook.presto - presto-sqlserver - ${project.version} - zip - provided - - - - com.facebook.presto - presto-redis - ${project.version} - zip - provided - - - - com.facebook.presto - presto-tpch - ${project.version} - zip - provided - - - - com.facebook.presto - presto-tpcds - ${project.version} - zip - provided - - - - com.facebook.presto - presto-teradata-functions - ${project.version} - zip - provided - - - - com.facebook.presto - presto-mongodb - ${project.version} - zip - provided - - - - com.facebook.presto - presto-local-file - ${project.version} - zip - provided - - - - com.facebook.presto - presto-accumulo - ${project.version} - zip - provided - - - - com.facebook.presto - presto-thrift-connector - ${project.version} - zip - provided - - - - com.facebook.presto - presto-elasticsearch - ${project.version} - zip - 
provided - - - - com.facebook.presto - presto-druid - ${project.version} - zip - provided - - - - com.facebook.presto - presto-iceberg - ${project.version} - zip - provided - - - - com.facebook.presto - presto-function-namespace-managers - ${project.version} - zip - provided - - - - com.facebook.presto - presto-hive-function-namespace - ${project.version} - zip - provided - - - - com.facebook.presto - presto-delta - ${project.version} - zip - provided - - - - com.facebook.presto - presto-hudi - ${project.version} - zip - provided - - - - com.facebook.presto - presto-clickhouse - ${project.version} - zip - provided - - - + false + - org.apache.maven.plugins - maven-dependency-plugin - - - unpack-plugins - prepare-package - - unpack-dependencies - - - false - - zip - provided - - - - unpack-launcher - prepare-package - - unpack-dependencies - - - false - launcher - provided - ${project.build.directory}/dependency/launcher - - - - - - - org.apache.maven.plugins - maven-assembly-plugin - - - bin - package - - single - - - - dir - tar.gz - - - src/main/assembly/presto.xml - - presto-server-${project.version} - false - - - + ca.vanzyl.provisio.maven.plugins + provisio-maven-plugin + + + + io.takari.maven.plugins + takari-lifecycle-plugin + 1.10.1 + false + + none + ${skipTakariLifecyclePlugin} + + + + + + + + deploy-to-ossrh + + + true + + + diff --git a/presto-server/src/main/assembly/presto.xml b/presto-server/src/main/assembly/presto.xml deleted file mode 100644 index c81bd9e5b10f..000000000000 --- a/presto-server/src/main/assembly/presto.xml +++ /dev/null @@ -1,220 +0,0 @@ - - presto-server - true - - - - README.txt - - - NOTICE - - - - - - - false - runtime - lib - true - - - - - - - ${project.build.directory}/dependency/launcher/bin - - launcher - launcher.py - launcher.properties - - bin - - - ${project.build.directory}/dependency/launcher/bin - - launcher.properties - - bin - true - - - ${project.build.directory}/dependency/launcher/bin - - launcher - launcher.py 
- - bin - 0755 - - - - - ${project.build.directory}/dependency/presto-resource-group-managers-${project.version} - plugin/resource-group-managers - - - ${project.build.directory}/dependency/presto-password-authenticators-${project.version} - plugin/password-authenticators - - - ${project.build.directory}/dependency/presto-session-property-managers-${project.version} - plugin/session-property-managers - - - ${project.build.directory}/dependency/presto-node-ttl-fetchers-${project.version} - plugin/ttl-fetchers - - - ${project.build.directory}/dependency/presto-cluster-ttl-providers-${project.version} - plugin/cluster-ttl-providers - - - ${project.build.directory}/dependency/presto-function-namespace-managers-${project.version} - plugin/function-namespace-managers - - - ${project.build.directory}/dependency/presto-jmx-${project.version} - plugin/jmx - - - ${project.build.directory}/dependency/presto-cassandra-${project.version} - plugin/cassandra - - - ${project.build.directory}/dependency/presto-pinot-${project.version} - plugin/pinot - - - ${project.build.directory}/dependency/presto-example-http-${project.version} - plugin/example-http - - - ${project.build.directory}/dependency/presto-hana-${project.version} - plugin/hana - - - ${project.build.directory}/dependency/presto-hive-hadoop2-${project.version} - plugin/hive-hadoop2 - - - ${project.build.directory}/dependency/presto-memory-${project.version} - plugin/memory - - - ${project.build.directory}/dependency/presto-blackhole-${project.version} - plugin/blackhole - - - ${project.build.directory}/dependency/presto-kafka-${project.version} - plugin/kafka - - - ${project.build.directory}/dependency/presto-kudu-${project.version} - plugin/kudu - - - ${project.build.directory}/dependency/presto-atop-${project.version} - plugin/atop - - - ${project.build.directory}/dependency/presto-ml-${project.version} - plugin/ml - - - ${project.build.directory}/dependency/presto-mysql-${project.version} - plugin/mysql - - - 
${project.build.directory}/dependency/presto-singlestore-${project.version} - plugin/singlestore - - - ${project.build.directory}/dependency/presto-oracle-${project.version} - plugin/oracle - - - ${project.build.directory}/dependency/presto-postgresql-${project.version} - plugin/postgresql - - - ${project.build.directory}/dependency/presto-prometheus-${project.version} - plugin/prometheus - - - ${project.build.directory}/dependency/presto-redshift-${project.version} - plugin/redshift - - - ${project.build.directory}/dependency/presto-sqlserver-${project.version} - plugin/sqlserver - - - ${project.build.directory}/dependency/presto-redis-${project.version} - plugin/redis - - - ${project.build.directory}/dependency/presto-tpch-${project.version} - plugin/tpch - - - ${project.build.directory}/dependency/presto-tpcds-${project.version} - plugin/tpcds - - - ${project.build.directory}/dependency/presto-teradata-functions-${project.version} - plugin/teradata-functions - - - ${project.build.directory}/dependency/presto-mongodb-${project.version} - plugin/mongodb - - - ${project.build.directory}/dependency/presto-local-file-${project.version} - plugin/localfile - - - ${project.build.directory}/dependency/presto-accumulo-${project.version} - plugin/accumulo - - - ${project.build.directory}/dependency/presto-thrift-connector-${project.version} - plugin/presto-thrift - - - ${project.build.directory}/dependency/presto-elasticsearch-${project.version} - plugin/presto-elasticsearch - - - ${project.build.directory}/dependency/presto-druid-${project.version} - plugin/presto-druid - - - ${project.build.directory}/dependency/presto-bigquery-${project.version} - plugin/presto-bigquery - - - ${project.build.directory}/dependency/presto-iceberg-${project.version} - plugin/iceberg - - - ${project.build.directory}/dependency/presto-hive-function-namespace-${project.version} - plugin/hive-function-namespace - - - ${project.build.directory}/dependency/presto-delta-${project.version} - 
plugin/delta - - - ${project.build.directory}/dependency/presto-hudi-${project.version} - plugin/hudi - - - ${project.build.directory}/dependency/presto-clickhouse-${project.version} - plugin/clickhouse - - - diff --git a/presto-server/src/main/java/com/facebook/presto/Dummy.java b/presto-server/src/main/java/com/facebook/presto/Dummy.java deleted file mode 100644 index 363367c5c9f6..000000000000 --- a/presto-server/src/main/java/com/facebook/presto/Dummy.java +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.facebook.presto; - -/** - * This class exists to force the creation of a jar for the presto-server module. This is needed to deploy the presto-server module to nexus. 
- */ -public class Dummy -{ -} diff --git a/presto-server/src/main/provisio/presto.xml b/presto-server/src/main/provisio/presto.xml new file mode 100644 index 000000000000..4e82ac23b9a1 --- /dev/null +++ b/presto-server/src/main/provisio/presto.xml @@ -0,0 +1,274 @@ + + + + + + + + NOTICE + README.txt + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From 240948137669b685b806a5b9018c58302221259f Mon Sep 17 00:00:00 2001 From: Amit Dutta Date: Thu, 19 Sep 2024 10:18:06 -0700 Subject: [PATCH 06/86] [native] Advance velox. 
--- presto-native-execution/velox | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-native-execution/velox b/presto-native-execution/velox index af2513b77104..48c662813101 160000 --- a/presto-native-execution/velox +++ b/presto-native-execution/velox @@ -1 +1 @@ -Subproject commit af2513b771044923eb6b24179062de707957eff9 +Subproject commit 48c662813101c75851f126ce9ecb86669cd8b06a From 52081acf54c7e27a7b3a103a04a250b2e82b23c0 Mon Sep 17 00:00:00 2001 From: Elliotte Rusty Harold Date: Wed, 18 Sep 2024 15:37:30 -0400 Subject: [PATCH 07/86] Add missing article --- presto-docs/src/main/sphinx/language/types.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/presto-docs/src/main/sphinx/language/types.rst b/presto-docs/src/main/sphinx/language/types.rst index d257efed4205..9a58f281c80e 100644 --- a/presto-docs/src/main/sphinx/language/types.rst +++ b/presto-docs/src/main/sphinx/language/types.rst @@ -249,7 +249,7 @@ Example: ``MAP(ARRAY['foo', 'bar'], ARRAY[1, 2])`` ^^^^^^^ A structure made up of named fields. The fields may be of any SQL type, and are -accessed with field reference operator ``.`` +accessed with the field reference operator ``.`` Example: ``CAST(ROW(1, 2.0) AS ROW(x BIGINT, y DOUBLE))`` @@ -447,4 +447,4 @@ The types support operations such as spatial measurements and relationship check crucial for geographic information systems (GIS) and other applications requiring spatial data manipulation. The geospatial types ensure data integrity and provide robust tools for complex spatial querying and analysis. -See :doc:`/functions/geospatial`. \ No newline at end of file +See :doc:`/functions/geospatial`. 
From bf5928e04a844ecc1594f6a4c6f174a7d31ee715 Mon Sep 17 00:00:00 2001 From: Timothy Meehan Date: Wed, 18 Sep 2024 16:23:07 -0400 Subject: [PATCH 08/86] Add presto-server to maven-checks --- .github/workflows/maven-checks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/maven-checks.yml b/.github/workflows/maven-checks.yml index 9799de4d797e..d8a05fece1cd 100644 --- a/.github/workflows/maven-checks.yml +++ b/.github/workflows/maven-checks.yml @@ -44,6 +44,6 @@ jobs: - name: Maven Checks run: | export MAVEN_OPTS="${MAVEN_INSTALL_OPTS}" - ./mvnw install -B -V -T 1C -DskipTests -Dmaven.javadoc.skip=true --no-transfer-progress -P ci -pl '!presto-test-coverage,!:presto-server,!:presto-docs' + ./mvnw install -B -V -T 1C -DskipTests -Dmaven.javadoc.skip=true --no-transfer-progress -P ci -pl '!presto-test-coverage,!:presto-docs' - name: Clean Maven Output run: ./mvnw clean -pl '!:presto-server,!:presto-cli,!presto-test-coverage' From 1a4339ef9b38ab2ddd3b5fefc542c654b7e73a42 Mon Sep 17 00:00:00 2001 From: Tim Meehan Date: Wed, 11 Sep 2024 16:29:41 -0400 Subject: [PATCH 09/86] Fix case to map optimization --- .../presto/sql/planner/iterative/rule/RewriteCaseToMap.java | 4 ++++ .../java/com/facebook/presto/tests/AbstractTestQueries.java | 1 + 2 files changed, 5 insertions(+) diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/rule/RewriteCaseToMap.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/rule/RewriteCaseToMap.java index 23c4e149cace..30e4cedcba06 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/rule/RewriteCaseToMap.java +++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/rule/RewriteCaseToMap.java @@ -231,6 +231,10 @@ else if (!curCheck.equals(checkExpr)) { } } + if (checkExpr == null) { + return node; + } + // Here we have all values! 
RowExpression mapLookup = makeMapAndAccess(whens, thens, checkExpr); diff --git a/presto-tests/src/main/java/com/facebook/presto/tests/AbstractTestQueries.java b/presto-tests/src/main/java/com/facebook/presto/tests/AbstractTestQueries.java index 8e6d34006c2c..77777da9b9d7 100644 --- a/presto-tests/src/main/java/com/facebook/presto/tests/AbstractTestQueries.java +++ b/presto-tests/src/main/java/com/facebook/presto/tests/AbstractTestQueries.java @@ -7419,6 +7419,7 @@ public void testCaseToMapOptimization() assertQuery("select x, case x when 1 then 1 when 2 then 2 else 3 end from (select x from (values 1, 2, 3, 4) t(x))"); assertQuery("select x, case when x=1 then 1 when x=2 then 2 else 3 end from (select x from (values 1, 2, 3, 4) t(x))"); assertQuery("select x, case when x=1 then 1 when x in (2, 3) then 2 else 3 end from (select x from (values 1, 2, 3, 4) t(x))"); + assertQuery("select case (case true when true then true else coalesce(false = any (values true), true) end) when false then true end limit 1"); // disable the feature and test to make sure it doesn't fire From 79a5802a336404110c86f144e24699dd4b1e1134 Mon Sep 17 00:00:00 2001 From: Jialiang Tan Date: Fri, 20 Sep 2024 10:44:20 -0700 Subject: [PATCH 10/86] [native] Remove dependencies of old arbitrator config --- presto-native-execution/presto_cpp/main/PrestoServer.cpp | 2 +- .../presto_cpp/main/tests/TaskManagerTest.cpp | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/presto-native-execution/presto_cpp/main/PrestoServer.cpp b/presto-native-execution/presto_cpp/main/PrestoServer.cpp index 6716546e8324..3c919b744892 100644 --- a/presto-native-execution/presto_cpp/main/PrestoServer.cpp +++ b/presto-native-execution/presto_cpp/main/PrestoServer.cpp @@ -739,7 +739,7 @@ void PrestoServer::initializeThreadPools() { #ifdef __linux__ threadFactory = std::make_shared("Driver"); #else - VELOX_FAIL("Batch scheduling policy can only be enabled on Linux") + VELOX_FAIL("Batch scheduling policy can 
only be enabled on Linux"); #endif } else { threadFactory = std::make_shared("Driver"); diff --git a/presto-native-execution/presto_cpp/main/tests/TaskManagerTest.cpp b/presto-native-execution/presto_cpp/main/tests/TaskManagerTest.cpp index 6602ad104510..928ed2d2ce9e 100644 --- a/presto-native-execution/presto_cpp/main/tests/TaskManagerTest.cpp +++ b/presto-native-execution/presto_cpp/main/tests/TaskManagerTest.cpp @@ -198,7 +198,10 @@ class TaskManagerTest : public testing::Test { velox::memory::MemoryManagerOptions options; options.allocatorCapacity = 8L << 30; options.arbitratorCapacity = 6L << 30; - options.memoryPoolInitCapacity = 512 << 20; + options.extraArbitratorConfigs = { + {std::string(velox::memory::SharedArbitrator::ExtraConfig:: + kMemoryPoolInitialCapacity), + "512MB"}}; options.arbitratorKind = "SHARED"; options.checkUsageLeak = true; options.arbitrationStateCheckCb = memoryArbitrationStateCheck; From 33ff9521dbd25e2b7b1181cf01391cb899857d65 Mon Sep 17 00:00:00 2001 From: Matt Calder Date: Sat, 14 Sep 2024 17:14:38 -0700 Subject: [PATCH 11/86] initial commit for ip_prefix_subnets reviewer feedback --- presto-docs/src/main/sphinx/functions/ip.rst | 8 +++ .../operator/scalar/IpPrefixFunctions.java | 67 +++++++++++++++++++ .../scalar/TestIpPrefixFunctions.java | 33 +++++++++ 3 files changed, 108 insertions(+) diff --git a/presto-docs/src/main/sphinx/functions/ip.rst b/presto-docs/src/main/sphinx/functions/ip.rst index f5e637afb50b..cc685be916dd 100644 --- a/presto-docs/src/main/sphinx/functions/ip.rst +++ b/presto-docs/src/main/sphinx/functions/ip.rst @@ -70,3 +70,11 @@ IP Functions SELECT is_private_ip(IPADDRESS '157.240.200.99'); -- false SELECT is_private_ip(IPADDRESS '2a03:2880:f031:12:face:b00c:0:2'); -- false +.. function:: ip_prefix_subnets(ip_prefix, prefix_length) -> array(ip_prefix) + + Returns the subnets of ``ip_prefix`` of size ``prefix_length``. 
``prefix_length`` must be valid ([0, 32] for IPv4 + and [0, 128] for IPv6) or the query will fail and raise an error. An empty array is returned if ``prefix_length`` + is shorter (that is, less specific) than ``ip_prefix``. :: + + SELECT IP_PREFIX_SUBNETS(IPPREFIX '192.168.1.0/24', 25); -- [{192.168.1.0/25}, {192.168.1.128/25}] + SELECT IP_PREFIX_SUBNETS(IPPREFIX '2a03:2880:c000::/34', 36); -- [{2a03:2880:c000::/36}, {2a03:2880:d000::/36}, {2a03:2880:e000::/36}, {2a03:2880:f000::/36}] diff --git a/presto-main/src/main/java/com/facebook/presto/operator/scalar/IpPrefixFunctions.java b/presto-main/src/main/java/com/facebook/presto/operator/scalar/IpPrefixFunctions.java index b47e8f48ce04..5f7f9a27d19f 100644 --- a/presto-main/src/main/java/com/facebook/presto/operator/scalar/IpPrefixFunctions.java +++ b/presto-main/src/main/java/com/facebook/presto/operator/scalar/IpPrefixFunctions.java @@ -52,6 +52,8 @@ public final class IpPrefixFunctions { private static final BigInteger TWO = BigInteger.valueOf(2); + private static final Block EMPTY_BLOCK = IPPREFIX.createBlockBuilder(null, 0).build(); + /** * Our definitions for what IANA considers not "globally reachable" are taken from the docs at * https://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml and @@ -290,6 +292,71 @@ public static boolean isPrivateIpAddress(@SqlType(StandardTypes.IPADDRESS) Slice return false; } + @Description("Split the input prefix into subnets the size of the new prefix length.") + @ScalarFunction("ip_prefix_subnets") + @SqlType("array(IPPREFIX)") + public static Block ipPrefixSubnets(@SqlType(StandardTypes.IPPREFIX) Slice prefix, @SqlType(StandardTypes.BIGINT) long newPrefixLength) + { + boolean inputIsIpV4 = isIpv4(prefix); + + if (newPrefixLength < 0 || (inputIsIpV4 && newPrefixLength > 32) || (!inputIsIpV4 && newPrefixLength > 128)) { + throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Invalid prefix length for IPv" + (inputIsIpV4 ? 
"4" : "6") + ": " + newPrefixLength); + } + + int inputPrefixLength = getPrefixLength(prefix); + // An IP prefix is a 'network', or group of contiguous IP addresses. The common format for describing IP prefixes is + // uses 2 parts separated by a '/': (1) the IP address part and the (2) prefix length part (also called subnet size or CIDR). + // For example, in 9.255.255.0/24, 9.255.255.0 is the IP address part and 24 is the prefix length. + // The prefix length describes how many IP addresses the prefix contains in terms of the leading number of bits required. A higher number of bits + // means smaller number of IP addresses. Subnets inherently mean smaller groups of IP addresses. + // We can only disaggregate a prefix if the prefix length is the same length or longer (more-specific) than the length of the input prefix. + // E.g., if the input prefix is 9.255.255.0/24, the prefix length can be /24, /25, /26, etc... but not 23 or larger value than 24. + + int newPrefixCount = 0; // if inputPrefixLength > newPrefixLength, there are no new prefixes and we will return an empty array. + if (inputPrefixLength <= newPrefixLength) { + // Next, count how many new prefixes we will generate. In general, every difference in prefix length doubles the number new prefixes. + // For example if we start with 9.255.255.0/24, and want to split into /25s, we would have 2 new prefixes. If we wanted to split into /26s, + // we would have 4 new prefixes, and /27 would have 8 prefixes etc.... + newPrefixCount = 1 << (newPrefixLength - inputPrefixLength); // 2^N + } + + if (newPrefixCount == 0) { + return EMPTY_BLOCK; + } + + BlockBuilder blockBuilder = IPPREFIX.createBlockBuilder(null, newPrefixCount); + + if (newPrefixCount == 1) { + IPPREFIX.writeSlice(blockBuilder, prefix); // just return the original prefix in an array + return blockBuilder.build(); // returns empty or single entry + } + + int ipVersionMaxBits = inputIsIpV4 ? 
32 : 128; + BigInteger newPrefixIpCount = TWO.pow(ipVersionMaxBits - (int) newPrefixLength); + + Slice startingIpAddressAsSlice = ipSubnetMin(prefix); + BigInteger currentIpAddress = toBigInteger(startingIpAddressAsSlice); + + try { + for (int i = 0; i < newPrefixCount; i++) { + InetAddress asInetAddress = bigIntegerToIpAddress(currentIpAddress); + Slice ipPrefixAsSlice = castFromVarcharToIpPrefix(utf8Slice(InetAddresses.toAddrString(asInetAddress) + "/" + newPrefixLength)); + IPPREFIX.writeSlice(blockBuilder, ipPrefixAsSlice); + currentIpAddress = currentIpAddress.add(newPrefixIpCount); // increment to start of next new prefix + } + } + catch (UnknownHostException ex) { + throw new PrestoException(GENERIC_INTERNAL_ERROR, "Unable to convert " + currentIpAddress + " to IP prefix", ex); + } + + return blockBuilder.build(); + } + + private static int getPrefixLength(Slice ipPrefix) + { + return ipPrefix.getByte(IPPREFIX.getFixedSize() - 1) & 0xFF; + } + private static List generateMinIpPrefixes(BigInteger firstIpAddress, BigInteger lastIpAddress, int ipVersionMaxBits) { List ipPrefixSlices = new ArrayList<>(); diff --git a/presto-main/src/test/java/com/facebook/presto/operator/scalar/TestIpPrefixFunctions.java b/presto-main/src/test/java/com/facebook/presto/operator/scalar/TestIpPrefixFunctions.java index a895fdd7e1c5..21f2b615887c 100644 --- a/presto-main/src/test/java/com/facebook/presto/operator/scalar/TestIpPrefixFunctions.java +++ b/presto-main/src/test/java/com/facebook/presto/operator/scalar/TestIpPrefixFunctions.java @@ -287,4 +287,37 @@ public void testIsPrivateIpNull() { assertFunction("IS_PRIVATE_IP(NULL)", BOOLEAN, null); } + + @Test + public void testIpPrefixSubnets() + { + assertFunction("IP_PREFIX_SUBNETS(IPPREFIX '192.168.1.0/24', 25)", new ArrayType(IPPREFIX), ImmutableList.of("192.168.1.0/25", "192.168.1.128/25")); + assertFunction("IP_PREFIX_SUBNETS(IPPREFIX '192.168.0.0/24', 26)", new ArrayType(IPPREFIX), ImmutableList.of("192.168.0.0/26", 
"192.168.0.64/26", "192.168.0.128/26", "192.168.0.192/26")); + assertFunction("IP_PREFIX_SUBNETS(IPPREFIX '2A03:2880:C000::/34', 37)", + new ArrayType(IPPREFIX), + ImmutableList.of("2a03:2880:c000::/37", "2a03:2880:c800::/37", "2a03:2880:d000::/37", "2a03:2880:d800::/37", "2a03:2880:e000::/37", "2a03:2880:e800::/37", "2a03:2880:f000::/37", "2a03:2880:f800::/37")); + } + + @Test + public void testIpPrefixSubnetsReturnSelf() + { + assertFunction("IP_PREFIX_SUBNETS(IPPREFIX '192.168.1.0/24', 24)", new ArrayType(IPPREFIX), ImmutableList.of("192.168.1.0/24")); + assertFunction("IP_PREFIX_SUBNETS(IPPREFIX '2804:431:b000::/38', 38)", new ArrayType(IPPREFIX), ImmutableList.of("2804:431:b000::/38")); + } + + @Test + public void testIpPrefixSubnetsNewPrefixLengthLongerReturnsEmpty() + { + assertFunction("IP_PREFIX_SUBNETS(IPPREFIX '192.168.0.0/24', 23)", new ArrayType(IPPREFIX), ImmutableList.of()); + assertFunction("IP_PREFIX_SUBNETS(IPPREFIX '64:ff9b::17/64', 48)", new ArrayType(IPPREFIX), ImmutableList.of()); + } + + @Test + public void testIpPrefixSubnetsInvalidPrefixLengths() + { + assertInvalidFunction("IP_PREFIX_SUBNETS(IPPREFIX '192.168.0.0/24', -1)", "Invalid prefix length for IPv4: -1"); + assertInvalidFunction("IP_PREFIX_SUBNETS(IPPREFIX '192.168.0.0/24', 33)", "Invalid prefix length for IPv4: 33"); + assertInvalidFunction("IP_PREFIX_SUBNETS(IPPREFIX '64:ff9b::17/64', -1)", "Invalid prefix length for IPv6: -1"); + assertInvalidFunction("IP_PREFIX_SUBNETS(IPPREFIX '64:ff9b::17/64', 129)", "Invalid prefix length for IPv6: 129"); + } } From 95d24399ac931c236986acbe24285849ca557f22 Mon Sep 17 00:00:00 2001 From: Anant Aneja <1797669+aaneja@users.noreply.github.com> Date: Wed, 18 Sep 2024 16:26:15 +0530 Subject: [PATCH 12/86] Fix EffectivePredicateExtractor to not pull up expressions only referring to constants --- .../planner/EffectivePredicateExtractor.java | 9 ++++++++- .../TestEffectivePredicateExtractor.java | 20 +++++++++++++++++++ 
.../presto/tests/AbstractTestQueries.java | 11 ++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/EffectivePredicateExtractor.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/EffectivePredicateExtractor.java index 9e439fb863c4..a59b64b09f85 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/planner/EffectivePredicateExtractor.java +++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/EffectivePredicateExtractor.java @@ -60,6 +60,7 @@ import static com.facebook.presto.common.function.OperatorType.EQUAL; import static com.facebook.presto.common.type.BooleanType.BOOLEAN; +import static com.facebook.presto.expressions.LogicalRowExpressions.FALSE_CONSTANT; import static com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT; import static com.facebook.presto.expressions.LogicalRowExpressions.extractConjuncts; import static com.facebook.presto.spi.relation.SpecialFormExpression.Form.IS_NULL; @@ -418,9 +419,10 @@ private RowExpression pullExpressionThroughVariables(RowExpression expression, C for (RowExpression conjunct : new EqualityInference.Builder(functionManger).nonInferableConjuncts(expression)) { if (determinismEvaluator.isDeterministic(conjunct)) { RowExpression rewritten = equalityInference.rewriteExpression(conjunct, in(variables)); - if (rewritten != null) { + if (rewritten != null && (hasVariableReferences(rewritten) || rewritten.equals(FALSE_CONSTANT))) { effectiveConjuncts.add(rewritten); } + // If equality inference has reduced the predicate to an expression referring to only constants, it does not make sense to pull this predicate up } } @@ -428,5 +430,10 @@ private RowExpression pullExpressionThroughVariables(RowExpression expression, C return logicalRowExpressions.combineConjuncts(effectiveConjuncts.build()); } + + private static boolean hasVariableReferences(RowExpression rowExpression) + { + return 
!VariablesExtractor.extractUnique(rowExpression).isEmpty(); + } } } diff --git a/presto-main/src/test/java/com/facebook/presto/sql/planner/TestEffectivePredicateExtractor.java b/presto-main/src/test/java/com/facebook/presto/sql/planner/TestEffectivePredicateExtractor.java index a6d672435af3..4e76ddd5d271 100644 --- a/presto-main/src/test/java/com/facebook/presto/sql/planner/TestEffectivePredicateExtractor.java +++ b/presto-main/src/test/java/com/facebook/presto/sql/planner/TestEffectivePredicateExtractor.java @@ -238,6 +238,26 @@ public void testProject() equals(DV, EV))); } + @Test + public void testProjectOverFilterWithNoReferencedAssignments() + { + PlanNode node = new ProjectNode(newId(), + filter(baseTableScan, + and( + equals(call("mod", + metadata.getFunctionAndTypeManager().lookupFunction("mod", fromTypes(BIGINT, BIGINT)), + BIGINT, + ImmutableList.of(CV, bigintLiteral(5L))), bigintLiteral(-1L)), + equals(CV, bigintLiteral(10L)))), + assignment(DV, AV)); + + RowExpression effectivePredicate = effectivePredicateExtractor.extract(node); + + // The filter predicate is reduced to `CV = 10 AND mod(10,5) = -1` + // Since we have no references to `CV` in the assignments however, neither of these conjuncts is pulled up through the Project + assertEquals(effectivePredicate, TRUE_CONSTANT); + } + @Test public void testTopN() { diff --git a/presto-tests/src/main/java/com/facebook/presto/tests/AbstractTestQueries.java b/presto-tests/src/main/java/com/facebook/presto/tests/AbstractTestQueries.java index 77777da9b9d7..1239ed45bb35 100644 --- a/presto-tests/src/main/java/com/facebook/presto/tests/AbstractTestQueries.java +++ b/presto-tests/src/main/java/com/facebook/presto/tests/AbstractTestQueries.java @@ -1668,6 +1668,17 @@ public void testInUncorrelatedSubquery() "SELECT TRUE, 2"); } + @Test + public void testUncorrelatedSubqueryWithEmptyResult() + { + assertQuery( + "SELECT regionkey, (select name from nation where false) from region", + "SELECT regionkey, NULL from 
region"); + assertQuery( + "SELECT regionkey, (select name from nation where nationkey = 5 and mod(nationkey,5) = 1) from region", + "SELECT regionkey, NULL from region"); + } + @Test public void testChecksum() { From 8b6b9db32ad78afe0a616c3eec3ff7c509292e97 Mon Sep 17 00:00:00 2001 From: Reetika Agrawal Date: Mon, 23 Sep 2024 11:38:00 +0530 Subject: [PATCH 13/86] Update sqlserver docker image version --- .../conf/docker/singlenode-sqlserver/docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-product-tests/conf/docker/singlenode-sqlserver/docker-compose.yml b/presto-product-tests/conf/docker/singlenode-sqlserver/docker-compose.yml index 15e220a77c2f..aec9eaef896f 100644 --- a/presto-product-tests/conf/docker/singlenode-sqlserver/docker-compose.yml +++ b/presto-product-tests/conf/docker/singlenode-sqlserver/docker-compose.yml @@ -2,7 +2,7 @@ services: sqlserver: hostname: sqlserver - image: 'mcr.microsoft.com/mssql/server:2017-CU13' + image: 'mcr.microsoft.com/mssql/server:2022-latest' ports: - '1433:1433' environment: From 30d6a8d116d5f51da26924603205e95ba8a87593 Mon Sep 17 00:00:00 2001 From: Amit Dutta Date: Sun, 22 Sep 2024 22:17:21 -0700 Subject: [PATCH 14/86] [native] Advance velox. 
--- presto-native-execution/velox | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-native-execution/velox b/presto-native-execution/velox index 48c662813101..4e45bc51fc16 160000 --- a/presto-native-execution/velox +++ b/presto-native-execution/velox @@ -1 +1 @@ -Subproject commit 48c662813101c75851f126ce9ecb86669cd8b06a +Subproject commit 4e45bc51fc164531370abcd9f28201b694623cfe From 649b45a0f40b4cd8a31fcfef91a06a5d3b5303a1 Mon Sep 17 00:00:00 2001 From: Steve Burnett Date: Tue, 10 Sep 2024 17:19:51 -0400 Subject: [PATCH 15/86] Fix formatting in functions/json.rst --- .../src/main/sphinx/functions/json.rst | 76 +++++++++---------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/presto-docs/src/main/sphinx/functions/json.rst b/presto-docs/src/main/sphinx/functions/json.rst index ac640be44d38..026e899e1f79 100644 --- a/presto-docs/src/main/sphinx/functions/json.rst +++ b/presto-docs/src/main/sphinx/functions/json.rst @@ -5,25 +5,25 @@ JSON Functions and Operators Cast to JSON ------------ - Casting from ``BOOLEAN``, ``TINYINT``, ``SMALLINT``, ``INTEGER``, - ``BIGINT``, ``REAL``, ``DOUBLE`` or ``VARCHAR`` is supported. - Casting from ``ARRAY``, ``MAP`` or ``ROW`` is supported when the element type of - the array is one of the supported types, or when the key type of the map - is ``VARCHAR`` and value type of the map is one of the supported types, - or when every field type of the row is one of the supported types. 
- Behaviors of the casts are shown with the examples below:: - - SELECT CAST(NULL AS JSON); -- NULL - SELECT CAST(1 AS JSON); -- JSON '1' - SELECT CAST(9223372036854775807 AS JSON); -- JSON '9223372036854775807' - SELECT CAST('abc' AS JSON); -- JSON '"abc"' - SELECT CAST(true AS JSON); -- JSON 'true' - SELECT CAST(1.234 AS JSON); -- JSON '1.234' - SELECT CAST(ARRAY[1, 23, 456] AS JSON); -- JSON '[1,23,456]' - SELECT CAST(ARRAY[1, NULL, 456] AS JSON); -- JSON '[1,null,456]' - SELECT CAST(ARRAY[ARRAY[1, 23], ARRAY[456]] AS JSON); -- JSON '[[1,23],[456]]' - SELECT CAST(MAP_FROM_ENTRIES(ARRAY[('k1', 1), ('k2', 23), ('k3', 456)]) AS JSON); -- JSON '{"k1":1,"k2":23,"k3":456}' - SELECT CAST(CAST(ROW(123, 'abc', true) AS ROW(v1 BIGINT, v2 VARCHAR, v3 BOOLEAN)) AS JSON); -- JSON '[123,"abc",true]' +Casting from ``BOOLEAN``, ``TINYINT``, ``SMALLINT``, ``INTEGER``, +``BIGINT``, ``REAL``, ``DOUBLE`` or ``VARCHAR`` is supported. +Casting from ``ARRAY``, ``MAP`` or ``ROW`` is supported when the element type of +the array is one of the supported types, or when the key type of the map +is ``VARCHAR`` and value type of the map is one of the supported types, +or when every field type of the row is one of the supported types. 
+Behaviors of the casts are shown with the examples below:: + + SELECT CAST(NULL AS JSON); -- NULL + SELECT CAST(1 AS JSON); -- JSON '1' + SELECT CAST(9223372036854775807 AS JSON); -- JSON '9223372036854775807' + SELECT CAST('abc' AS JSON); -- JSON '"abc"' + SELECT CAST(true AS JSON); -- JSON 'true' + SELECT CAST(1.234 AS JSON); -- JSON '1.234' + SELECT CAST(ARRAY[1, 23, 456] AS JSON); -- JSON '[1,23,456]' + SELECT CAST(ARRAY[1, NULL, 456] AS JSON); -- JSON '[1,null,456]' + SELECT CAST(ARRAY[ARRAY[1, 23], ARRAY[456]] AS JSON); -- JSON '[[1,23],[456]]' + SELECT CAST(MAP_FROM_ENTRIES(ARRAY[('k1', 1), ('k2', 23), ('k3', 456)]) AS JSON); -- JSON '{"k1":1,"k2":23,"k3":456}' + SELECT CAST(CAST(ROW(123, 'abc', true) AS ROW(v1 BIGINT, v2 VARCHAR, v3 BOOLEAN)) AS JSON); -- JSON '[123,"abc",true]' .. note:: @@ -55,25 +55,25 @@ Cast to JSON Cast from JSON -------------- - Casting to ``BOOLEAN``, ``TINYINT``, ``SMALLINT``, ``INTEGER``, - ``BIGINT``, ``REAL``, ``DOUBLE`` or ``VARCHAR`` is supported. - Casting to ``ARRAY`` and ``MAP`` is supported when the element type of - the array is one of the supported types, or when the key type of the map - is ``VARCHAR`` and value type of the map is one of the supported types. 
- Behaviors of the casts are shown with the examples below:: - - SELECT CAST(JSON 'null' AS VARCHAR); -- NULL - SELECT CAST(JSON '1' AS INTEGER); -- 1 - SELECT CAST(JSON '9223372036854775807' AS BIGINT); -- 9223372036854775807 - SELECT CAST(JSON '"abc"' AS VARCHAR); -- abc - SELECT CAST(JSON 'true' AS BOOLEAN); -- true - SELECT CAST(JSON '1.234' AS DOUBLE); -- 1.234 - SELECT CAST(JSON '[1,23,456]' AS ARRAY(INTEGER)); -- [1, 23, 456] - SELECT CAST(JSON '[1,null,456]' AS ARRAY(INTEGER)); -- [1, NULL, 456] - SELECT CAST(JSON '[[1,23],[456]]' AS ARRAY(ARRAY(INTEGER))); -- [[1, 23], [456]] - SELECT CAST(JSON '{"k1":1,"k2":23,"k3":456}' AS MAP(VARCHAR, INTEGER)); -- {k1=1, k2=23, k3=456} - SELECT CAST(JSON '{"v1":123,"v2":"abc","v3":true}' AS ROW(v1 BIGINT, v2 VARCHAR, v3 BOOLEAN)); -- {v1=123, v2=abc, v3=true} - SELECT CAST(JSON '[123,"abc",true]' AS ROW(v1 BIGINT, v2 VARCHAR, v3 BOOLEAN)); -- {value1=123, value2=abc, value3=true} +Casting to ``BOOLEAN``, ``TINYINT``, ``SMALLINT``, ``INTEGER``, +``BIGINT``, ``REAL``, ``DOUBLE`` or ``VARCHAR`` is supported. +Casting to ``ARRAY`` and ``MAP`` is supported when the element type of +the array is one of the supported types, or when the key type of the map +is ``VARCHAR`` and value type of the map is one of the supported types. 
+Behaviors of the casts are shown with the examples below:: + + SELECT CAST(JSON 'null' AS VARCHAR); -- NULL + SELECT CAST(JSON '1' AS INTEGER); -- 1 + SELECT CAST(JSON '9223372036854775807' AS BIGINT); -- 9223372036854775807 + SELECT CAST(JSON '"abc"' AS VARCHAR); -- abc + SELECT CAST(JSON 'true' AS BOOLEAN); -- true + SELECT CAST(JSON '1.234' AS DOUBLE); -- 1.234 + SELECT CAST(JSON '[1,23,456]' AS ARRAY(INTEGER)); -- [1, 23, 456] + SELECT CAST(JSON '[1,null,456]' AS ARRAY(INTEGER)); -- [1, NULL, 456] + SELECT CAST(JSON '[[1,23],[456]]' AS ARRAY(ARRAY(INTEGER))); -- [[1, 23], [456]] + SELECT CAST(JSON '{"k1":1,"k2":23,"k3":456}' AS MAP(VARCHAR, INTEGER)); -- {k1=1, k2=23, k3=456} + SELECT CAST(JSON '{"v1":123,"v2":"abc","v3":true}' AS ROW(v1 BIGINT, v2 VARCHAR, v3 BOOLEAN)); -- {v1=123, v2=abc, v3=true} + SELECT CAST(JSON '[123,"abc",true]' AS ROW(v1 BIGINT, v2 VARCHAR, v3 BOOLEAN)); -- {v1=123, v2=abc, v3=true} .. note:: From 87e2beb53a0e55c0974482185bcd74bfb148572a Mon Sep 17 00:00:00 2001 From: Elliotte Rusty Harold Date: Mon, 23 Sep 2024 09:03:08 -0400 Subject: [PATCH 16/86] Remove test-jar dependency --- presto-tests/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-tests/pom.xml b/presto-tests/pom.xml index bb806244f302..8fbb3a863f8a 100644 --- a/presto-tests/pom.xml +++ b/presto-tests/pom.xml @@ -261,13 +261,13 @@ com.facebook.presto presto-resource-group-managers + test-jar test com.facebook.presto presto-resource-group-managers - test-jar test From 2f4ea64d3bf3fa8c89b3e805d5d0987ac6d971c2 Mon Sep 17 00:00:00 2001 From: Elliotte Rusty Harold Date: Thu, 30 May 2024 10:58:49 -0400 Subject: [PATCH 17/86] Use semantic asserts --- .../com/facebook/presto/hive/AbstractTestHiveClient.java | 6 +++--- .../com/facebook/presto/hive/TestHiveLogicalPlanner.java | 3 ++- .../presto/hive/TestHiveMaterializedViewLogicalPlanner.java | 4 ++-- .../facebook/presto/hive/TestHiveNativeLogicalPlanner.java | 3 ++- 
.../com/facebook/presto/hive/TestHiveSplitScheduling.java | 3 --- .../java/com/facebook/presto/hive/TestHiveSplitSource.java | 2 +- .../java/com/facebook/presto/hive/TestJsonHiveHandles.java | 2 +- .../hive/metastore/glue/TestHiveClientGlueMetastore.java | 5 +++-- 8 files changed, 14 insertions(+), 14 deletions(-) diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveClient.java b/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveClient.java index 6d36b4863cf3..c4f1f468b544 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveClient.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveClient.java @@ -4308,7 +4308,7 @@ private void doInsert(HiveStorageFormat storageFormat, SchemaTableName tableName // verify all temp files start with the unique prefix stagingPathRoot = getStagingPathRoot(insertTableHandle); Set tempFiles = listAllDataFiles(context, stagingPathRoot); - assertTrue(!tempFiles.isEmpty()); + assertFalse(tempFiles.isEmpty()); for (String filePath : tempFiles) { assertTrue(new Path(filePath).getName().startsWith(session.getQueryId())); } @@ -4535,7 +4535,7 @@ private void doInsertIntoNewPartition(HiveStorageFormat storageFormat, SchemaTab insertTableHandle.getLocationHandle().getTargetPath().toString(), false); Set tempFiles = listAllDataFiles(context, getStagingPathRoot(insertTableHandle)); - assertTrue(!tempFiles.isEmpty()); + assertFalse(tempFiles.isEmpty()); for (String filePath : tempFiles) { assertTrue(new Path(filePath).getName().startsWith(session.getQueryId())); } @@ -4663,7 +4663,7 @@ private void doInsertIntoExistingPartition(HiveStorageFormat storageFormat, Sche insertTableHandle.getLocationHandle().getTargetPath().toString(), false); Set tempFiles = listAllDataFiles(context, getStagingPathRoot(insertTableHandle)); - assertTrue(!tempFiles.isEmpty()); + assertFalse(tempFiles.isEmpty()); for (String filePath : tempFiles) { assertTrue(new 
Path(filePath).getName().startsWith(session.getQueryId())); } diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveLogicalPlanner.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveLogicalPlanner.java index c6499696293b..3388022ded00 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveLogicalPlanner.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveLogicalPlanner.java @@ -148,6 +148,7 @@ import static java.lang.String.format; import static java.util.Objects.requireNonNull; import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotSame; import static org.testng.Assert.assertTrue; @Test(singleThreaded = true) @@ -2126,7 +2127,7 @@ private void assertNoAggregatedColumns(Plan plan, String tableName) for (ColumnHandle columnHandle : tableScan.getAssignments().values()) { assertTrue(columnHandle instanceof HiveColumnHandle); HiveColumnHandle hiveColumnHandle = (HiveColumnHandle) columnHandle; - assertFalse(hiveColumnHandle.getColumnType() == HiveColumnHandle.ColumnType.AGGREGATED); + assertNotSame(hiveColumnHandle.getColumnType(), HiveColumnHandle.ColumnType.AGGREGATED); assertFalse(hiveColumnHandle.getPartialAggregation().isPresent()); } } diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveMaterializedViewLogicalPlanner.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveMaterializedViewLogicalPlanner.java index 5d423606df0e..957afcd0297b 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveMaterializedViewLogicalPlanner.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveMaterializedViewLogicalPlanner.java @@ -87,7 +87,7 @@ import static java.lang.String.format; import static java.util.Collections.emptyList; import static java.util.stream.Collectors.joining; -import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotEquals; import static org.testng.Assert.assertTrue; 
@Test(singleThreaded = true) @@ -812,7 +812,7 @@ public void testMaterializedViewSampledRelations() // from sampled table and full table String viewHalfQuery = format("SELECT * from %s ORDER BY nationkey", viewHalf); MaterializedResult viewHalfTable = computeActual(viewHalfQuery); - assertFalse(viewFullTable.equals(viewHalfTable)); + assertNotEquals(viewFullTable, viewHalfTable); } finally { queryRunner.execute("DROP MATERIALIZED VIEW IF EXISTS " + viewFull); diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveNativeLogicalPlanner.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveNativeLogicalPlanner.java index 1c15a14cc420..3c74dca08ce5 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveNativeLogicalPlanner.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveNativeLogicalPlanner.java @@ -33,6 +33,7 @@ import static com.facebook.presto.sql.planner.optimizations.PlanNodeSearcher.searchFrom; import static io.airlift.tpch.TpchTable.ORDERS; import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotSame; import static org.testng.Assert.assertTrue; @Test(singleThreaded = true) @@ -74,7 +75,7 @@ private void assertNoAggregatedColumns(Plan plan, String tableName) for (ColumnHandle columnHandle : tableScan.getAssignments().values()) { assertTrue(columnHandle instanceof HiveColumnHandle); HiveColumnHandle hiveColumnHandle = (HiveColumnHandle) columnHandle; - assertFalse(hiveColumnHandle.getColumnType() == HiveColumnHandle.ColumnType.AGGREGATED); + assertNotSame(hiveColumnHandle.getColumnType(), HiveColumnHandle.ColumnType.AGGREGATED); assertFalse(hiveColumnHandle.getPartialAggregation().isPresent()); } } diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveSplitScheduling.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveSplitScheduling.java index 8a95cc41e951..ee95390f1993 100644 --- 
a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveSplitScheduling.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveSplitScheduling.java @@ -65,9 +65,6 @@ public void testDynamicSplits() assertTrue(numberOfSplitsWithDynamicSplitScheduling < numberOfSplitsWithoutDynamicSplitScheduling, "Expected less splits with dynamic split scheduling"); }); } - catch (Exception e) { - assertTrue(false, e.getMessage()); - } finally { getQueryRunner().execute("DROP TABLE IF EXISTS test_orders"); } diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveSplitSource.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveSplitSource.java index 65ae49a577e7..7411ba49428f 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveSplitSource.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveSplitSource.java @@ -327,7 +327,7 @@ public void testReaderWaitsForSplits() // sleep for a bit, and assure the thread is blocked MILLISECONDS.sleep(200); - assertTrue(!splits.isDone()); + assertFalse(splits.isDone()); // add a split hiveSplitSource.addToQueue(new TestSplit(33)); diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestJsonHiveHandles.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestJsonHiveHandles.java index 7538783a47e5..97097df4914e 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/TestJsonHiveHandles.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestJsonHiveHandles.java @@ -98,7 +98,7 @@ public void testColumnHandleDeserialize() assertEquals(columnHandle.getTypeSignature(), DOUBLE.getTypeSignature()); assertEquals(columnHandle.getHiveType(), HiveType.HIVE_FLOAT); assertEquals(columnHandle.getHiveColumnIndex(), -1); - assertEquals(columnHandle.isPartitionKey(), true); + assertTrue(columnHandle.isPartitionKey()); } private void testJsonEquals(String json, Map expectedMap) diff --git 
a/presto-hive/src/test/java/com/facebook/presto/hive/metastore/glue/TestHiveClientGlueMetastore.java b/presto-hive/src/test/java/com/facebook/presto/hive/metastore/glue/TestHiveClientGlueMetastore.java index a2454f17859a..94d2b15b4a11 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/metastore/glue/TestHiveClientGlueMetastore.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/metastore/glue/TestHiveClientGlueMetastore.java @@ -95,6 +95,7 @@ import static org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; public class TestHiveClientGlueMetastore @@ -356,7 +357,7 @@ public void testGetPartitionsWithFilterUsingReservedKeywordsAsColumnName() tableName.getTableName(), predicates); - assertTrue(!partitionNames.isEmpty()); + assertFalse(partitionNames.isEmpty()); assertEquals(partitionNames, ImmutableList.of("key=value2/int_partition=2")); // KEY is a reserved keyword in the grammar of the SQL parser used internally by Glue API @@ -370,7 +371,7 @@ public void testGetPartitionsWithFilterUsingReservedKeywordsAsColumnName() tableName.getSchemaName(), tableName.getTableName(), predicates); - assertTrue(!partitionNames.isEmpty()); + assertFalse(partitionNames.isEmpty()); assertEquals(partitionNames, ImmutableList.of("key=value1/int_partition=1", "key=value2/int_partition=2")); } finally { From 98ff012587bc57bf87aad5a67adaea2dc98e5bdb Mon Sep 17 00:00:00 2001 From: Elliotte Rusty Harold Date: Wed, 18 Sep 2024 14:57:21 -0400 Subject: [PATCH 18/86] Avoid confusion between ErrorCode and ErrorCodeSupplier --- .../java/com/facebook/presto/spi/PrestoException.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/presto-spi/src/main/java/com/facebook/presto/spi/PrestoException.java 
b/presto-spi/src/main/java/com/facebook/presto/spi/PrestoException.java index ed7caa318f5c..628782a3bc71 100644 --- a/presto-spi/src/main/java/com/facebook/presto/spi/PrestoException.java +++ b/presto-spi/src/main/java/com/facebook/presto/spi/PrestoException.java @@ -20,14 +20,14 @@ public class PrestoException { private final ErrorCode errorCode; - public PrestoException(ErrorCodeSupplier errorCode, String message) + public PrestoException(ErrorCodeSupplier errorCodeSupplier, String message) { - this(errorCode, message, null); + this(errorCodeSupplier, message, null); } - public PrestoException(ErrorCodeSupplier errorCode, Throwable throwable) + public PrestoException(ErrorCodeSupplier errorCodeSupplier, Throwable throwable) { - this(errorCode, null, throwable); + this(errorCodeSupplier, null, throwable); } public PrestoException(ErrorCodeSupplier errorCodeSupplier, String message, Throwable cause) From 7d9a0b0ae10a9e78b38d458cb0fafa04088b70d1 Mon Sep 17 00:00:00 2001 From: wangd Date: Tue, 24 Sep 2024 21:45:09 +0800 Subject: [PATCH 19/86] Remove redundant check for short decimal type in parquet decoders --- .../presto/parquet/batchreader/decoders/Decoders.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/decoders/Decoders.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/decoders/Decoders.java index e698211564b4..f2fc89c30009 100644 --- a/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/decoders/Decoders.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/decoders/Decoders.java @@ -170,7 +170,7 @@ private static ValuesDecoder createValuesDecoder(ColumnDescriptor columnDescript switch (type) { case INT32: case FLOAT: { - if (isDecimalType(columnDescriptor) && isShortDecimalType(columnDescriptor)) { + if (isShortDecimalType(columnDescriptor)) { return new 
Int32ShortDecimalRLEDictionaryValuesDecoder(bitWidth, inputStream, (IntegerDictionary) dictionary); } return new Int32RLEDictionaryValuesDecoder(bitWidth, inputStream, (IntegerDictionary) dictionary); @@ -179,7 +179,7 @@ private static ValuesDecoder createValuesDecoder(ColumnDescriptor columnDescript if (isTimeStampMicrosType(columnDescriptor) || isTimeMicrosType(columnDescriptor)) { return new Int64TimeAndTimestampMicrosRLEDictionaryValuesDecoder(bitWidth, inputStream, (LongDictionary) dictionary); } - if (isDecimalType(columnDescriptor) && isShortDecimalType(columnDescriptor)) { + if (isShortDecimalType(columnDescriptor)) { return new Int64RLEDictionaryValuesDecoder(bitWidth, inputStream, (LongDictionary) dictionary); } } From 9bbfbaf869107f36ae3b62aa9a89d1a37433b797 Mon Sep 17 00:00:00 2001 From: Ge Gao Date: Sun, 25 Aug 2024 23:29:12 -0700 Subject: [PATCH 20/86] [native] Add native e2e tests for unicode in JSON --- .../AbstractTestNativeGeneralQueries.java | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeGeneralQueries.java b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeGeneralQueries.java index 7a1a409c76f8..090b7375b320 100644 --- a/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeGeneralQueries.java +++ b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeGeneralQueries.java @@ -25,6 +25,8 @@ import com.google.common.collect.ImmutableMap; import org.testng.annotations.Test; +import java.util.ArrayList; +import java.util.List; import java.util.Map; import java.util.Optional; import java.util.UUID; @@ -1767,6 +1769,54 @@ public void testCorrelatedExistsSubqueries() "WHERE (SELECT * FROM (SELECT EXISTS(SELECT 1 WHERE o.orderkey > 10 OR o.orderkey != 3)))"); } + @Test + public void testUnicodeInJson() + { + // Test casting to JSON 
returning the same results for all unicode characters in the entire range. + List unicodeRanges = new ArrayList() + {{ + add(new int[] {0, 0x7F}); + add(new int[] {0x80, 0xD7FF}); + add(new int[] {0xE000, 0xFFFF}); + }}; + for (int start = 0x10000; start < 0x110000; ) { + int end = start + 0x10000; + unicodeRanges.add(new int[] {start, end - 1}); + start = end; + } + List unicodeStrings = unicodeRanges.stream().map(range -> { + StringBuilder unicodeString = new StringBuilder(); + for (int u = range[0]; u <= range[1]; u++) { + String hex = Integer.toHexString(u); + switch (hex.length()) { + case 1: + unicodeString.append("\\000"); + break; + case 2: + unicodeString.append("\\00"); + break; + case 3: + unicodeString.append("\\0"); + break; + case 4: + unicodeString.append("\\"); + break; + case 5: + unicodeString.append("\\+0"); + break; + default: + unicodeString.append("\\+"); + } + unicodeString.append(hex); + } + return unicodeString.toString(); + }).collect(ImmutableList.toImmutableList()); + + for (String unicodeString : unicodeStrings) { + assertQuery(String.format("SELECT CAST(a as JSON) FROM ( VALUES(U&'%s') ) t(a)", unicodeString)); + } + } + private void assertQueryResultCount(String sql, int expectedResultCount) { assertEquals(getQueryRunner().execute(sql).getRowCount(), expectedResultCount); From 6f4d3cca45560c63a7144af8fe47aa9a039363db Mon Sep 17 00:00:00 2001 From: Jimmy Lu Date: Tue, 24 Sep 2024 13:19:46 -0700 Subject: [PATCH 21/86] [native] Add native_selective_nimble_reader_enabled session property This is a hidden temporary session property to help controlled rollout of selective Nimble reader. Will be removed after the selective Nimble reader is fully rolled out. 
--- presto-docs/src/main/sphinx/presto_cpp/features.rst | 10 ++++++++++ .../com/facebook/presto/SystemSessionProperties.java | 8 ++++++++ .../presto_cpp/main/SessionProperties.cpp | 10 ++++++++++ .../presto_cpp/main/SessionProperties.h | 6 ++++++ .../presto_cpp/main/tests/QueryContextManagerTest.cpp | 2 ++ 5 files changed, 36 insertions(+) diff --git a/presto-docs/src/main/sphinx/presto_cpp/features.rst b/presto-docs/src/main/sphinx/presto_cpp/features.rst index ebd0040b190c..c497a1ff6ab2 100644 --- a/presto-docs/src/main/sphinx/presto_cpp/features.rst +++ b/presto-docs/src/main/sphinx/presto_cpp/features.rst @@ -314,6 +314,16 @@ If set to ``true``, disables the optimization in expression evaluation to delay This should only be used for debugging purposes. +``native_selective_nimble_reader_enabled`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* **Type:** ``boolean`` +* **Default value:** ``false`` + +Temporary flag to control whether selective Nimble reader should be used in this +query or not. Will be removed after the selective Nimble reader is fully rolled +out. 
+ ``native_join_spill_enabled`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/presto-main/src/main/java/com/facebook/presto/SystemSessionProperties.java b/presto-main/src/main/java/com/facebook/presto/SystemSessionProperties.java index 1afbc272f89b..a2a688ed4b2c 100644 --- a/presto-main/src/main/java/com/facebook/presto/SystemSessionProperties.java +++ b/presto-main/src/main/java/com/facebook/presto/SystemSessionProperties.java @@ -355,6 +355,7 @@ public final class SystemSessionProperties public static final String NATIVE_DEBUG_DISABLE_COMMON_SUB_EXPRESSION = "native_debug_disable_common_sub_expressions"; public static final String NATIVE_DEBUG_DISABLE_EXPRESSION_WITH_MEMOIZATION = "native_debug_disable_expression_with_memoization"; public static final String NATIVE_DEBUG_DISABLE_EXPRESSION_WITH_LAZY_INPUTS = "native_debug_disable_expression_with_lazy_inputs"; + public static final String NATIVE_SELECTIVE_NIMBLE_READER_ENABLED = "native_selective_nimble_reader_enabled"; public static final String NATIVE_MAX_PARTIAL_AGGREGATION_MEMORY = "native_max_partial_aggregation_memory"; public static final String NATIVE_MAX_EXTENDED_PARTIAL_AGGREGATION_MEMORY = "native_max_extended_partial_aggregation_memory"; @@ -1750,6 +1751,13 @@ public SystemSessionProperties( "of lazy inputs unless required. Should only be used for debugging.", false, true), + booleanProperty( + NATIVE_SELECTIVE_NIMBLE_READER_ENABLED, + "Temporary flag to control whether selective Nimble reader should be " + + "used in this query or not. 
Will be removed after the selective Nimble " + + "reader is fully rolled out.", + false, + true), longProperty( NATIVE_MAX_PARTIAL_AGGREGATION_MEMORY, "The max partial aggregation memory when data reduction is not optimal.", diff --git a/presto-native-execution/presto_cpp/main/SessionProperties.cpp b/presto-native-execution/presto_cpp/main/SessionProperties.cpp index c2413ac24d37..338d17766e72 100644 --- a/presto-native-execution/presto_cpp/main/SessionProperties.cpp +++ b/presto-native-execution/presto_cpp/main/SessionProperties.cpp @@ -244,6 +244,16 @@ SessionProperties::SessionProperties() { QueryConfig::kDebugDisableExpressionWithLazyInputs, boolToString(c.debugDisableExpressionsWithLazyInputs())); + addSessionProperty( + kSelectiveNimbleReaderEnabled, + "Temporary flag to control whether selective Nimble reader should be " + "used in this query or not. Will be removed after the selective Nimble " + "reader is fully rolled out.", + BOOLEAN(), + false, + QueryConfig::kSelectiveNimbleReaderEnabled, + boolToString(c.selectiveNimbleReaderEnabled())); + // If `legacy_timestamp` is true, the coordinator expects timestamp // conversions without a timezone to be converted to the user's // session_timezone. diff --git a/presto-native-execution/presto_cpp/main/SessionProperties.h b/presto-native-execution/presto_cpp/main/SessionProperties.h index bf43c21e2c78..50f9869267e6 100644 --- a/presto-native-execution/presto_cpp/main/SessionProperties.h +++ b/presto-native-execution/presto_cpp/main/SessionProperties.h @@ -165,6 +165,12 @@ class SessionProperties { static constexpr const char* kDebugDisableExpressionWithLazyInputs = "native_debug_disable_expression_with_lazy_inputs"; + /// Temporary flag to control whether selective Nimble reader should be used + /// in this query or not. Will be removed after the selective Nimble reader + /// is fully rolled out. 
+ static constexpr const char* kSelectiveNimbleReaderEnabled = + "native_selective_nimble_reader_enabled"; + /// Enable timezone-less timestamp conversions. static constexpr const char* kLegacyTimestamp = "legacy_timestamp"; diff --git a/presto-native-execution/presto_cpp/main/tests/QueryContextManagerTest.cpp b/presto-native-execution/presto_cpp/main/tests/QueryContextManagerTest.cpp index 43bd39074e76..75730cca8aeb 100644 --- a/presto-native-execution/presto_cpp/main/tests/QueryContextManagerTest.cpp +++ b/presto-native-execution/presto_cpp/main/tests/QueryContextManagerTest.cpp @@ -56,6 +56,7 @@ TEST_F(QueryContextManagerTest, nativeSessionProperties) { {"native_debug_disable_common_sub_expressions", "true"}, {"native_debug_disable_expression_with_memoization", "true"}, {"native_debug_disable_expression_with_lazy_inputs", "true"}, + {"native_selective_nimble_reader_enabled", "true"}, {"aggregation_spill_all", "true"}}}; auto queryCtx = taskManager_->getQueryContextManager()->findOrCreateQueryCtx( taskId, session); @@ -67,6 +68,7 @@ TEST_F(QueryContextManagerTest, nativeSessionProperties) { EXPECT_TRUE(queryCtx->queryConfig().debugDisableCommonSubExpressions()); EXPECT_TRUE(queryCtx->queryConfig().debugDisableExpressionsWithMemoization()); EXPECT_TRUE(queryCtx->queryConfig().debugDisableExpressionsWithLazyInputs()); + EXPECT_TRUE(queryCtx->queryConfig().selectiveNimbleReaderEnabled()); EXPECT_EQ(queryCtx->queryConfig().spillWriteBufferSize(), 1024); } From 62ca0801fba8448b162fe89f8dab10e0c14158e1 Mon Sep 17 00:00:00 2001 From: Jimmy Lu Date: Tue, 24 Sep 2024 16:57:04 -0400 Subject: [PATCH 22/86] Apply suggestions from code review Co-authored-by: Steve Burnett --- presto-docs/src/main/sphinx/presto_cpp/features.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/presto-docs/src/main/sphinx/presto_cpp/features.rst b/presto-docs/src/main/sphinx/presto_cpp/features.rst index c497a1ff6ab2..7b814b1adbd2 100644 --- 
a/presto-docs/src/main/sphinx/presto_cpp/features.rst +++ b/presto-docs/src/main/sphinx/presto_cpp/features.rst @@ -321,8 +321,7 @@ This should only be used for debugging purposes. * **Default value:** ``false`` Temporary flag to control whether selective Nimble reader should be used in this -query or not. Will be removed after the selective Nimble reader is fully rolled -out. +query or not. ``native_join_spill_enabled`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From a93eafca41c14100d70ea6f1a7bbc2cea15552bc Mon Sep 17 00:00:00 2001 From: Amit Dutta Date: Tue, 24 Sep 2024 12:31:41 -0700 Subject: [PATCH 23/86] [native] Advance velox. --- presto-native-execution/velox | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-native-execution/velox b/presto-native-execution/velox index 4e45bc51fc16..8ad7c0c04028 160000 --- a/presto-native-execution/velox +++ b/presto-native-execution/velox @@ -1 +1 @@ -Subproject commit 4e45bc51fc164531370abcd9f28201b694623cfe +Subproject commit 8ad7c0c040283b53d18f1c1e553b5f2aa36f3a0a From 2242d6c76621d7cb4b9c3fd7e65dddd19f6c0300 Mon Sep 17 00:00:00 2001 From: Ajay Kharat Date: Tue, 24 Sep 2024 14:53:40 +0530 Subject: [PATCH 24/86] Upgrade Postgres JDBC Driver to 42.6.1 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 3985475d962a..5b0c7d82b6aa 100644 --- a/pom.xml +++ b/pom.xml @@ -1226,7 +1226,7 @@ org.postgresql postgresql - 42.6.0 + 42.6.1 From 12afd2664b7e33b82332dbe7e4d81878470cc449 Mon Sep 17 00:00:00 2001 From: Jialiang Tan Date: Wed, 25 Sep 2024 18:53:13 -0700 Subject: [PATCH 25/86] [native] Remove memory transfer capacity --- .../presto_cpp/main/PrestoServer.cpp | 2 -- .../presto_cpp/main/common/Configs.cpp | 10 ---------- .../presto_cpp/main/common/Configs.h | 7 ------- 3 files changed, 19 deletions(-) diff --git a/presto-native-execution/presto_cpp/main/PrestoServer.cpp b/presto-native-execution/presto_cpp/main/PrestoServer.cpp index 3c919b744892..91e3a39ae13b 100644 
--- a/presto-native-execution/presto_cpp/main/PrestoServer.cpp +++ b/presto-native-execution/presto_cpp/main/PrestoServer.cpp @@ -834,8 +834,6 @@ void PrestoServer::initializeVeloxMemory() { systemConfig->sharedArbitratorMemoryPoolInitialCapacity()}, {std::string(SharedArbitratorConfig::kMemoryPoolReservedCapacity), systemConfig->sharedArbitratorMemoryPoolReservedCapacity()}, - {std::string(SharedArbitratorConfig::kMemoryPoolTransferCapacity), - systemConfig->sharedArbitratorMemoryPoolTransferCapacity()}, {std::string(SharedArbitratorConfig::kMemoryReclaimMaxWaitTime), systemConfig->sharedArbitratorMemoryReclaimWaitTime()}, {std::string(SharedArbitratorConfig::kMemoryPoolMinFreeCapacity), diff --git a/presto-native-execution/presto_cpp/main/common/Configs.cpp b/presto-native-execution/presto_cpp/main/common/Configs.cpp index 5ff693d88518..2fd152242761 100644 --- a/presto-native-execution/presto_cpp/main/common/Configs.cpp +++ b/presto-native-execution/presto_cpp/main/common/Configs.cpp @@ -192,7 +192,6 @@ SystemConfig::SystemConfig() { STR_PROP(kSharedArbitratorReservedCapacity, "4GB"), STR_PROP(kSharedArbitratorMemoryPoolInitialCapacity, "128MB"), STR_PROP(kSharedArbitratorMemoryPoolReservedCapacity, "64MB"), - STR_PROP(kSharedArbitratorMemoryPoolTransferCapacity, "32MB"), STR_PROP(kSharedArbitratorMemoryReclaimMaxWaitTime, "5m"), STR_PROP(kSharedArbitratorGlobalArbitrationEnabled, "false"), NUM_PROP(kLargestSizeClassPages, 256), @@ -553,15 +552,6 @@ std::string SystemConfig::sharedArbitratorMemoryPoolReservedCapacity() const { std::string(kSharedArbitratorMemoryPoolReservedCapacityDefault)); } -std::string SystemConfig::sharedArbitratorMemoryPoolTransferCapacity() const { - static constexpr std::string_view - kSharedArbitratorMemoryPoolTransferCapacityDefault = "32MB"; - return optionalProperty( - kSharedArbitratorMemoryPoolTransferCapacity) - .value_or( - std::string(kSharedArbitratorMemoryPoolTransferCapacityDefault)); -} - std::string 
SystemConfig::sharedArbitratorMemoryReclaimWaitTime() const { static constexpr std::string_view kSharedArbitratorMemoryReclaimMaxWaitTimeDefault = "5m"; diff --git a/presto-native-execution/presto_cpp/main/common/Configs.h b/presto-native-execution/presto_cpp/main/common/Configs.h index 8f38f89d6af4..2466698ef0d1 100644 --- a/presto-native-execution/presto_cpp/main/common/Configs.h +++ b/presto-native-execution/presto_cpp/main/common/Configs.h @@ -419,11 +419,6 @@ class SystemConfig : public ConfigBase { static constexpr std::string_view kSharedArbitratorMemoryPoolReservedCapacity{ "shared-arbitrator.memory-pool-reserved-capacity"}; - /// The minimal memory capacity in bytes transferred between memory pools - /// during memory arbitration. - static constexpr std::string_view kSharedArbitratorMemoryPoolTransferCapacity{ - "shared-arbitrator.memory-pool-transfer-capacity"}; - /// Specifies the max time to wait for memory reclaim by arbitration. The /// memory reclaim might fail if the max wait time has exceeded. If it is /// zero, then there is no timeout. 
@@ -790,8 +785,6 @@ class SystemConfig : public ConfigBase { std::string sharedArbitratorMemoryPoolReservedCapacity() const; - std::string sharedArbitratorMemoryPoolTransferCapacity() const; - std::string sharedArbitratorMemoryReclaimWaitTime() const; std::string sharedArbitratorMemoryPoolInitialCapacity() const; From 2c57ef3e701d80ec6369ae16cd86c7db1bd7d791 Mon Sep 17 00:00:00 2001 From: Xiaoxuan Meng Date: Wed, 25 Sep 2024 20:18:22 -0700 Subject: [PATCH 26/86] [native]Add to set bucket property flag in table write plan node This helps velox query planner to set table write driver thread properly --- .../presto_cpp/main/types/PrestoToVeloxQueryPlan.cpp | 8 ++++++++ presto-native-execution/velox | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxQueryPlan.cpp b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxQueryPlan.cpp index 9d3245244462..c4e96972c69b 100644 --- a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxQueryPlan.cpp +++ b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxQueryPlan.cpp @@ -16,6 +16,7 @@ #include "presto_cpp/main/types/PrestoToVeloxConnector.h" #include "presto_cpp/main/types/PrestoToVeloxQueryPlan.h" #include +#include "velox/connectors/hive/HiveDataSink.h" #include "velox/core/QueryCtx.h" #include "velox/exec/HashPartitionFunction.h" #include "velox/exec/RoundRobinPartitionFunction.h" @@ -1316,6 +1317,12 @@ VeloxQueryPlanConverterBase::toVeloxQueryPlan( auto insertTableHandle = std::make_shared( connectorId, connectorInsertHandle); + bool hasBucketProperty{false}; + if (auto* HiveInsertTableHandle = + dynamic_cast( + connectorInsertHandle.get())) { + hasBucketProperty = HiveInsertTableHandle->bucketProperty() != nullptr; + } const auto outputType = toRowType( generateOutputVariables( @@ -1341,6 +1348,7 @@ VeloxQueryPlanConverterBase::toVeloxQueryPlan( std::move(aggregationNode), std::move(insertTableHandle), 
node->partitioningScheme != nullptr, + hasBucketProperty, outputType, getCommitStrategy(), sourceVeloxPlan); diff --git a/presto-native-execution/velox b/presto-native-execution/velox index 8ad7c0c04028..272995b26bc8 160000 --- a/presto-native-execution/velox +++ b/presto-native-execution/velox @@ -1 +1 @@ -Subproject commit 8ad7c0c040283b53d18f1c1e553b5f2aa36f3a0a +Subproject commit 272995b26bc887b224cbf4a131caf98ee15db328 From 4bdd580a186bf8e802b912d192e0dfba64475ea8 Mon Sep 17 00:00:00 2001 From: Elliotte Rusty Harold Date: Mon, 23 Sep 2024 14:04:42 -0400 Subject: [PATCH 27/86] Delete unused code --- .../presto/execution/executor/Histogram.java | 186 -------- .../executor/SimulationController.java | 275 ----------- .../execution/executor/SimulationSplit.java | 261 ---------- .../execution/executor/SimulationTask.java | 171 ------- .../execution/executor/SplitGenerators.java | 347 -------------- .../executor/SplitSpecification.java | 86 ---- .../executor/TaskExecutorSimulator.java | 449 ------------------ 7 files changed, 1775 deletions(-) delete mode 100644 presto-main/src/test/java/com/facebook/presto/execution/executor/Histogram.java delete mode 100644 presto-main/src/test/java/com/facebook/presto/execution/executor/SimulationController.java delete mode 100644 presto-main/src/test/java/com/facebook/presto/execution/executor/SimulationSplit.java delete mode 100644 presto-main/src/test/java/com/facebook/presto/execution/executor/SimulationTask.java delete mode 100644 presto-main/src/test/java/com/facebook/presto/execution/executor/SplitGenerators.java delete mode 100644 presto-main/src/test/java/com/facebook/presto/execution/executor/SplitSpecification.java delete mode 100644 presto-main/src/test/java/com/facebook/presto/execution/executor/TaskExecutorSimulator.java diff --git a/presto-main/src/test/java/com/facebook/presto/execution/executor/Histogram.java b/presto-main/src/test/java/com/facebook/presto/execution/executor/Histogram.java deleted file mode 100644 
index 7f6ffbe5948c..000000000000 --- a/presto-main/src/test/java/com/facebook/presto/execution/executor/Histogram.java +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.facebook.presto.execution.executor; - -import com.google.common.collect.ImmutableList; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.SortedMap; -import java.util.TreeMap; -import java.util.function.Function; - -import static com.google.common.base.Preconditions.checkArgument; - -class Histogram> -{ - private final List buckets; - private final boolean discrete; - - private Histogram(Collection buckets, boolean discrete) - { - this.buckets = new ArrayList<>(buckets); - this.discrete = discrete; - Collections.sort(this.buckets); - } - - public static > Histogram fromDiscrete(Collection buckets) - { - return new Histogram<>(buckets, true); - } - - public static > Histogram fromContinuous(Collection buckets) - { - return new Histogram<>(buckets, false); - } - - public static Histogram fromContinuous(Collection initialData, Function keyFunction) - { - if (initialData.isEmpty()) { - return new Histogram<>(ImmutableList.of(), false); - } - - int numBuckets = Math.min(10, (int) Math.sqrt(initialData.size())); - long min = initialData.stream() - .mapToLong(keyFunction::apply) - .min() - .getAsLong(); - long max = initialData.stream() - .mapToLong(keyFunction::apply) 
- .max() - .getAsLong(); - - checkArgument(max > min); - - long bucketSize = (max - min) / numBuckets; - long bucketRemainder = (max - min) % numBuckets; - - List minimums = new ArrayList<>(); - - long currentMin = min; - for (int i = 0; i < numBuckets; i++) { - minimums.add(currentMin); - long currentMax = currentMin + bucketSize; - if (bucketRemainder > 0) { - currentMax++; - bucketRemainder--; - } - currentMin = currentMax + 1; - } - - minimums.add(numBuckets, currentMin); - - return new Histogram<>(minimums, false); - } - - public void printDistribution( - Collection data, - Function keyFunction, - Function keyFormatter) - { - if (buckets.isEmpty()) { - System.out.println("No buckets"); - return; - } - - if (data.isEmpty()) { - System.out.println("No data"); - return; - } - - long[] bucketData = new long[buckets.size()]; - - for (D datum : data) { - K key = keyFunction.apply(datum); - - for (int i = 0; i < buckets.size(); i++) { - if (key.compareTo(buckets.get(i)) >= 0 && (i == (buckets.size() - 1) || key.compareTo(buckets.get(i + 1)) < 0)) { - bucketData[i]++; - break; - } - } - } - - if (!discrete) { - for (int i = 0; i < bucketData.length - 1; i++) { - System.out.printf("%8s - %8s : (%5s values)\n", - keyFormatter.apply(buckets.get(i)), - keyFormatter.apply(buckets.get(i + 1)), - bucketData[i]); - } - } - else { - for (int i = 0; i < bucketData.length; i++) { - System.out.printf("%8s : (%5s values)\n", - keyFormatter.apply(buckets.get(i)), - bucketData[i]); - } - } - } - - public void printDistribution( - Collection data, - Function keyFunction, - Function valueFunction, - Function keyFormatter, - Function, G> valueFormatter) - { - if (buckets.isEmpty()) { - System.out.println("No buckets"); - return; - } - - if (data.isEmpty()) { - System.out.println("No data"); - return; - } - - SortedMap> bucketData = new TreeMap<>(); - for (int i = 0; i < buckets.size(); i++) { - bucketData.put(i, new ArrayList<>()); - } - - for (D datum : data) { - K key = 
keyFunction.apply(datum); - V value = valueFunction.apply(datum); - - for (int i = 0; i < buckets.size(); i++) { - if (key.compareTo(buckets.get(i)) >= 0 && (i == (buckets.size() - 1) || key.compareTo(buckets.get(i + 1)) < 0)) { - bucketData.get(i).add(value); - break; - } - } - } - - if (!discrete) { - for (int i = 0; i < bucketData.size() - 1; i++) { - System.out.printf("%8s - %8s : (%5s values) %s\n", - keyFormatter.apply(buckets.get(i)), - keyFormatter.apply(buckets.get(i + 1)), - bucketData.get(i).size(), - valueFormatter.apply(bucketData.get(i))); - } - } - else { - for (int i = 0; i < bucketData.size(); i++) { - System.out.printf("%19s : (%5s values) %s\n", - keyFormatter.apply(buckets.get(i)), - bucketData.get(i).size(), - valueFormatter.apply(bucketData.get(i))); - } - } - } -} diff --git a/presto-main/src/test/java/com/facebook/presto/execution/executor/SimulationController.java b/presto-main/src/test/java/com/facebook/presto/execution/executor/SimulationController.java deleted file mode 100644 index e931f9495b5e..000000000000 --- a/presto-main/src/test/java/com/facebook/presto/execution/executor/SimulationController.java +++ /dev/null @@ -1,275 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package com.facebook.presto.execution.executor; - -import com.facebook.presto.execution.TaskId; -import com.facebook.presto.execution.executor.SimulationTask.IntermediateTask; -import com.facebook.presto.execution.executor.SimulationTask.LeafTask; -import com.facebook.presto.execution.executor.SplitGenerators.SplitGenerator; -import com.google.common.collect.ArrayListMultimap; -import com.google.common.collect.ListMultimap; -import com.google.common.collect.Multimaps; - -import java.util.Map; -import java.util.OptionalInt; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.function.BiConsumer; - -import static com.facebook.presto.execution.executor.SimulationController.TaskSpecification.Type.LEAF; -import static java.util.concurrent.Executors.newSingleThreadExecutor; -import static java.util.concurrent.TimeUnit.MILLISECONDS; - -class SimulationController -{ - private static final int DEFAULT_MIN_SPLITS_PER_TASK = 3; - - private final TaskExecutor taskExecutor; - private final BiConsumer callback; - - private final ExecutorService controllerExecutor = newSingleThreadExecutor(); - - private final Map specificationEnabled = new ConcurrentHashMap<>(); - private final ListMultimap runningTasks = Multimaps.synchronizedListMultimap(ArrayListMultimap.create()); - - private final ListMultimap completedTasks = Multimaps.synchronizedListMultimap(ArrayListMultimap.create()); - private final AtomicBoolean clearPendingQueue = new AtomicBoolean(); - - private final AtomicBoolean stopped = new AtomicBoolean(); - - public SimulationController(TaskExecutor taskExecutor, BiConsumer callback) - { - this.taskExecutor = taskExecutor; - this.callback = callback; - } - - public synchronized void addTaskSpecification(TaskSpecification spec) - { - specificationEnabled.put(spec, false); - } - - public synchronized void clearPendingQueue() - { - System.out.println("Clearing 
pending queue.."); - clearPendingQueue.set(true); - } - - public synchronized void stop() - { - stopped.set(true); - controllerExecutor.shutdownNow(); - taskExecutor.stop(); - } - - public synchronized void enableSpecification(TaskSpecification specification) - { - specificationEnabled.replace(specification, false, true); - startSpec(specification); - } - - public synchronized void disableSpecification(TaskSpecification specification) - { - if (specificationEnabled.replace(specification, true, false) && callback != null) { - runCallback(); - } - } - - public synchronized void runCallback() - { - callback.accept(this, taskExecutor); - } - - public void run() - { - controllerExecutor.submit(() -> { - while (!stopped.get()) { - replaceCompletedTasks(); - scheduleSplitsForRunningTasks(); - - try { - MILLISECONDS.sleep(500); - } - catch (InterruptedException e) { - return; - } - } - }); - } - - private synchronized void scheduleSplitsForRunningTasks() - { - if (clearPendingQueue.get()) { - if (taskExecutor.getWaitingSplits() > (taskExecutor.getIntermediateSplits() - taskExecutor.getBlockedSplits())) { - return; - } - - System.out.println("Cleared pending queue."); - clearPendingQueue.set(false); - } - - for (TaskSpecification specification : specificationEnabled.keySet()) { - if (!specificationEnabled.get(specification)) { - continue; - } - - for (SimulationTask task : runningTasks.get(specification)) { - if (specification.getType() == LEAF) { - int remainingSplits = specification.getNumSplitsPerTask() - (task.getRunningSplits().size() + task.getCompletedSplits().size()); - int candidateSplits = DEFAULT_MIN_SPLITS_PER_TASK - task.getRunningSplits().size(); - for (int i = 0; i < Math.min(remainingSplits, candidateSplits); i++) { - task.schedule(taskExecutor, 1); - } - } - else { - int remainingSplits = specification.getNumSplitsPerTask() - (task.getRunningSplits().size() + task.getCompletedSplits().size()); - task.schedule(taskExecutor, remainingSplits); - } - } - } - } 
- - private synchronized void replaceCompletedTasks() - { - boolean moved; - do { - moved = false; - - for (TaskSpecification specification : specificationEnabled.keySet()) { - if (specification.getTotalTasks().isPresent() && - specificationEnabled.get(specification) && - specification.getTotalTasks().getAsInt() <= completedTasks.get(specification).size() + runningTasks.get(specification).size()) { - System.out.println(); - System.out.println(specification.getName() + " disabled for reaching target count " + specification.getTotalTasks()); - System.out.println(); - disableSpecification(specification); - continue; - } - for (SimulationTask task : runningTasks.get(specification)) { - if (task.getCompletedSplits().size() >= specification.getNumSplitsPerTask()) { - completedTasks.put(specification, task); - runningTasks.remove(specification, task); - taskExecutor.removeTask(task.getTaskHandle()); - - if (!specificationEnabled.get(specification)) { - continue; - } - - createTask(specification); - moved = true; - break; - } - } - } - } - while (moved); - } - - private void createTask(TaskSpecification specification) - { - if (specification.getType() == LEAF) { - runningTasks.put(specification, new LeafTask( - taskExecutor, - specification, - new TaskId(specification.getName(), 0, 0, runningTasks.get(specification).size() + completedTasks.get(specification).size(), 0))); - } - else { - runningTasks.put(specification, new IntermediateTask( - taskExecutor, - specification, - new TaskId(specification.getName(), 0, 0, runningTasks.get(specification).size() + completedTasks.get(specification).size(), 0))); - } - } - - public Map getSpecificationEnabled() - { - return specificationEnabled; - } - - public ListMultimap getRunningTasks() - { - return runningTasks; - } - - public ListMultimap getCompletedTasks() - { - return completedTasks; - } - - private void startSpec(TaskSpecification specification) - { - if (!specificationEnabled.get(specification)) { - return; - } - for (int 
i = 0; i < specification.getNumConcurrentTasks(); i++) { - createTask(specification); - } - } - - public static class TaskSpecification - { - enum Type - { - LEAF, - INTERMEDIATE - } - - private final Type type; - private final String name; - private final OptionalInt totalTasks; - private final int numConcurrentTasks; - private final int numSplitsPerTask; - private final SplitGenerator splitGenerator; - - TaskSpecification(Type type, String name, OptionalInt totalTasks, int numConcurrentTasks, int numSplitsPerTask, SplitGenerator splitGenerator) - { - this.type = type; - this.name = name; - this.totalTasks = totalTasks; - this.numConcurrentTasks = numConcurrentTasks; - this.numSplitsPerTask = numSplitsPerTask; - this.splitGenerator = splitGenerator; - } - - Type getType() - { - return type; - } - - String getName() - { - return name; - } - - int getNumConcurrentTasks() - { - return numConcurrentTasks; - } - - int getNumSplitsPerTask() - { - return numSplitsPerTask; - } - - OptionalInt getTotalTasks() - { - return totalTasks; - } - - SplitSpecification nextSpecification() - { - return splitGenerator.next(); - } - } -} diff --git a/presto-main/src/test/java/com/facebook/presto/execution/executor/SimulationSplit.java b/presto-main/src/test/java/com/facebook/presto/execution/executor/SimulationSplit.java deleted file mode 100644 index 874d37b2bee6..000000000000 --- a/presto-main/src/test/java/com/facebook/presto/execution/executor/SimulationSplit.java +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.facebook.presto.execution.executor; - -import com.facebook.presto.execution.SplitRunner; -import com.google.common.util.concurrent.Futures; -import com.google.common.util.concurrent.ListenableFuture; -import com.google.common.util.concurrent.SettableFuture; -import io.airlift.units.Duration; - -import java.util.concurrent.RejectedExecutionException; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; - -import static com.facebook.presto.operator.Operator.NOT_BLOCKED; -import static io.airlift.units.Duration.succinctNanos; -import static java.util.Objects.requireNonNull; -import static java.util.concurrent.TimeUnit.NANOSECONDS; - -abstract class SimulationSplit - implements SplitRunner -{ - private final SimulationTask task; - - private final AtomicInteger calls = new AtomicInteger(0); - - private final AtomicLong completedProcessNanos = new AtomicLong(); - private final AtomicLong startNanos = new AtomicLong(-1); - private final AtomicLong doneNanos = new AtomicLong(-1); - private final AtomicLong waitNanos = new AtomicLong(); - private final AtomicLong lastReadyTime = new AtomicLong(-1); - private final AtomicBoolean killed = new AtomicBoolean(false); - - private final long scheduledTimeNanos; - - SimulationSplit(SimulationTask task, long scheduledTimeNanos) - { - this.task = requireNonNull(task, "task is null"); - this.scheduledTimeNanos = scheduledTimeNanos; - } - - long getCompletedProcessNanos() - { - return completedProcessNanos.get(); - } - - long getWaitNanos() - { - return waitNanos.get(); - } - - int getCalls() - { - return calls.get(); - } - - long getScheduledTimeNanos() - { - return scheduledTimeNanos; - } - - void setKilled() - { - waitNanos.addAndGet(System.nanoTime() - 
lastReadyTime.get()); - killed.set(true); - task.setKilled(); - } - - @Override - public boolean isFinished() - { - return doneNanos.get() >= 0; - } - - @Override - public void close() - { - } - - abstract boolean process(); - - abstract ListenableFuture getProcessResult(); - - void setSplitReady() - { - lastReadyTime.set(System.nanoTime()); - } - - @Override - public ListenableFuture processFor(Duration duration) - { - calls.incrementAndGet(); - - long callStart = System.nanoTime(); - startNanos.compareAndSet(-1, callStart); - lastReadyTime.compareAndSet(-1, callStart); - waitNanos.addAndGet(callStart - lastReadyTime.get()); - - boolean done = process(); - - long callEnd = System.nanoTime(); - - completedProcessNanos.addAndGet(callEnd - callStart); - - if (done) { - doneNanos.compareAndSet(-1, callEnd); - - if (!killed.get()) { - task.splitComplete(this); - } - - return Futures.immediateFuture(null); - } - - ListenableFuture processResult = getProcessResult(); - if (processResult.isDone()) { - setSplitReady(); - } - - return processResult; - } - - static class LeafSplit - extends SimulationSplit - { - private final long perQuantaNanos; - - LeafSplit(SimulationTask task, long scheduledTimeNanos, long perQuantaNanos) - { - super(task, scheduledTimeNanos); - this.perQuantaNanos = perQuantaNanos; - } - - boolean process() - { - if (getCompletedProcessNanos() >= super.scheduledTimeNanos) { - return true; - } - - long processNanos = Math.min(super.scheduledTimeNanos - getCompletedProcessNanos(), perQuantaNanos); - if (processNanos > 0) { - try { - NANOSECONDS.sleep(processNanos); - } - catch (InterruptedException e) { - setKilled(); - return true; - } - } - - return false; - } - - ListenableFuture getProcessResult() - { - return NOT_BLOCKED; - } - - @Override - public String getInfo() - { - double pct = (100.0 * getCompletedProcessNanos() / super.scheduledTimeNanos); - return String.format("leaf %3s%% done (total: %8s, per quanta: %8s)", - (int) (pct > 100.00 ? 
100.0 : pct), - succinctNanos(super.scheduledTimeNanos), - succinctNanos(perQuantaNanos)); - } - } - - static class IntermediateSplit - extends SimulationSplit - { - private final long wallTimeNanos; - private final long numQuantas; - private final long perQuantaNanos; - private final long betweenQuantaNanos; - - private final ScheduledExecutorService executorService; - - private SettableFuture future = SettableFuture.create(); - private SettableFuture doneFuture = SettableFuture.create(); - - IntermediateSplit(SimulationTask task, long scheduledTimeNanos, long wallTimeNanos, long numQuantas, long perQuantaNanos, long betweenQuantaNanos, ScheduledExecutorService executorService) - { - super(task, scheduledTimeNanos); - this.wallTimeNanos = wallTimeNanos; - this.numQuantas = numQuantas; - this.perQuantaNanos = perQuantaNanos; - this.betweenQuantaNanos = betweenQuantaNanos; - this.executorService = executorService; - - doneFuture.set(null); - } - - boolean process() - { - try { - if (getCalls() < numQuantas) { - NANOSECONDS.sleep(perQuantaNanos); - return false; - } - } - catch (InterruptedException ignored) { - setKilled(); - return true; - } - - return true; - } - - ListenableFuture getProcessResult() - { - future = SettableFuture.create(); - try { - executorService.schedule(() -> { - try { - if (!executorService.isShutdown()) { - future.set(null); - } - else { - setKilled(); - } - setSplitReady(); - } - catch (RuntimeException ignored) { - setKilled(); - } - }, betweenQuantaNanos, NANOSECONDS); - } - catch (RejectedExecutionException ignored) { - setKilled(); - return doneFuture; - } - return future; - } - - @Override - public String getInfo() - { - double pct = (100.0 * getCalls() / numQuantas); - return String.format("intr %3s%% done (wall: %9s, per quanta: %8s, between quanta: %8s)", - (int) (pct > 100.00 ? 
100.0 : pct), - succinctNanos(wallTimeNanos), - succinctNanos(perQuantaNanos), - succinctNanos(betweenQuantaNanos)); - } - } -} diff --git a/presto-main/src/test/java/com/facebook/presto/execution/executor/SimulationTask.java b/presto-main/src/test/java/com/facebook/presto/execution/executor/SimulationTask.java deleted file mode 100644 index 6898ef9b71e9..000000000000 --- a/presto-main/src/test/java/com/facebook/presto/execution/executor/SimulationTask.java +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package com.facebook.presto.execution.executor; - -import com.facebook.presto.execution.TaskId; -import com.facebook.presto.execution.executor.SimulationController.TaskSpecification; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Sets; -import io.airlift.units.Duration; - -import java.util.OptionalInt; -import java.util.Set; -import java.util.concurrent.atomic.AtomicBoolean; - -import static java.util.concurrent.TimeUnit.SECONDS; - -abstract class SimulationTask -{ - private final TaskSpecification specification; - private final TaskId taskId; - - private final Set runningSplits = Sets.newConcurrentHashSet(); - private final Set completedSplits = Sets.newConcurrentHashSet(); - - private final TaskHandle taskHandle; - private final AtomicBoolean killed = new AtomicBoolean(); - - public SimulationTask(TaskExecutor taskExecutor, TaskSpecification specification, TaskId taskId) - { - this.specification = specification; - this.taskId = taskId; - taskHandle = taskExecutor.addTask(taskId, () -> 0, 10, new Duration(1, SECONDS), OptionalInt.empty()); - } - - public void setKilled() - { - killed.set(true); - } - - public boolean isKilled() - { - return killed.get(); - } - - public Set getCompletedSplits() - { - return completedSplits; - } - - TaskId getTaskId() - { - return taskId; - } - - public TaskHandle getTaskHandle() - { - return taskHandle; - } - - public Set getRunningSplits() - { - return runningSplits; - } - - public synchronized void splitComplete(SimulationSplit split) - { - runningSplits.remove(split); - completedSplits.add(split); - } - - public TaskSpecification getSpecification() - { - return specification; - } - - public long getTotalWaitTimeNanos() - { - long runningWaitTime = runningSplits.stream() - .mapToLong(SimulationSplit::getWaitNanos) - .sum(); - - long completedWaitTime = completedSplits.stream() - .mapToLong(SimulationSplit::getWaitNanos) - .sum(); - - return runningWaitTime + completedWaitTime; - } - - 
public long getProcessedTimeNanos() - { - long runningProcessedTime = runningSplits.stream() - .mapToLong(SimulationSplit::getCompletedProcessNanos) - .sum(); - - long completedProcessedTime = completedSplits.stream() - .mapToLong(SimulationSplit::getCompletedProcessNanos) - .sum(); - - return runningProcessedTime + completedProcessedTime; - } - - public long getScheduledTimeNanos() - { - long runningWallTime = runningSplits.stream() - .mapToLong(SimulationSplit::getScheduledTimeNanos) - .sum(); - - long completedWallTime = completedSplits.stream() - .mapToLong(SimulationSplit::getScheduledTimeNanos) - .sum(); - - return runningWallTime + completedWallTime; - } - - public abstract void schedule(TaskExecutor taskExecutor, int numSplits); - - public static class LeafTask - extends SimulationTask - { - private final TaskSpecification taskSpecification; - - public LeafTask(TaskExecutor taskExecutor, TaskSpecification specification, TaskId taskId) - { - super(taskExecutor, specification, taskId); - this.taskSpecification = specification; - } - - public void schedule(TaskExecutor taskExecutor, int numSplits) - { - ImmutableList.Builder splits = ImmutableList.builder(); - for (int i = 0; i < numSplits; i++) { - splits.add(taskSpecification.nextSpecification().instantiate(this)); - } - super.runningSplits.addAll(splits.build()); - taskExecutor.enqueueSplits(getTaskHandle(), false, splits.build()); - } - } - - public static class IntermediateTask - extends SimulationTask - { - private final SplitSpecification splitSpecification; - - public IntermediateTask(TaskExecutor taskExecutor, TaskSpecification specification, TaskId taskId) - { - super(taskExecutor, specification, taskId); - this.splitSpecification = specification.nextSpecification(); - } - - public void schedule(TaskExecutor taskExecutor, int numSplits) - { - ImmutableList.Builder splits = ImmutableList.builder(); - for (int i = 0; i < numSplits; i++) { - splits.add(splitSpecification.instantiate(this)); - } - 
super.runningSplits.addAll(splits.build()); - taskExecutor.enqueueSplits(getTaskHandle(), true, splits.build()); - } - } -} diff --git a/presto-main/src/test/java/com/facebook/presto/execution/executor/SplitGenerators.java b/presto-main/src/test/java/com/facebook/presto/execution/executor/SplitGenerators.java deleted file mode 100644 index 366db8fd0582..000000000000 --- a/presto-main/src/test/java/com/facebook/presto/execution/executor/SplitGenerators.java +++ /dev/null @@ -1,347 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package com.facebook.presto.execution.executor; - -import com.facebook.presto.execution.executor.SplitSpecification.IntermediateSplitSpecification; -import com.facebook.presto.execution.executor.SplitSpecification.LeafSplitSpecification; -import com.google.common.collect.ImmutableList; -import io.airlift.units.Duration; - -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.ThreadLocalRandom; - -import static com.facebook.presto.execution.executor.Histogram.fromContinuous; -import static java.util.concurrent.TimeUnit.DAYS; -import static java.util.concurrent.TimeUnit.MICROSECONDS; -import static java.util.concurrent.TimeUnit.MILLISECONDS; -import static java.util.concurrent.TimeUnit.MINUTES; - -class SplitGenerators -{ - private SplitGenerators() {} - - public static void main(String[] args) - { - Histogram bins = fromContinuous(ImmutableList.of( - MILLISECONDS.toNanos(0), - MILLISECONDS.toNanos(1), - MILLISECONDS.toNanos(10), - MILLISECONDS.toNanos(100), - MILLISECONDS.toNanos(1_000), - MILLISECONDS.toNanos(10_000), - MILLISECONDS.toNanos(60_000), - MILLISECONDS.toNanos(300_000), - MINUTES.toNanos(20), - DAYS.toNanos(1))); - - IntermediateSplitGenerator intermediateSplitGenerator = new IntermediateSplitGenerator(null); - List intermediateSpecs = new ArrayList<>(); - for (int i = 0; i < 10_000; i++) { - IntermediateSplitSpecification next = intermediateSplitGenerator.next(); - intermediateSpecs.add(next); - } - - System.out.println("Scheduled time distributions"); - System.out.println("============================"); - System.out.println(); - System.out.println("Tasks with 8x " + IntermediateSplitGenerator.class.getSimpleName()); - bins.printDistribution(intermediateSpecs, t -> t.getScheduledTimeNanos() * 8, a -> 1, Duration::succinctNanos, a -> ""); - - List leafSplitGenerators = ImmutableList.of( - new FastLeafSplitGenerator(), - new SlowLeafSplitGenerator(), - new 
L4LeafSplitGenerator(), - new QuantaExceedingSplitGenerator(), - new AggregatedLeafSplitGenerator()); - - for (SplitGenerator generator : leafSplitGenerators) { - List leafSpecs = new ArrayList<>(); - for (int i = 0; i < 17000; i++) { - leafSpecs.add(generator.next()); - } - - System.out.println(); - System.out.println("Tasks with 4x " + generator.getClass().getSimpleName()); - bins.printDistribution(leafSpecs, t -> t.getScheduledTimeNanos() * 4, Duration::succinctNanos); - - System.out.println("Per quanta:"); - bins.printDistribution(leafSpecs, SplitSpecification::getPerQuantaNanos, Duration::succinctNanos); - } - } - - interface SplitGenerator - { - SplitSpecification next(); - } - - public static class IntermediateSplitGenerator - implements SplitGenerator - { - private final ScheduledExecutorService wakeupExecutor; - - IntermediateSplitGenerator(ScheduledExecutorService wakeupExecutor) - { - this.wakeupExecutor = wakeupExecutor; - } - - public IntermediateSplitSpecification next() - { - long numQuanta = generateIntermediateSplitNumQuanta(0, 1); - - long wallNanos = MILLISECONDS.toNanos(generateIntermediateSplitWallTimeMs(0, 1)); - long scheduledNanos = MILLISECONDS.toNanos(generateIntermediateSplitScheduledTimeMs(0, 1)); - - long blockedNanos = (long) (ThreadLocalRandom.current().nextDouble(0.97, 0.99) * wallNanos); - - long perQuantaNanos = scheduledNanos / numQuanta; - long betweenQuantaNanos = blockedNanos / numQuanta; - - return new IntermediateSplitSpecification(scheduledNanos, wallNanos, numQuanta, perQuantaNanos, betweenQuantaNanos, wakeupExecutor); - } - } - - public static class AggregatedLeafSplitGenerator - implements SplitGenerator - { - public LeafSplitSpecification next() - { - long totalNanos = MILLISECONDS.toNanos(generateLeafSplitScheduledTimeMs(0, 1)); - long quantaNanos = Math.min(totalNanos, MICROSECONDS.toNanos(generateLeafSplitPerCallMicros(0, 1))); - - return new LeafSplitSpecification(totalNanos, quantaNanos); - } - } - - public static 
class FastLeafSplitGenerator - implements SplitGenerator - { - public LeafSplitSpecification next() - { - long totalNanos = MILLISECONDS.toNanos(generateLeafSplitScheduledTimeMs(0, 0.75)); - long quantaNanos = Math.min(totalNanos, MICROSECONDS.toNanos(generateLeafSplitPerCallMicros(0, 1))); - - return new LeafSplitSpecification(totalNanos, quantaNanos); - } - } - - public static class SlowLeafSplitGenerator - implements SplitGenerator - { - public LeafSplitSpecification next() - { - long totalNanos = MILLISECONDS.toNanos(generateLeafSplitScheduledTimeMs(0.75, 1)); - long quantaNanos = Math.min(totalNanos, MICROSECONDS.toNanos(generateLeafSplitPerCallMicros(0, 1))); - - return new LeafSplitSpecification(totalNanos, quantaNanos); - } - } - - public static class L4LeafSplitGenerator - implements SplitGenerator - { - public LeafSplitSpecification next() - { - long totalNanos = MILLISECONDS.toNanos(generateLeafSplitScheduledTimeMs(0.99, 1)); - long quantaNanos = Math.min(totalNanos, MICROSECONDS.toNanos(generateLeafSplitPerCallMicros(0, 0.9))); - - return new LeafSplitSpecification(totalNanos, quantaNanos); - } - } - - public static class QuantaExceedingSplitGenerator - implements SplitGenerator - { - public LeafSplitSpecification next() - { - long totalNanos = MILLISECONDS.toNanos(generateLeafSplitScheduledTimeMs(0.99, 1)); - long quantaNanos = Math.min(totalNanos, MICROSECONDS.toNanos(generateLeafSplitPerCallMicros(0.75, 1))); - - return new LeafSplitSpecification(totalNanos, quantaNanos); - } - } - - public static class SimpleLeafSplitGenerator - implements SplitGenerator - { - private final long totalNanos; - private final long quantaNanos; - - public SimpleLeafSplitGenerator(long totalNanos, long quantaNanos) - { - this.totalNanos = totalNanos; - this.quantaNanos = quantaNanos; - } - - public LeafSplitSpecification next() - { - return new LeafSplitSpecification(totalNanos, quantaNanos); - } - } - - // these numbers come from real world stats - private static long 
generateLeafSplitScheduledTimeMs(double origin, double bound) - { - ThreadLocalRandom generator = ThreadLocalRandom.current(); - double value = generator.nextDouble(origin, bound); - // in reality, max is several hours, but this would make the simulation too slow - if (value > 0.998) { - return generator.nextLong(5 * 60 * 1000, 10 * 60 * 1000); - } - - if (value > 0.99) { - return generator.nextLong(60 * 1000, 5 * 60 * 1000); - } - - if (value > 0.95) { - return generator.nextLong(10_000, 60 * 1000); - } - - if (value > 0.50) { - return generator.nextLong(1000, 10_000); - } - - if (value > 0.25) { - return generator.nextLong(100, 1000); - } - - if (value > 0.10) { - return generator.nextLong(10, 100); - } - - return generator.nextLong(1, 10); - } - - private static long generateLeafSplitPerCallMicros(double origin, double bound) - { - ThreadLocalRandom generator = ThreadLocalRandom.current(); - double value = generator.nextDouble(origin, bound); - if (value > 0.9999) { - return 200_000_000; - } - - if (value > 0.99) { - return generator.nextLong(3_000_000, 15_000_000); - } - - if (value > 0.95) { - return generator.nextLong(2_000_000, 5_000_000); - } - - if (value > 0.90) { - return generator.nextLong(1_500_000, 5_000_000); - } - - if (value > 0.75) { - return generator.nextLong(1_000_000, 2_000_000); - } - - if (value > 0.50) { - return generator.nextLong(500_000, 1_000_000); - } - - if (value > 0.1) { - return generator.nextLong(100_000, 500_000); - } - - return generator.nextLong(250, 500); - } - - private static long generateIntermediateSplitScheduledTimeMs(double origin, double bound) - { - ThreadLocalRandom generator = ThreadLocalRandom.current(); - double value = generator.nextDouble(origin, bound); - // in reality, max is several hours, but this would make the simulation too slow - - if (value > 0.999) { - return generator.nextLong(5 * 60 * 1000, 10 * 60 * 1000); - } - - if (value > 0.99) { - return generator.nextLong(60 * 1000, 5 * 60 * 1000); - } - - if 
(value > 0.95) { - return generator.nextLong(10_000, 60 * 1000); - } - - if (value > 0.75) { - return generator.nextLong(1000, 10_000); - } - - if (value > 0.45) { - return generator.nextLong(100, 1000); - } - - if (value > 0.20) { - return generator.nextLong(10, 100); - } - - return generator.nextLong(1, 10); - } - - private static long generateIntermediateSplitWallTimeMs(double origin, double bound) - { - ThreadLocalRandom generator = ThreadLocalRandom.current(); - double value = generator.nextDouble(origin, bound); - // in reality, max is several hours, but this would make the simulation too slow - - if (value > 0.90) { - return generator.nextLong(400_000, 800_000); - } - - if (value > 0.75) { - return generator.nextLong(100_000, 200_000); - } - - if (value > 0.50) { - return generator.nextLong(50_000, 100_000); - } - - if (value > 0.40) { - return generator.nextLong(30_000, 50_000); - } - - if (value > 0.30) { - return generator.nextLong(20_000, 30_000); - } - - if (value > 0.20) { - return generator.nextLong(10_000, 15_000); - } - - if (value > 0.10) { - return generator.nextLong(5_000, 10_000); - } - - return generator.nextLong(1_000, 5_000); - } - - private static long generateIntermediateSplitNumQuanta(double origin, double bound) - { - ThreadLocalRandom generator = ThreadLocalRandom.current(); - double value = generator.nextDouble(origin, bound); - - if (value > 0.95) { - return generator.nextLong(2000, 20_000); - } - - if (value > 0.90) { - return generator.nextLong(1_000, 2_000); - } - - return generator.nextLong(10, 1000); - } -} diff --git a/presto-main/src/test/java/com/facebook/presto/execution/executor/SplitSpecification.java b/presto-main/src/test/java/com/facebook/presto/execution/executor/SplitSpecification.java deleted file mode 100644 index 1dccbb25aa73..000000000000 --- a/presto-main/src/test/java/com/facebook/presto/execution/executor/SplitSpecification.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 
(the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.facebook.presto.execution.executor; - -import com.facebook.presto.execution.executor.SimulationSplit.IntermediateSplit; -import com.facebook.presto.execution.executor.SimulationSplit.LeafSplit; - -import java.util.concurrent.ScheduledExecutorService; - -abstract class SplitSpecification -{ - private final long scheduledTimeNanos; - private final long perQuantaNanos; - - private SplitSpecification(long scheduledTimeNanos, long perQuantaNanos) - { - this.scheduledTimeNanos = scheduledTimeNanos; - this.perQuantaNanos = perQuantaNanos; - } - - public long getScheduledTimeNanos() - { - return scheduledTimeNanos; - } - - public long getPerQuantaNanos() - { - return perQuantaNanos; - } - - public abstract SimulationSplit instantiate(SimulationTask task); - - public static class LeafSplitSpecification - extends SplitSpecification - { - public LeafSplitSpecification(long scheduledTimeNanos, long perQuantaNanos) - { - super(scheduledTimeNanos, perQuantaNanos); - } - - public LeafSplit instantiate(SimulationTask task) - { - return new LeafSplit(task, super.getScheduledTimeNanos(), super.getPerQuantaNanos()); - } - } - - public static class IntermediateSplitSpecification - extends SplitSpecification - { - private final long wallTimeNanos; - private final long numQuantas; - private final long betweenQuantaNanos; - private final ScheduledExecutorService wakeupExecutor; - - public IntermediateSplitSpecification( - long scheduledTimeNanos, - long perQuantaNanos, 
- long wallTimeNanos, - long numQuantas, - long betweenQuantaNanos, - ScheduledExecutorService wakeupExecutor) - { - super(scheduledTimeNanos, perQuantaNanos); - this.wallTimeNanos = wallTimeNanos; - this.numQuantas = numQuantas; - this.betweenQuantaNanos = betweenQuantaNanos; - this.wakeupExecutor = wakeupExecutor; - } - - public IntermediateSplit instantiate(SimulationTask task) - { - return new IntermediateSplit(task, wallTimeNanos, numQuantas, super.getPerQuantaNanos(), betweenQuantaNanos, super.getScheduledTimeNanos(), wakeupExecutor); - } - } -} diff --git a/presto-main/src/test/java/com/facebook/presto/execution/executor/TaskExecutorSimulator.java b/presto-main/src/test/java/com/facebook/presto/execution/executor/TaskExecutorSimulator.java deleted file mode 100644 index 1c25bb082634..000000000000 --- a/presto-main/src/test/java/com/facebook/presto/execution/executor/TaskExecutorSimulator.java +++ /dev/null @@ -1,449 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package com.facebook.presto.execution.executor; - -import com.facebook.presto.execution.executor.SimulationController.TaskSpecification; -import com.facebook.presto.execution.executor.SplitGenerators.AggregatedLeafSplitGenerator; -import com.facebook.presto.execution.executor.SplitGenerators.FastLeafSplitGenerator; -import com.facebook.presto.execution.executor.SplitGenerators.IntermediateSplitGenerator; -import com.facebook.presto.execution.executor.SplitGenerators.L4LeafSplitGenerator; -import com.facebook.presto.execution.executor.SplitGenerators.QuantaExceedingSplitGenerator; -import com.facebook.presto.execution.executor.SplitGenerators.SimpleLeafSplitGenerator; -import com.facebook.presto.execution.executor.SplitGenerators.SlowLeafSplitGenerator; -import com.google.common.base.Ticker; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.ListMultimap; -import com.google.common.util.concurrent.ListeningExecutorService; -import io.airlift.units.Duration; -import org.joda.time.DateTime; - -import java.io.Closeable; -import java.util.List; -import java.util.LongSummaryStatistics; -import java.util.Map; -import java.util.OptionalInt; -import java.util.Set; -import java.util.concurrent.ScheduledExecutorService; -import java.util.stream.Collectors; - -import static com.facebook.airlift.concurrent.Threads.threadsNamed; -import static com.facebook.presto.execution.TaskManagerConfig.TaskPriorityTracking.TASK_FAIR; -import static com.facebook.presto.execution.executor.Histogram.fromContinuous; -import static com.facebook.presto.execution.executor.Histogram.fromDiscrete; -import static com.facebook.presto.execution.executor.SimulationController.TaskSpecification.Type.INTERMEDIATE; -import static com.facebook.presto.execution.executor.SimulationController.TaskSpecification.Type.LEAF; -import static com.google.common.util.concurrent.MoreExecutors.listeningDecorator; -import static 
io.airlift.units.Duration.nanosSince; -import static io.airlift.units.Duration.succinctNanos; -import static java.util.concurrent.Executors.newCachedThreadPool; -import static java.util.concurrent.Executors.newScheduledThreadPool; -import static java.util.concurrent.Executors.newSingleThreadScheduledExecutor; -import static java.util.concurrent.TimeUnit.DAYS; -import static java.util.concurrent.TimeUnit.HOURS; -import static java.util.concurrent.TimeUnit.MILLISECONDS; -import static java.util.concurrent.TimeUnit.MINUTES; -import static java.util.concurrent.TimeUnit.SECONDS; -import static java.util.function.Function.identity; - -public class TaskExecutorSimulator - implements Closeable -{ - public static void main(String[] args) - throws Exception - { - try (TaskExecutorSimulator simulator = new TaskExecutorSimulator()) { - simulator.run(); - } - } - - private final ListeningExecutorService submissionExecutor = listeningDecorator(newCachedThreadPool(threadsNamed(getClass().getSimpleName() + "-%s"))); - private final ScheduledExecutorService overallStatusPrintExecutor = newSingleThreadScheduledExecutor(); - private final ScheduledExecutorService runningSplitsPrintExecutor = newSingleThreadScheduledExecutor(); - private final ScheduledExecutorService wakeupExecutor = newScheduledThreadPool(32); - - private final TaskExecutor taskExecutor; - private final MultilevelSplitQueue splitQueue; - - private TaskExecutorSimulator() - { - splitQueue = new MultilevelSplitQueue(2); - taskExecutor = new TaskExecutor(36, 72, 3, 8, TASK_FAIR, splitQueue, Ticker.systemTicker()); - taskExecutor.start(); - } - - @Override - public void close() - { - submissionExecutor.shutdownNow(); - overallStatusPrintExecutor.shutdownNow(); - runningSplitsPrintExecutor.shutdownNow(); - wakeupExecutor.shutdownNow(); - taskExecutor.stop(); - } - - public void run() - throws Exception - { - long start = System.nanoTime(); - scheduleStatusPrinter(start); - - SimulationController controller = new 
SimulationController(taskExecutor, TaskExecutorSimulator::printSummaryStats); - - // Uncomment one of these: - // runExperimentOverloadedCluster(controller); - // runExperimentMisbehavingQuanta(controller); - // runExperimentStarveSlowSplits(controller); - runExperimentWithinLevelFairness(controller); - - System.out.println("Stopped scheduling new tasks. Ending simulation.."); - controller.stop(); - close(); - - SECONDS.sleep(5); - - System.out.println(); - System.out.println("Simulation finished at " + DateTime.now() + ". Runtime: " + nanosSince(start)); - System.out.println(); - - printSummaryStats(controller, taskExecutor); - } - - private void runExperimentOverloadedCluster(SimulationController controller) - throws InterruptedException - { - /* - Designed to simulate a somewhat overloaded Hive cluster. - The following data is a point-in-time snapshot representative production cluster: - - 60 running queries => 45 queries/node - - 80 tasks/node - - 600 splits scheduled/node (80% intermediate => ~480, 20% leaf => 120) - - Only 60% intermediate splits will ever get data (~300) - - Desired result: - This experiment should demonstrate the trade-offs that will be made during periods when a - node is under heavy load. Ideally, the different classes of tasks should each accumulate - scheduled time, and not spend disproportionately long waiting. 
- */ - - System.out.println("Overload experiment started."); - TaskSpecification leafSpec = new TaskSpecification(LEAF, "leaf", OptionalInt.empty(), 16, 30, new AggregatedLeafSplitGenerator()); - controller.addTaskSpecification(leafSpec); - - TaskSpecification slowLeafSpec = new TaskSpecification(LEAF, "slow_leaf", OptionalInt.empty(), 16, 10, new SlowLeafSplitGenerator()); - controller.addTaskSpecification(slowLeafSpec); - - TaskSpecification intermediateSpec = new TaskSpecification(INTERMEDIATE, "intermediate", OptionalInt.empty(), 8, 40, new IntermediateSplitGenerator(wakeupExecutor)); - controller.addTaskSpecification(intermediateSpec); - - controller.enableSpecification(leafSpec); - controller.enableSpecification(slowLeafSpec); - controller.enableSpecification(intermediateSpec); - controller.run(); - - SECONDS.sleep(30); - - // this gets the executor into a more realistic point-in-time state, where long running tasks start to make progress - for (int i = 0; i < 20; i++) { - controller.clearPendingQueue(); - MINUTES.sleep(1); - } - - System.out.println("Overload experiment completed."); - } - - private void runExperimentStarveSlowSplits(SimulationController controller) - throws InterruptedException - { - /* - Designed to simulate how higher level admission control affects short-term scheduling decisions. - A fixed, large number of tasks (120) are submitted at approximately the same time. - - Desired result: - Presto is designed to prioritize fast, short tasks at the expense of longer slower tasks. - This experiment allows us to quantify exactly how this preference manifests itself. It is - expected that shorter tasks will complete faster, however, longer tasks should not starve - for more than a couple of minutes at a time. 
- */ - - System.out.println("Starvation experiment started."); - TaskSpecification slowLeafSpec = new TaskSpecification(LEAF, "slow_leaf", OptionalInt.of(600), 40, 4, new SlowLeafSplitGenerator()); - controller.addTaskSpecification(slowLeafSpec); - - TaskSpecification intermediateSpec = new TaskSpecification(INTERMEDIATE, "intermediate", OptionalInt.of(400), 40, 8, new IntermediateSplitGenerator(wakeupExecutor)); - controller.addTaskSpecification(intermediateSpec); - - TaskSpecification fastLeafSpec = new TaskSpecification(LEAF, "fast_leaf", OptionalInt.of(600), 40, 4, new FastLeafSplitGenerator()); - controller.addTaskSpecification(fastLeafSpec); - - controller.enableSpecification(slowLeafSpec); - controller.enableSpecification(fastLeafSpec); - controller.enableSpecification(intermediateSpec); - - controller.run(); - - for (int i = 0; i < 60; i++) { - SECONDS.sleep(20); - controller.clearPendingQueue(); - } - - System.out.println("Starvation experiment completed."); - } - - private void runExperimentMisbehavingQuanta(SimulationController controller) - throws InterruptedException - { - /* - Designed to simulate how Presto allocates resources in scenarios where there is variance in - quanta run-time between tasks. - - Desired result: - Variance in quanta run time should not affect total accrued scheduled time. It is - acceptable, however, to penalize tasks that use extremely short quanta, as each quanta - incurs scheduling overhead. 
- */ - - System.out.println("Misbehaving quanta experiment started."); - - TaskSpecification slowLeafSpec = new TaskSpecification(LEAF, "good_leaf", OptionalInt.empty(), 16, 4, new L4LeafSplitGenerator()); - controller.addTaskSpecification(slowLeafSpec); - - TaskSpecification misbehavingLeafSpec = new TaskSpecification(LEAF, "bad_leaf", OptionalInt.empty(), 16, 4, new QuantaExceedingSplitGenerator()); - controller.addTaskSpecification(misbehavingLeafSpec); - - controller.enableSpecification(slowLeafSpec); - controller.enableSpecification(misbehavingLeafSpec); - - controller.run(); - - for (int i = 0; i < 120; i++) { - controller.clearPendingQueue(); - SECONDS.sleep(20); - } - - System.out.println("Misbehaving quanta experiment completed."); - } - - private void runExperimentWithinLevelFairness(SimulationController controller) - throws InterruptedException - { - /* - Designed to simulate how Presto allocates resources to tasks at the same level of the - feedback queue when there is large variance in accrued scheduled time. - - Desired result: - Scheduling within levels should be fair - total accrued time should not affect what - fraction of resources tasks are allocated as long as they are in the same level. 
- */ - - System.out.println("Level fairness experiment started."); - - TaskSpecification longLeafSpec = new TaskSpecification(INTERMEDIATE, "l4_long", OptionalInt.empty(), 2, 16, new SimpleLeafSplitGenerator(MINUTES.toNanos(4), SECONDS.toNanos(1))); - controller.addTaskSpecification(longLeafSpec); - - TaskSpecification shortLeafSpec = new TaskSpecification(INTERMEDIATE, "l4_short", OptionalInt.empty(), 2, 16, new SimpleLeafSplitGenerator(MINUTES.toNanos(2), SECONDS.toNanos(1))); - controller.addTaskSpecification(shortLeafSpec); - - controller.enableSpecification(longLeafSpec); - controller.run(); - - // wait until long tasks are all well into L4 - MINUTES.sleep(1); - controller.runCallback(); - - // start short leaf tasks - controller.enableSpecification(shortLeafSpec); - - // wait until short tasks hit L4 - SECONDS.sleep(25); - controller.runCallback(); - - // now watch for L4 fairness at this point - MINUTES.sleep(2); - - System.out.println("Level fairness experiment completed."); - } - - private void scheduleStatusPrinter(long start) - { - overallStatusPrintExecutor.scheduleAtFixedRate(() -> { - try { - System.out.printf( - "%6s -- %4s splits (R: %2s L: %3s I: %3s B: %3s W: %3s C: %5s) | %3s tasks (%3s %3s %3s %3s %3s) | Selections: %4s %4s %4s %4s %3s\n", - nanosSince(start), - taskExecutor.getTotalSplits(), - taskExecutor.getRunningSplits(), - taskExecutor.getTotalSplits() - taskExecutor.getIntermediateSplits(), - taskExecutor.getIntermediateSplits(), - taskExecutor.getBlockedSplits(), - taskExecutor.getWaitingSplits(), - taskExecutor.getCompletedSplitsLevel0() + taskExecutor.getCompletedSplitsLevel1() + taskExecutor.getCompletedSplitsLevel2() + taskExecutor.getCompletedSplitsLevel3() + taskExecutor.getCompletedSplitsLevel4(), - taskExecutor.getTasks(), - taskExecutor.getRunningTasksLevel0(), - taskExecutor.getRunningTasksLevel1(), - taskExecutor.getRunningTasksLevel2(), - taskExecutor.getRunningTasksLevel3(), - taskExecutor.getRunningTasksLevel4(), - (int) 
splitQueue.getSelectedCountLevel0().getOneMinute().getRate(), - (int) splitQueue.getSelectedCountLevel1().getOneMinute().getRate(), - (int) splitQueue.getSelectedCountLevel2().getOneMinute().getRate(), - (int) splitQueue.getSelectedCountLevel3().getOneMinute().getRate(), - (int) splitQueue.getSelectedCountLevel4().getOneMinute().getRate()); - } - catch (Exception ignored) { - } - }, 1, 1, SECONDS); - } - - private static void printSummaryStats(SimulationController controller, TaskExecutor taskExecutor) - { - Map specEnabled = controller.getSpecificationEnabled(); - - ListMultimap completedTasks = controller.getCompletedTasks(); - ListMultimap runningTasks = controller.getRunningTasks(); - Set allTasks = ImmutableSet.builder().addAll(completedTasks.values()).addAll(runningTasks.values()).build(); - - long completedSplits = completedTasks.values().stream().mapToInt(t -> t.getCompletedSplits().size()).sum(); - long runningSplits = runningTasks.values().stream().mapToInt(t -> t.getCompletedSplits().size()).sum(); - - System.out.println("Completed tasks : " + completedTasks.size()); - System.out.println("Remaining tasks : " + runningTasks.size()); - System.out.println("Completed splits: " + completedSplits); - System.out.println("Remaining splits: " + runningSplits); - System.out.println(); - System.out.println("Completed tasks L0: " + taskExecutor.getCompletedTasksLevel0()); - System.out.println("Completed tasks L1: " + taskExecutor.getCompletedTasksLevel1()); - System.out.println("Completed tasks L2: " + taskExecutor.getCompletedTasksLevel2()); - System.out.println("Completed tasks L3: " + taskExecutor.getCompletedTasksLevel3()); - System.out.println("Completed tasks L4: " + taskExecutor.getCompletedTasksLevel4()); - System.out.println(); - System.out.println("Completed splits L0: " + taskExecutor.getCompletedSplitsLevel0()); - System.out.println("Completed splits L1: " + taskExecutor.getCompletedSplitsLevel1()); - System.out.println("Completed splits L2: " + 
taskExecutor.getCompletedSplitsLevel2()); - System.out.println("Completed splits L3: " + taskExecutor.getCompletedSplitsLevel3()); - System.out.println("Completed splits L4: " + taskExecutor.getCompletedSplitsLevel4()); - - Histogram levelsHistogram = fromContinuous(ImmutableList.of( - MILLISECONDS.toNanos(0L), - MILLISECONDS.toNanos(1_000), - MILLISECONDS.toNanos(10_000L), - MILLISECONDS.toNanos(60_000L), - MILLISECONDS.toNanos(300_000L), - HOURS.toNanos(1), - DAYS.toNanos(1))); - - System.out.println(); - System.out.println("Levels - Completed Task Processed Time"); - levelsHistogram.printDistribution( - completedTasks.values().stream().filter(t -> t.getSpecification().getType() == LEAF).collect(Collectors.toList()), - SimulationTask::getScheduledTimeNanos, - SimulationTask::getProcessedTimeNanos, - Duration::succinctNanos, - TaskExecutorSimulator::formatNanos); - - System.out.println(); - System.out.println("Levels - Running Task Processed Time"); - levelsHistogram.printDistribution( - runningTasks.values().stream().filter(t -> t.getSpecification().getType() == LEAF).collect(Collectors.toList()), - SimulationTask::getScheduledTimeNanos, - SimulationTask::getProcessedTimeNanos, - Duration::succinctNanos, - TaskExecutorSimulator::formatNanos); - - System.out.println(); - System.out.println("Levels - All Task Wait Time"); - levelsHistogram.printDistribution( - runningTasks.values().stream().filter(t -> t.getSpecification().getType() == LEAF).collect(Collectors.toList()), - SimulationTask::getScheduledTimeNanos, - SimulationTask::getTotalWaitTimeNanos, - Duration::succinctNanos, - TaskExecutorSimulator::formatNanos); - - System.out.println(); - System.out.println("Specification - Processed time"); - Set specifications = runningTasks.values().stream().map(t -> t.getSpecification().getName()).collect(Collectors.toSet()); - fromDiscrete(specifications).printDistribution( - allTasks, - t -> t.getSpecification().getName(), - SimulationTask::getProcessedTimeNanos, - 
identity(), - TaskExecutorSimulator::formatNanos); - - System.out.println(); - System.out.println("Specification - Wait time"); - fromDiscrete(specifications).printDistribution( - allTasks, - t -> t.getSpecification().getName(), - SimulationTask::getTotalWaitTimeNanos, - identity(), - TaskExecutorSimulator::formatNanos); - - System.out.println(); - System.out.println("Breakdown by specification"); - System.out.println("##########################"); - for (TaskSpecification specification : specEnabled.keySet()) { - List allSpecificationTasks = ImmutableList.builder() - .addAll(completedTasks.get(specification)) - .addAll(runningTasks.get(specification)) - .build(); - - System.out.println(specification.getName()); - System.out.println("============================="); - System.out.println("Completed tasks : " + completedTasks.get(specification).size()); - System.out.println("In-progress tasks : " + runningTasks.get(specification).size()); - System.out.println("Total tasks : " + specification.getTotalTasks()); - System.out.println("Splits/task : " + specification.getNumSplitsPerTask()); - System.out.println("Current required time : " + succinctNanos(allSpecificationTasks.stream().mapToLong(SimulationTask::getScheduledTimeNanos).sum())); - System.out.println("Completed scheduled time : " + succinctNanos(allSpecificationTasks.stream().mapToLong(SimulationTask::getProcessedTimeNanos).sum())); - System.out.println("Total wait time : " + succinctNanos(allSpecificationTasks.stream().mapToLong(SimulationTask::getTotalWaitTimeNanos).sum())); - - System.out.println(); - System.out.println("All Tasks by Scheduled time - Processed Time"); - levelsHistogram.printDistribution( - allSpecificationTasks, - SimulationTask::getScheduledTimeNanos, - SimulationTask::getProcessedTimeNanos, - Duration::succinctNanos, - TaskExecutorSimulator::formatNanos); - - System.out.println(); - System.out.println("All Tasks by Scheduled time - Wait Time"); - levelsHistogram.printDistribution( - 
allSpecificationTasks, - SimulationTask::getScheduledTimeNanos, - SimulationTask::getTotalWaitTimeNanos, - Duration::succinctNanos, - TaskExecutorSimulator::formatNanos); - - System.out.println(); - System.out.println("Complete Tasks by Scheduled time - Wait Time"); - levelsHistogram.printDistribution( - completedTasks.get(specification), - SimulationTask::getScheduledTimeNanos, - SimulationTask::getTotalWaitTimeNanos, - Duration::succinctNanos, - TaskExecutorSimulator::formatNanos); - } - } - - private static String formatNanos(List list) - { - LongSummaryStatistics stats = list.stream().mapToLong(Long::new).summaryStatistics(); - return String.format("Min: %8s Max: %8s Avg: %8s Sum: %8s", - succinctNanos(stats.getMin() == Long.MAX_VALUE ? 0 : stats.getMin()), - succinctNanos(stats.getMax() == Long.MIN_VALUE ? 0 : stats.getMax()), - succinctNanos((long) stats.getAverage()), - succinctNanos(stats.getSum())); - } -} From 27d38bf0f807b6ba70889283d94cbcb0b2180da7 Mon Sep 17 00:00:00 2001 From: Elliotte Rusty Harold Date: Wed, 5 Jun 2024 12:33:25 -0400 Subject: [PATCH 28/86] Remove format() calls for constant strings with no arguments --- .../facebook/presto/plugin/bigquery/BigQuerySplitManager.java | 3 +-- .../facebook/presto/druid/segment/SmooshedColumnSource.java | 2 +- .../AbstractSqlInvokedFunctionNamespaceManager.java | 2 +- .../src/main/java/com/facebook/presto/hive/HiveMetadata.java | 2 +- .../facebook/presto/hive/TestHiveIntegrationSmokeTest.java | 2 +- .../presto/nativeworker/PrestoNativeQueryRunnerUtils.java | 2 +- .../java/com/facebook/presto/pinot/PinotBrokerPageSource.java | 2 +- .../com/facebook/presto/spark/TestPrestoSparkQueryRunner.java | 4 ++-- 8 files changed, 9 insertions(+), 10 deletions(-) diff --git a/presto-bigquery/src/main/java/com/facebook/presto/plugin/bigquery/BigQuerySplitManager.java b/presto-bigquery/src/main/java/com/facebook/presto/plugin/bigquery/BigQuerySplitManager.java index 17b5f335153a..453b31842b55 100644 --- 
a/presto-bigquery/src/main/java/com/facebook/presto/plugin/bigquery/BigQuerySplitManager.java +++ b/presto-bigquery/src/main/java/com/facebook/presto/plugin/bigquery/BigQuerySplitManager.java @@ -36,7 +36,6 @@ import static com.facebook.presto.plugin.bigquery.BigQueryErrorCode.BIGQUERY_FAILED_TO_EXECUTE_QUERY; import static com.google.cloud.bigquery.storage.v1beta1.Storage.ReadSession; import static com.google.common.collect.ImmutableList.toImmutableList; -import static java.lang.String.format; import static java.util.Objects.requireNonNull; import static java.util.stream.Collectors.toList; import static java.util.stream.IntStream.range; @@ -131,7 +130,7 @@ private List createEmptyProjection(TableId tableId, int actualPar return splits; } catch (BigQueryException e) { - throw new PrestoException(BIGQUERY_FAILED_TO_EXECUTE_QUERY, format("Failed to compute empty projection"), e); + throw new PrestoException(BIGQUERY_FAILED_TO_EXECUTE_QUERY, "Failed to compute empty projection", e); } } } diff --git a/presto-druid/src/main/java/com/facebook/presto/druid/segment/SmooshedColumnSource.java b/presto-druid/src/main/java/com/facebook/presto/druid/segment/SmooshedColumnSource.java index 21364a361124..e731bc261764 100644 --- a/presto-druid/src/main/java/com/facebook/presto/druid/segment/SmooshedColumnSource.java +++ b/presto-druid/src/main/java/com/facebook/presto/druid/segment/SmooshedColumnSource.java @@ -82,7 +82,7 @@ private void loadSmooshFileMetadata() BufferedReader in = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(metadata))); String line = in.readLine(); if (line == null) { - throw new PrestoException(DRUID_SEGMENT_LOAD_ERROR, format("Malformed metadata file: first line should be version,maxChunkSize,numChunks, got null.")); + throw new PrestoException(DRUID_SEGMENT_LOAD_ERROR, "Malformed metadata file: first line should be version,maxChunkSize,numChunks, got null."); } String[] splits = line.split(","); diff --git 
a/presto-function-namespace-managers/src/main/java/com/facebook/presto/functionNamespace/AbstractSqlInvokedFunctionNamespaceManager.java b/presto-function-namespace-managers/src/main/java/com/facebook/presto/functionNamespace/AbstractSqlInvokedFunctionNamespaceManager.java index f8701da6284b..b0bfdbc34d03 100644 --- a/presto-function-namespace-managers/src/main/java/com/facebook/presto/functionNamespace/AbstractSqlInvokedFunctionNamespaceManager.java +++ b/presto-function-namespace-managers/src/main/java/com/facebook/presto/functionNamespace/AbstractSqlInvokedFunctionNamespaceManager.java @@ -331,7 +331,7 @@ protected ScalarFunctionImplementation sqlInvokedFunctionToImplementation(SqlInv throw new IllegalStateException( format("SqlInvokedFunction %s has BUILTIN implementation type but %s cannot manage BUILTIN functions", function.getSignature().getName(), this.getClass())); case CPP: - throw new IllegalStateException(format("Presto coordinator can not resolve implementation of CPP UDF functions")); + throw new IllegalStateException("Presto coordinator can not resolve implementation of CPP UDF functions"); default: throw new IllegalStateException(format("Unknown function implementation type: %s", implementationType)); } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadata.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadata.java index 95a082c4ea0f..cb55230ed41d 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadata.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadata.java @@ -3509,7 +3509,7 @@ protected Optional getTableEncryptionPropertiesFromTa } if (seenColumns.contains(columnWithSubfield.toString())) { - throw new PrestoException(INVALID_TABLE_PROPERTY, format("The same column/subfield cannot have 2 encryption keys")); + throw new PrestoException(INVALID_TABLE_PROPERTY, "The same column/subfield cannot have 2 encryption keys"); } if 
(columnWithSubfield.getSubfieldPath().isPresent()) { diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveIntegrationSmokeTest.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveIntegrationSmokeTest.java index 815b60088109..27a1df19f6c1 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveIntegrationSmokeTest.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveIntegrationSmokeTest.java @@ -6395,7 +6395,7 @@ public void testAddTableConstraints() // Negative tests assertQueryFails(addPrimaryKeyStmt, format("Primary key already exists for: %s.%s", getSession().getSchema().get(), tableName)); - assertQueryFails(addUniqueConstraintStmt, format("Constraint already exists: 'uq3'")); + assertQueryFails(addUniqueConstraintStmt, "Constraint already exists: 'uq3'"); String dropNonExistentConstraint = format("ALTER TABLE %s.%s.%s DROP CONSTRAINT missingconstraint", getSession().getCatalog().get(), getSession().getSchema().get(), tableName); assertQueryFails(dropNonExistentConstraint, "Constraint 'missingconstraint' not found"); diff --git a/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/PrestoNativeQueryRunnerUtils.java b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/PrestoNativeQueryRunnerUtils.java index 69bb2433b3f1..e21d87f0182a 100644 --- a/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/PrestoNativeQueryRunnerUtils.java +++ b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/PrestoNativeQueryRunnerUtils.java @@ -466,7 +466,7 @@ public static Optional> getExternalWorkerLaunc } else { Files.write(catalogDirectoryPath.resolve(format("%s.properties", catalogName)), - format("connector.name=hive").getBytes()); + "connector.name=hive".getBytes()); } // Add catalog with caching always enabled. 
Files.write(catalogDirectoryPath.resolve(format("%scached.properties", catalogName)), diff --git a/presto-pinot-toolkit/src/main/java/com/facebook/presto/pinot/PinotBrokerPageSource.java b/presto-pinot-toolkit/src/main/java/com/facebook/presto/pinot/PinotBrokerPageSource.java index f61319de689c..2e5e3aa2093b 100644 --- a/presto-pinot-toolkit/src/main/java/com/facebook/presto/pinot/PinotBrokerPageSource.java +++ b/presto-pinot-toolkit/src/main/java/com/facebook/presto/pinot/PinotBrokerPageSource.java @@ -391,7 +391,7 @@ public int populateFromQueryResults( throw new PinotException( PINOT_UNEXPECTED_RESPONSE, Optional.of(sql), - String.format("Expected data schema in the response")); + "Expected data schema in the response"); } JsonNode columnDataTypes = dataSchema.get("columnDataTypes"); JsonNode columnNames = dataSchema.get("columnNames"); diff --git a/presto-spark-base/src/test/java/com/facebook/presto/spark/TestPrestoSparkQueryRunner.java b/presto-spark-base/src/test/java/com/facebook/presto/spark/TestPrestoSparkQueryRunner.java index 31f227ff76c1..1a030f4184c0 100644 --- a/presto-spark-base/src/test/java/com/facebook/presto/spark/TestPrestoSparkQueryRunner.java +++ b/presto-spark-base/src/test/java/com/facebook/presto/spark/TestPrestoSparkQueryRunner.java @@ -133,10 +133,10 @@ public void testZeroFileCreatorForBucketedTable() { assertUpdate( getSession(), - format("CREATE TABLE hive.hive_test.test_hive_orders_bucketed_join_zero_file WITH (bucketed_by=array['orderkey'], bucket_count=8) AS " + + "CREATE TABLE hive.hive_test.test_hive_orders_bucketed_join_zero_file WITH (bucketed_by=array['orderkey'], bucket_count=8) AS " + "SELECT orderkey, custkey, orderstatus, totalprice, orderdate, orderpriority, clerk, shippriority, comment " + "FROM orders_bucketed " + - "WHERE orderkey = 1"), + "WHERE orderkey = 1", 1); } From 6c57a3e3c299f016bab7ba7481f188d58f2ace83 Mon Sep 17 00:00:00 2001 From: wangd Date: Wed, 25 Sep 2024 09:34:20 +0800 Subject: [PATCH 29/86] 
[Iceberg]Support timestamp without timezone in time travel expressions --- .../src/main/sphinx/connector/iceberg.rst | 11 +- .../iceberg/IcebergAbstractMetadata.java | 6 + .../iceberg/TestIcebergTableVersion.java | 106 ++++++++++++++++-- .../sql/analyzer/StatementAnalyzer.java | 5 +- 4 files changed, 117 insertions(+), 11 deletions(-) diff --git a/presto-docs/src/main/sphinx/connector/iceberg.rst b/presto-docs/src/main/sphinx/connector/iceberg.rst index 727ba555f0fb..3cad43804f08 100644 --- a/presto-docs/src/main/sphinx/connector/iceberg.rst +++ b/presto-docs/src/main/sphinx/connector/iceberg.rst @@ -1631,9 +1631,11 @@ In this example, SYSTEM_TIME can be used as an alias for TIMESTAMP. // In following query, timestamp string is matching with second inserted record. SELECT * FROM ctas_nation FOR TIMESTAMP AS OF TIMESTAMP '2023-10-17 13:29:46.822 America/Los_Angeles'; + SELECT * FROM ctas_nation FOR TIMESTAMP AS OF TIMESTAMP '2023-10-17 13:29:46.822'; // Same example using SYSTEM_TIME as an alias for TIMESTAMP SELECT * FROM ctas_nation FOR SYSTEM_TIME AS OF TIMESTAMP '2023-10-17 13:29:46.822 America/Los_Angeles'; + SELECT * FROM ctas_nation FOR SYSTEM_TIME AS OF TIMESTAMP '2023-10-17 13:29:46.822'; .. code-block:: text @@ -1643,8 +1645,12 @@ In this example, SYSTEM_TIME can be used as an alias for TIMESTAMP. 20 | canada | 2 | comment (2 rows) -The option following FOR TIMESTAMP AS OF can accept any expression that returns a timestamp with time zone value. -For example, `TIMESTAMP '2023-10-17 13:29:46.822 America/Los_Angeles'` is a constant string for the expression. +.. note:: + + Timestamp without timezone will be parsed and rendered in the session time zone. See `TIMESTAMP `_. + +The option following FOR TIMESTAMP AS OF can accept any expression that returns a timestamp or timestamp with time zone value. +For example, `TIMESTAMP '2023-10-17 13:29:46.822 America/Los_Angeles'` and `TIMESTAMP '2023-10-17 13:29:46.822'` are both valid timestamps. 
The first specifies the timestamp within the timezone `America/Los_Angeles`. The second will use the timestamp based on the user's session timezone. In the following query, the expression CURRENT_TIMESTAMP returns the current timestamp with time zone value. .. code-block:: sql @@ -1665,6 +1671,7 @@ In the following query, the expression CURRENT_TIMESTAMP returns the current tim // In following query, timestamp string is matching with second inserted record. // BEFORE clause returns first record which is less than timestamp of the second record. SELECT * FROM ctas_nation FOR TIMESTAMP BEFORE TIMESTAMP '2023-10-17 13:29:46.822 America/Los_Angeles'; + SELECT * FROM ctas_nation FOR TIMESTAMP BEFORE TIMESTAMP '2023-10-17 13:29:46.822'; .. code-block:: text diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergAbstractMetadata.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergAbstractMetadata.java index c920f22e6f6b..f60e13abd2b0 100644 --- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergAbstractMetadata.java +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergAbstractMetadata.java @@ -19,6 +19,7 @@ import com.facebook.presto.common.predicate.TupleDomain; import com.facebook.presto.common.type.BigintType; import com.facebook.presto.common.type.SqlTimestampWithTimeZone; +import com.facebook.presto.common.type.TimestampType; import com.facebook.presto.common.type.TimestampWithTimeZoneType; import com.facebook.presto.common.type.TypeManager; import com.facebook.presto.common.type.VarcharType; @@ -981,6 +982,11 @@ private static long getSnapshotIdForTableVersion(Table table, ConnectorTableVers long millisUtc = new SqlTimestampWithTimeZone((long) tableVersion.getTableVersion()).getMillisUtc(); return getSnapshotIdTimeOperator(table, millisUtc, tableVersion.getVersionOperator()); } + else if (tableVersion.getVersionExpressionType() instanceof TimestampType) { + long timestampValue = (long) 
tableVersion.getTableVersion(); + long millisUtc = ((TimestampType) tableVersion.getVersionExpressionType()).getPrecision().toMillis(timestampValue); + return getSnapshotIdTimeOperator(table, millisUtc, tableVersion.getVersionOperator()); + } throw new PrestoException(NOT_SUPPORTED, "Unsupported table version expression type: " + tableVersion.getVersionExpressionType()); } if (tableVersion.getVersionType() == VersionType.VERSION) { diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergTableVersion.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergTableVersion.java index 8da55ce029aa..7f75dfcea61c 100644 --- a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergTableVersion.java +++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergTableVersion.java @@ -14,21 +14,31 @@ package com.facebook.presto.iceberg; import com.facebook.presto.Session; +import com.facebook.presto.Session.SessionBuilder; +import com.facebook.presto.common.type.TimeZoneKey; import com.facebook.presto.testing.QueryRunner; import com.facebook.presto.tests.AbstractTestQueryFramework; import com.facebook.presto.tests.DistributedQueryRunner; import com.google.common.collect.ImmutableMap; import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.nio.file.Path; +import java.time.Instant; +import java.time.LocalDateTime; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; import java.util.Map; +import static com.facebook.presto.SystemSessionProperties.LEGACY_TIMESTAMP; import static com.facebook.presto.iceberg.CatalogType.HIVE; import static com.facebook.presto.iceberg.IcebergQueryRunner.ICEBERG_CATALOG; import static com.facebook.presto.iceberg.IcebergQueryRunner.getIcebergDataDirectoryPath; import static com.facebook.presto.testing.TestingSession.testSessionBuilder; +import static 
java.lang.String.format; +import static org.testng.Assert.assertTrue; public class TestIcebergTableVersion extends AbstractTestQueryFramework @@ -271,6 +281,55 @@ public void testTableVersionMisc() assertQuery("SELECT count(*) FROM " + viewName3 + " INNER JOIN " + viewName4 + " ON " + viewName3 + ".id = " + viewName4 + ".id", "VALUES 2"); } + @DataProvider(name = "timezones") + public Object[][] timezones() + { + return new Object[][] { + {"UTC", true}, + {"America/Los_Angeles", true}, + {"Asia/Shanghai", true}, + {"UTC", false}}; + } + + @Test(dataProvider = "timezones") + public void testTableVersionWithTimestamp(String zoneId, boolean legacyTimestamp) + { + Session session = sessionForTimezone(zoneId, legacyTimestamp); + String tableName = schemaName + "." + "table_version_with_timestamp"; + try { + assertUpdate(session, "CREATE TABLE " + tableName + " (id integer, desc varchar) WITH(partitioning = ARRAY['id'])"); + assertUpdate(session, "INSERT INTO " + tableName + " VALUES(1, 'aaa')", 1); + waitUntilAfter(System.currentTimeMillis()); + + long timestampMillis1 = System.currentTimeMillis(); + String timestampWithoutTZ1 = getTimestampString(timestampMillis1, zoneId); + waitUntilAfter(timestampMillis1); + + assertUpdate(session, "INSERT INTO " + tableName + " VALUES(2, 'bbb')", 1); + waitUntilAfter(System.currentTimeMillis()); + + long timestampMillis2 = System.currentTimeMillis(); + String timestampWithoutTZ2 = getTimestampString(timestampMillis2, zoneId); + waitUntilAfter(timestampMillis2); + + assertUpdate(session, "INSERT INTO " + tableName + " VALUES(3, 'ccc')", 1); + waitUntilAfter(System.currentTimeMillis()); + + long timestampMillis3 = System.currentTimeMillis(); + String timestampWithoutTZ3 = getTimestampString(timestampMillis3, zoneId); + + assertQuery(session, "SELECT desc FROM " + tableName + " FOR TIMESTAMP AS OF TIMESTAMP " + "'" + timestampWithoutTZ1 + "'", "VALUES 'aaa'"); + assertQuery(session, "SELECT desc FROM " + tableName + " FOR TIMESTAMP 
BEFORE TIMESTAMP " + "'" + timestampWithoutTZ1 + "'", "VALUES 'aaa'"); + assertQuery(session, "SELECT desc FROM " + tableName + " FOR TIMESTAMP AS OF TIMESTAMP " + "'" + timestampWithoutTZ2 + "'", "VALUES 'aaa', 'bbb'"); + assertQuery(session, "SELECT desc FROM " + tableName + " FOR TIMESTAMP BEFORE TIMESTAMP " + "'" + timestampWithoutTZ2 + "'", "VALUES 'aaa', 'bbb'"); + assertQuery(session, "SELECT desc FROM " + tableName + " FOR TIMESTAMP AS OF TIMESTAMP " + "'" + timestampWithoutTZ3 + "'", "VALUES 'aaa', 'bbb', 'ccc'"); + assertQuery(session, "SELECT desc FROM " + tableName + " FOR TIMESTAMP BEFORE TIMESTAMP " + "'" + timestampWithoutTZ3 + "'", "VALUES 'aaa', 'bbb', 'ccc'"); + } + finally { + assertQuerySucceeds("DROP TABLE IF EXISTS " + tableName); + } + } + @Test public void testTableVersionErrors() { @@ -284,23 +343,56 @@ public void testTableVersionErrors() assertQueryFails("SELECT desc FROM " + tableName2 + " FOR VERSION AS OF " + tab2VersionId1 + " - " + tab2VersionId1, "Iceberg snapshot ID does not exists: 0"); assertQueryFails("SELECT desc FROM " + tableName2 + " FOR VERSION AS OF CAST (100 AS BIGINT)", "Iceberg snapshot ID does not exists: 100"); - assertQueryFails("SELECT desc FROM " + tableName2 + " FOR TIMESTAMP AS OF 100", ".* Type integer is invalid. Supported table version AS OF/BEFORE expression type is Timestamp with Time Zone."); - assertQueryFails("SELECT desc FROM " + tableName2 + " FOR TIMESTAMP AS OF 'bad'", ".* Type varchar\\(3\\) is invalid. Supported table version AS OF/BEFORE expression type is Timestamp with Time Zone."); + assertQueryFails("SELECT desc FROM " + tableName2 + " FOR TIMESTAMP AS OF 100", ".* Type integer is invalid. Supported table version AS OF/BEFORE expression type is Timestamp or Timestamp with Time Zone."); + assertQueryFails("SELECT desc FROM " + tableName2 + " FOR TIMESTAMP AS OF 'bad'", ".* Type varchar\\(3\\) is invalid. 
Supported table version AS OF/BEFORE expression type is Timestamp or Timestamp with Time Zone."); assertQueryFails("SELECT desc FROM " + tableName2 + " FOR TIMESTAMP AS OF id", ".* cannot be resolved"); assertQueryFails("SELECT desc FROM " + tableName2 + " FOR TIMESTAMP AS OF (SELECT CURRENT_TIMESTAMP)", ".* Constant expression cannot contain a subquery"); assertQueryFails("SELECT desc FROM " + tableName2 + " FOR TIMESTAMP AS OF NULL", "Table version AS OF/BEFORE expression cannot be NULL for .*"); assertQueryFails("SELECT desc FROM " + tableName2 + " FOR TIMESTAMP AS OF TIMESTAMP " + "'" + tab2Timestamp1 + "' - INTERVAL '1' MONTH", "No history found based on timestamp for table \"test_tt_schema\".\"test_table_version_tab2\""); assertQueryFails("SELECT desc FROM " + tableName2 + " FOR TIMESTAMP AS OF CAST ('2023-01-01' AS TIMESTAMP WITH TIME ZONE)", "No history found based on timestamp for table \"test_tt_schema\".\"test_table_version_tab2\""); - assertQueryFails("SELECT desc FROM " + tableName2 + " FOR TIMESTAMP AS OF CAST ('2023-01-01' AS TIMESTAMP)", ".* Type timestamp is invalid. Supported table version AS OF/BEFORE expression type is Timestamp with Time Zone."); - assertQueryFails("SELECT desc FROM " + tableName2 + " FOR TIMESTAMP AS OF CAST ('2023-01-01' AS DATE)", ".* Type date is invalid. Supported table version AS OF/BEFORE expression type is Timestamp with Time Zone."); - assertQueryFails("SELECT desc FROM " + tableName2 + " FOR TIMESTAMP AS OF CURRENT_DATE", ".* Type date is invalid. Supported table version AS OF/BEFORE expression type is Timestamp with Time Zone."); - assertQueryFails("SELECT desc FROM " + tableName2 + " FOR TIMESTAMP AS OF TIMESTAMP '2023-01-01 00:00:00.000'", ".* Type timestamp is invalid. 
Supported table version AS OF/BEFORE expression type is Timestamp with Time Zone."); + assertQueryFails("SELECT desc FROM " + tableName2 + " FOR TIMESTAMP AS OF CAST ('2023-01-01' AS TIMESTAMP)", "No history found based on timestamp for table \"test_tt_schema\".\"test_table_version_tab2\""); + assertQueryFails("SELECT desc FROM " + tableName2 + " FOR TIMESTAMP AS OF CAST ('2023-01-01' AS DATE)", ".* Type date is invalid. Supported table version AS OF/BEFORE expression type is Timestamp or Timestamp with Time Zone."); + assertQueryFails("SELECT desc FROM " + tableName2 + " FOR TIMESTAMP AS OF CURRENT_DATE", ".* Type date is invalid. Supported table version AS OF/BEFORE expression type is Timestamp or Timestamp with Time Zone."); + assertQueryFails("SELECT desc FROM " + tableName2 + " FOR TIMESTAMP AS OF TIMESTAMP '2023-01-01 00:00:00.000'", "No history found based on timestamp for table \"test_tt_schema\".\"test_table_version_tab2\""); assertQueryFails("SELECT desc FROM " + tableName1 + " FOR VERSION BEFORE " + tab1VersionId1 + " ORDER BY 1", "No history found based on timestamp for table \"test_tt_schema\".\"test_table_version_tab1\""); assertQueryFails("SELECT desc FROM " + tableName2 + " FOR TIMESTAMP BEFORE TIMESTAMP " + "'" + tab2Timestamp1 + "' - INTERVAL '1' MONTH", "No history found based on timestamp for table \"test_tt_schema\".\"test_table_version_tab2\""); assertQueryFails("SELECT desc FROM " + tableName2 + " FOR VERSION BEFORE 100", ".* Type integer is invalid. Supported table version AS OF/BEFORE expression type is BIGINT or VARCHAR"); assertQueryFails("SELECT desc FROM " + tableName2 + " FOR VERSION BEFORE " + tab2VersionId1 + " - " + tab2VersionId1, "Iceberg snapshot ID does not exists: 0"); - assertQueryFails("SELECT desc FROM " + tableName2 + " FOR TIMESTAMP BEFORE 'bad'", ".* Type varchar\\(3\\) is invalid. 
Supported table version AS OF/BEFORE expression type is Timestamp with Time Zone."); + assertQueryFails("SELECT desc FROM " + tableName2 + " FOR TIMESTAMP BEFORE 'bad'", ".* Type varchar\\(3\\) is invalid. Supported table version AS OF/BEFORE expression type is Timestamp or Timestamp with Time Zone."); assertQueryFails("SELECT desc FROM " + tableName2 + " FOR TIMESTAMP BEFORE NULL", "Table version AS OF/BEFORE expression cannot be NULL for .*"); } + + private Session sessionForTimezone(String zoneId, boolean legacyTimestamp) + { + SessionBuilder sessionBuilder = Session.builder(getSession()) + .setSystemProperty(LEGACY_TIMESTAMP, String.valueOf(legacyTimestamp)); + if (legacyTimestamp) { + sessionBuilder.setTimeZoneKey(TimeZoneKey.getTimeZoneKey(zoneId)); + } + return sessionBuilder.build(); + } + + private long waitUntilAfter(long snapshotTimeMillis) + { + long currentTimeMillis = System.currentTimeMillis(); + assertTrue(snapshotTimeMillis - currentTimeMillis <= 10, + format("Snapshot time %s is greater than the current time %s by more than 10ms", snapshotTimeMillis, currentTimeMillis)); + + while (currentTimeMillis <= snapshotTimeMillis) { + currentTimeMillis = System.currentTimeMillis(); + } + return currentTimeMillis; + } + + private String getTimestampString(long timeMillsUtc, String zoneId) + { + Instant instant = Instant.ofEpochMilli(timeMillsUtc); + LocalDateTime localDateTime = instant + .atZone(ZoneId.of(zoneId)) + .toLocalDateTime(); + DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS"); + formatter = formatter.withZone(ZoneId.of(zoneId)); + return localDateTime.format(formatter); + } } diff --git a/presto-main/src/main/java/com/facebook/presto/sql/analyzer/StatementAnalyzer.java b/presto-main/src/main/java/com/facebook/presto/sql/analyzer/StatementAnalyzer.java index 49297ff101fc..0bd1e9030404 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/analyzer/StatementAnalyzer.java +++ 
b/presto-main/src/main/java/com/facebook/presto/sql/analyzer/StatementAnalyzer.java @@ -27,6 +27,7 @@ import com.facebook.presto.common.type.MapType; import com.facebook.presto.common.type.RealType; import com.facebook.presto.common.type.RowType; +import com.facebook.presto.common.type.TimestampType; import com.facebook.presto.common.type.TimestampWithTimeZoneType; import com.facebook.presto.common.type.Type; import com.facebook.presto.common.type.VarcharType; @@ -1415,9 +1416,9 @@ private Optional processTableVersion(Table table, QualifiedObjectNa } Object evalStateExpr = evaluateConstantExpression(stateExpr, stateExprType, metadata, session, analysis.getParameters()); if (tableVersionType == TIMESTAMP) { - if (!(stateExprType instanceof TimestampWithTimeZoneType)) { + if (!(stateExprType instanceof TimestampWithTimeZoneType || stateExprType instanceof TimestampType)) { throw new SemanticException(TYPE_MISMATCH, stateExpr, - "Type %s is invalid. Supported table version AS OF/BEFORE expression type is Timestamp with Time Zone.", + "Type %s is invalid. Supported table version AS OF/BEFORE expression type is Timestamp or Timestamp with Time Zone.", stateExprType.getDisplayName()); } } From a8c1ee397a2ab74e5e673edaef156baf2c900114 Mon Sep 17 00:00:00 2001 From: Steve Burnett Date: Wed, 25 Sep 2024 13:40:31 -0400 Subject: [PATCH 30/86] Add doc for driver.cancel-tasks-with-stuck-operators-threshold-ms --- presto-docs/src/main/sphinx/presto_cpp/properties.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/presto-docs/src/main/sphinx/presto_cpp/properties.rst b/presto-docs/src/main/sphinx/presto_cpp/properties.rst index 8c28419548db..cd2abe473270 100644 --- a/presto-docs/src/main/sphinx/presto_cpp/properties.rst +++ b/presto-docs/src/main/sphinx/presto_cpp/properties.rst @@ -32,6 +32,16 @@ Presto C++ workers. These Presto coordinator configuration properties are described here, in alphabetical order. 
+``driver.cancel-tasks-with-stuck-operators-threshold-ms`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +* **Type:** ``string`` +* **Default value:** ``2400000`` (40 minutes) + + Cancels any task when at least one operator has been stuck for at + least the time specified by this threshold. + + Set this property to ``0`` to disable canceling. + ``experimental.table-writer-merge-operator-enabled`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From 375dd5a460343cf4bceeaffb5c276875cab26c2c Mon Sep 17 00:00:00 2001 From: Ananthu-Nair Date: Fri, 27 Sep 2024 00:32:16 +0530 Subject: [PATCH 31/86] Add TLS support for Prometheus connector --- .../src/main/sphinx/connector/prometheus.rst | 8 ++ .../plugin/prometheus/PrometheusClient.java | 90 +++++++++++++++++-- .../prometheus/PrometheusConnectorConfig.java | 51 +++++++++++ .../prometheus/PrometheusErrorCode.java | 3 +- .../TestPrometheusConnectorConfig.java | 14 ++- 5 files changed, 159 insertions(+), 7 deletions(-) diff --git a/presto-docs/src/main/sphinx/connector/prometheus.rst b/presto-docs/src/main/sphinx/connector/prometheus.rst index 317076e13fc0..035be908cb7a 100644 --- a/presto-docs/src/main/sphinx/connector/prometheus.rst +++ b/presto-docs/src/main/sphinx/connector/prometheus.rst @@ -25,6 +25,10 @@ replacing the properties as appropriate: prometheus.max-query-duration=1h prometheus.cache-ttl=30s prometheus.bearer-token-file=/path/to/bearer/token/file + prometheus.tls.enabled=true + prometheus.tls.truststore-path=/path/to/truststore + prometheus.tls.truststore-password=truststorePassword + verify-host-name=true Configuration Properties ------------------------ @@ -39,6 +43,10 @@ Property Name Description ``prometheus.max-query-duration`` Width of overall query to Prometheus, will be divided into query-chunk-duration queries ``prometheus.cache-ttl`` How long the config values are cached ``prometheus.bearer-token-file`` File holding bearer token for access to Prometheus +``prometheus.tls.enabled``
Enable or disable TLS for securing communication with Prometheus +``prometheus.tls.truststore-path`` Path to the trust store containing the SSL certificates +``prometheus.tls.truststore-password`` Password to access the trust store for TLS verification +``verify-host-name`` Enable or disable hostname verification in the SSL certificate ======================================== ============================================================================================ Not Exhausting Your Presto Available Heap diff --git a/presto-prometheus/src/main/java/com/facebook/presto/plugin/prometheus/PrometheusClient.java b/presto-prometheus/src/main/java/com/facebook/presto/plugin/prometheus/PrometheusClient.java index 179cf1c7ac60..555e5aba9e78 100644 --- a/presto-prometheus/src/main/java/com/facebook/presto/plugin/prometheus/PrometheusClient.java +++ b/presto-prometheus/src/main/java/com/facebook/presto/plugin/prometheus/PrometheusClient.java @@ -28,12 +28,24 @@ import okhttp3.Response; import javax.inject.Inject; +import javax.net.ssl.HostnameVerifier; +import javax.net.ssl.SSLContext; +import javax.net.ssl.SSLHandshakeException; +import javax.net.ssl.SSLPeerUnverifiedException; +import javax.net.ssl.TrustManagerFactory; +import javax.net.ssl.X509TrustManager; import java.io.File; import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; +import java.net.URL; import java.nio.file.Files; +import java.security.KeyManagementException; +import java.security.KeyStore; +import java.security.KeyStoreException; +import java.security.NoSuchAlgorithmException; +import java.security.cert.CertificateException; import java.util.List; import java.util.Map; import java.util.Optional; @@ -42,6 +54,7 @@ import static com.facebook.presto.common.type.TimestampWithTimeZoneType.TIMESTAMP_WITH_TIME_ZONE; import static com.facebook.presto.common.type.VarcharType.VARCHAR; import static com.facebook.presto.plugin.prometheus.PrometheusColumn.mapType; +import static 
com.facebook.presto.plugin.prometheus.PrometheusErrorCode.PROMETHEUS_SECURE_COMMUNICATION_ERROR; import static com.facebook.presto.plugin.prometheus.PrometheusErrorCode.PROMETHEUS_TABLES_METRICS_RETRIEVE_ERROR; import static com.facebook.presto.plugin.prometheus.PrometheusErrorCode.PROMETHEUS_UNKNOWN_ERROR; import static java.nio.charset.StandardCharsets.UTF_8; @@ -57,13 +70,14 @@ public class PrometheusClient private final Optional bearerTokenFile; private final Supplier> tableSupplier; private final Type varcharMapType; + private PrometheusConnectorConfig config; private static final Logger log = Logger.get(PrometheusClient.class); @Inject public PrometheusClient(PrometheusConnectorConfig config, JsonCodec> metricCodec, TypeManager typeManager) { - requireNonNull(config, "config is null"); + this.config = requireNonNull(config, "config is null"); requireNonNull(metricCodec, "metricCodec is null"); requireNonNull(typeManager, "typeManager is null"); @@ -148,17 +162,54 @@ public byte[] fetchUri(URI uri) { Request.Builder requestBuilder = new Request.Builder().url(uri.toString()); getBearerAuthInfoFromFile().map(bearerToken -> requestBuilder.header("Authorization", "Bearer " + bearerToken)); - Response response; try { - response = httpClient.newCall(requestBuilder.build()).execute(); - if (response.isSuccessful() && response.body() != null) { - return response.body().bytes(); + if (config.isTlsEnabled()) { + OkHttpClient httpClient; + HostnameVerifier hostnameVerifier = (hostname, session) -> true; + if (!config.getVerifyHostName()) { + httpClient = new OkHttpClient.Builder() + .hostnameVerifier(hostnameVerifier) + .sslSocketFactory(getSSLContext().getSocketFactory(), (X509TrustManager) getTrustManagerFactory().getTrustManagers()[0]) + .build(); + } + else { + httpClient = new OkHttpClient.Builder() + .sslSocketFactory(getSSLContext().getSocketFactory(), (X509TrustManager) getTrustManagerFactory().getTrustManagers()[0]) + .build(); + } + response = 
httpClient.newCall(requestBuilder.build()).execute(); + if (response.isSuccessful() && response.body() != null) { + return response.body().bytes(); + } + } + else { + response = httpClient.newCall(requestBuilder.build()).execute(); + if (response.isSuccessful() && response.body() != null) { + return response.body().bytes(); + } } } + catch (SSLHandshakeException e) { + throw new PrestoException(PROMETHEUS_SECURE_COMMUNICATION_ERROR, "An SSL handshake error occurred while establishing a secure connection. Try the following measures to resolve the error:\n\n" + "- Upload a valid SSL certificate for authentication\n- Verify the expiration status of the uploaded certificate.\n- If you are connecting with SSL, enable SSL on both ends of the connection.\n", e); + } + catch (SSLPeerUnverifiedException e) { + throw new PrestoException(PROMETHEUS_SECURE_COMMUNICATION_ERROR, "Peer verification failed. These measures might resolve the issue \n" + + "- Add correct Hostname in the SSL certificate's SAN list \n" + + "- The certificate chain might be incomplete. 
Check your SSL certificate\n", e); + } catch (IOException e) { throw new PrestoException(PROMETHEUS_UNKNOWN_ERROR, "Error reading metrics", e); } + catch (NoSuchAlgorithmException e) { + throw new PrestoException(PROMETHEUS_SECURE_COMMUNICATION_ERROR, "Requested cryptographic algorithm is not available", e); + } + catch (KeyStoreException e) { + throw new PrestoException(PROMETHEUS_SECURE_COMMUNICATION_ERROR, "Keystore operation error", e); + } + catch (KeyManagementException e) { + throw new PrestoException(PROMETHEUS_SECURE_COMMUNICATION_ERROR, "Key management operation error", e); + } throw new PrestoException(PROMETHEUS_UNKNOWN_ERROR, "Bad response " + response.code() + response.message()); } @@ -174,4 +225,33 @@ private Optional getBearerAuthInfoFromFile() } }); } + + private SSLContext getSSLContext() + throws NoSuchAlgorithmException, KeyStoreException, KeyManagementException + { + SSLContext sslContext = SSLContext.getInstance("TLS"); + sslContext.init(null, getTrustManagerFactory().getTrustManagers(), new java.security.SecureRandom()); + return sslContext; + } + + public TrustManagerFactory getTrustManagerFactory() + throws KeyStoreException + { + KeyStore truststore = KeyStore.getInstance(KeyStore.getDefaultType()); + try { + truststore.load(new URL("file://" + config.getTrustStorePath()).openStream(), config.getTruststorePassword().toCharArray()); + TrustManagerFactory trustManagerFactory = TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm()); + trustManagerFactory.init(truststore); + return trustManagerFactory; + } + catch (IOException e) { + throw new PrestoException(PROMETHEUS_UNKNOWN_ERROR, "I/O Error", e); + } + catch (NoSuchAlgorithmException e) { + throw new PrestoException(PROMETHEUS_SECURE_COMMUNICATION_ERROR, "Requested cryptographic algorithm is not available", e); + } + catch (CertificateException e) { + throw new PrestoException(PROMETHEUS_SECURE_COMMUNICATION_ERROR, "Error while parsing or validating the certificate", 
e); + } + } } diff --git a/presto-prometheus/src/main/java/com/facebook/presto/plugin/prometheus/PrometheusConnectorConfig.java b/presto-prometheus/src/main/java/com/facebook/presto/plugin/prometheus/PrometheusConnectorConfig.java index db8d1d391205..dda73ed5fe81 100644 --- a/presto-prometheus/src/main/java/com/facebook/presto/plugin/prometheus/PrometheusConnectorConfig.java +++ b/presto-prometheus/src/main/java/com/facebook/presto/plugin/prometheus/PrometheusConnectorConfig.java @@ -36,6 +36,10 @@ public class PrometheusConnectorConfig private Duration maxQueryRangeDuration = new Duration(1, TimeUnit.HOURS); private Duration cacheDuration = new Duration(30, TimeUnit.SECONDS); private File bearerTokenFile; + private boolean tlsEnabled; + private String trustStorePath; + private String truststorePassword; + private boolean verifyHostName; @NotNull public URI getPrometheusURI() @@ -115,4 +119,51 @@ public void checkConfig() throw new ConfigurationException(ImmutableList.of(new Message("prometheus.max-query-duration must be greater than prometheus.query-chunk-duration"))); } } + public boolean isTlsEnabled() + { + return tlsEnabled; + } + + @Config("prometheus.tls.enabled") + public PrometheusConnectorConfig setTlsEnabled(boolean tlsEnabled) + { + this.tlsEnabled = tlsEnabled; + return this; + } + + public String getTrustStorePath() + { + return trustStorePath; + } + + @Config("prometheus.tls.truststore-path") + public PrometheusConnectorConfig setTrustStorePath(String path) + { + this.trustStorePath = path; + return this; + } + + public String getTruststorePassword() + { + return truststorePassword; + } + + @Config("prometheus.tls.truststore-password") + public PrometheusConnectorConfig setTruststorePassword(String password) + { + this.truststorePassword = password; + return this; + } + + public boolean getVerifyHostName() + { + return verifyHostName; + } + + @Config("verify-host-name") + public PrometheusConnectorConfig setVerifyHostName(boolean val) + { + 
this.verifyHostName = val; + return this; + } } diff --git a/presto-prometheus/src/main/java/com/facebook/presto/plugin/prometheus/PrometheusErrorCode.java b/presto-prometheus/src/main/java/com/facebook/presto/plugin/prometheus/PrometheusErrorCode.java index fe1edd4be60e..606ba0a90435 100644 --- a/presto-prometheus/src/main/java/com/facebook/presto/plugin/prometheus/PrometheusErrorCode.java +++ b/presto-prometheus/src/main/java/com/facebook/presto/plugin/prometheus/PrometheusErrorCode.java @@ -26,7 +26,8 @@ public enum PrometheusErrorCode PROMETHEUS_UNKNOWN_ERROR(0, EXTERNAL), PROMETHEUS_TABLES_METRICS_RETRIEVE_ERROR(1, USER_ERROR), PROMETHEUS_PARSE_ERROR(2, EXTERNAL), - PROMETHEUS_OUTPUT_ERROR(3, EXTERNAL); + PROMETHEUS_OUTPUT_ERROR(3, EXTERNAL), + PROMETHEUS_SECURE_COMMUNICATION_ERROR(4, EXTERNAL); private final ErrorCode errorCode; diff --git a/presto-prometheus/src/test/java/com/facebook/presto/plugin/prometheus/TestPrometheusConnectorConfig.java b/presto-prometheus/src/test/java/com/facebook/presto/plugin/prometheus/TestPrometheusConnectorConfig.java index fe602e996605..b2e49fb9007f 100644 --- a/presto-prometheus/src/test/java/com/facebook/presto/plugin/prometheus/TestPrometheusConnectorConfig.java +++ b/presto-prometheus/src/test/java/com/facebook/presto/plugin/prometheus/TestPrometheusConnectorConfig.java @@ -39,7 +39,11 @@ public void testDefaults() .setQueryChunkSizeDuration(Duration.valueOf("10m")) .setMaxQueryRangeDuration(Duration.valueOf("1h")) .setCacheDuration(Duration.valueOf("30s")) - .setBearerTokenFile(null)); + .setBearerTokenFile(null) + .setTlsEnabled(false) + .setTruststorePassword(null) + .setVerifyHostName(false) + .setTrustStorePath(null)); } @Test @@ -51,6 +55,10 @@ public void testExplicitPropertyMappings() .put("prometheus.max-query-duration", "1095d") .put("prometheus.cache-ttl", "60s") .put("prometheus.bearer-token-file", "/tmp/bearer_token.txt") + .put("prometheus.tls.enabled", "true") + .put("prometheus.tls.truststore-password", 
"password") + .put("prometheus.tls.truststore-path", "/tmp/path/truststore") + .put("verify-host-name", "true") .build(); URI uri = URI.create("file://test.json"); @@ -60,6 +68,10 @@ public void testExplicitPropertyMappings() expected.setMaxQueryRangeDuration(Duration.valueOf("1095d")); expected.setCacheDuration(Duration.valueOf("60s")); expected.setBearerTokenFile(new File("/tmp/bearer_token.txt")); + expected.setTlsEnabled(true); + expected.setTruststorePassword("password"); + expected.setTrustStorePath("/tmp/path/truststore"); + expected.setVerifyHostName(true); assertFullMapping(properties, expected); } From 83851634590467292d6b870b779af89201ed9bc8 Mon Sep 17 00:00:00 2001 From: Rohan Pal Sidhu Date: Wed, 25 Sep 2024 22:52:06 -0700 Subject: [PATCH 32/86] Add QueryType to AccessControlContext --- .../ranger/TestRangerBasedAccessControl.java | 2 +- .../java/com/facebook/presto/Session.java | 157 +++++++++++------- .../presto/SessionRepresentation.java | 3 +- .../presto/dispatcher/DispatchManager.java | 17 +- .../presto/security/AccessControlUtils.java | 6 +- .../presto/server/NoOpSessionSupplier.java | 8 + .../presto/server/QuerySessionSupplier.java | 16 +- .../server/SessionPropertyDefaults.java | 18 +- .../presto/server/SessionSupplier.java | 4 + .../presto/testing/LocalQueryRunner.java | 3 +- .../security/TestAccessControlManager.java | 16 +- .../TestFileBasedSystemAccessControl.java | 2 +- .../server/TestSessionPropertyDefaults.java | 16 +- .../security/TestFileBasedAccessControl.java | 2 +- .../PrestoSparkQueryExecutionFactory.java | 15 +- .../spi/security/AccessControlContext.java | 10 +- .../presto/tests/AbstractTestQueries.java | 3 +- 17 files changed, 187 insertions(+), 111 deletions(-) diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/security/ranger/TestRangerBasedAccessControl.java b/presto-hive/src/test/java/com/facebook/presto/hive/security/ranger/TestRangerBasedAccessControl.java index ec0d27388df6..b93d5ac82487 100644 --- 
a/presto-hive/src/test/java/com/facebook/presto/hive/security/ranger/TestRangerBasedAccessControl.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/security/ranger/TestRangerBasedAccessControl.java @@ -48,7 +48,7 @@ public class TestRangerBasedAccessControl { public static final ConnectorTransactionHandle TRANSACTION_HANDLE = new ConnectorTransactionHandle() {}; - public static final AccessControlContext CONTEXT = new AccessControlContext(new QueryId("query_id"), Optional.empty(), Collections.emptySet(), Optional.empty(), WarningCollector.NOOP, new RuntimeStats()); + public static final AccessControlContext CONTEXT = new AccessControlContext(new QueryId("query_id"), Optional.empty(), Collections.emptySet(), Optional.empty(), WarningCollector.NOOP, new RuntimeStats(), Optional.empty()); @Test public void testTablePriviledgesRolesNotAllowed() diff --git a/presto-main/src/main/java/com/facebook/presto/Session.java b/presto-main/src/main/java/com/facebook/presto/Session.java index 24441303ae1d..2e8c35f510d8 100644 --- a/presto-main/src/main/java/com/facebook/presto/Session.java +++ b/presto-main/src/main/java/com/facebook/presto/Session.java @@ -15,6 +15,7 @@ import com.facebook.presto.common.RuntimeStats; import com.facebook.presto.common.function.SqlFunctionProperties; +import com.facebook.presto.common.resourceGroups.QueryType; import com.facebook.presto.common.transaction.TransactionId; import com.facebook.presto.common.type.TimeZoneKey; import com.facebook.presto.cost.PlanCostEstimate; @@ -99,6 +100,7 @@ public final class Session private final Optional tracer; private final WarningCollector warningCollector; private final RuntimeStats runtimeStats; + private final Optional queryType; private final OptimizerInformationCollector optimizerInformationCollector = new OptimizerInformationCollector(); private final OptimizerResultCollector optimizerResultCollector = new OptimizerResultCollector(); @@ -131,7 +133,8 @@ public Session( Map sessionFunctions, 
Optional tracer, WarningCollector warningCollector, - RuntimeStats runtimeStats) + RuntimeStats runtimeStats, + Optional queryType) { this.queryId = requireNonNull(queryId, "queryId is null"); this.transactionId = requireNonNull(transactionId, "transactionId is null"); @@ -172,7 +175,8 @@ public Session( this.tracer = requireNonNull(tracer, "tracer is null"); this.warningCollector = requireNonNull(warningCollector, "warningCollector is null"); this.runtimeStats = requireNonNull(runtimeStats, "runtimeStats is null"); - this.context = new AccessControlContext(queryId, clientInfo, clientTags, source, warningCollector, runtimeStats); + this.queryType = requireNonNull(queryType, "queryType is null"); + this.context = new AccessControlContext(queryId, clientInfo, clientTags, source, warningCollector, runtimeStats, queryType); } public QueryId getQueryId() @@ -353,6 +357,11 @@ public Map getPlanNodeCostMap() return planNodeCostMap; } + public Optional getQueryType() + { + return queryType; + } + public Session beginTransactionId(TransactionId transactionId, TransactionManager transactionManager, AccessControl accessControl) { requireNonNull(transactionId, "transactionId is null"); @@ -447,63 +456,8 @@ public Session beginTransactionId(TransactionId transactionId, TransactionManage sessionFunctions, tracer, warningCollector, - runtimeStats); - } - - public Session withDefaultProperties( - SystemSessionPropertyConfiguration systemPropertyConfiguration, - Map> catalogPropertyDefaults) - { - requireNonNull(systemPropertyConfiguration, "systemPropertyConfiguration is null"); - requireNonNull(catalogPropertyDefaults, "catalogPropertyDefaults is null"); - - // to remove this check properties must be authenticated and validated as in beginTransactionId - checkState( - !this.transactionId.isPresent() && this.connectorProperties.isEmpty(), - "Session properties cannot be overridden once a transaction is active"); - - Map systemProperties = new HashMap<>(); - 
systemProperties.putAll(systemPropertyConfiguration.systemPropertyDefaults); - systemProperties.putAll(this.systemProperties); - systemProperties.putAll(systemPropertyConfiguration.systemPropertyOverrides); - - Map> connectorProperties = catalogPropertyDefaults.entrySet().stream() - .map(entry -> Maps.immutableEntry(entry.getKey(), new HashMap<>(entry.getValue()))) - .collect(Collectors.toMap(Entry::getKey, Entry::getValue)); - for (Entry> catalogProperties : this.unprocessedCatalogProperties.entrySet()) { - String catalog = catalogProperties.getKey(); - for (Entry entry : catalogProperties.getValue().entrySet()) { - connectorProperties.computeIfAbsent(catalog, id -> new HashMap<>()) - .put(entry.getKey(), entry.getValue()); - } - } - - return new Session( - queryId, - transactionId, - clientTransactionSupport, - identity, - source, - catalog, - schema, - traceToken, - timeZoneKey, - locale, - remoteUserAddress, - userAgent, - clientInfo, - clientTags, - resourceEstimates, - startTime, - systemProperties, - ImmutableMap.of(), - connectorProperties, - sessionPropertyManager, - preparedStatements, - sessionFunctions, - tracer, - warningCollector, - runtimeStats); + runtimeStats, + queryType); } public ConnectorSession toConnectorSession() @@ -630,6 +584,7 @@ public static class SessionBuilder private final SessionPropertyManager sessionPropertyManager; private final Map preparedStatements = new HashMap<>(); private final Map sessionFunctions = new HashMap<>(); + private Optional queryType = Optional.empty(); private WarningCollector warningCollector = WarningCollector.NOOP; private RuntimeStats runtimeStats = new RuntimeStats(); @@ -665,6 +620,7 @@ private SessionBuilder(Session session) this.tracer = requireNonNull(session.tracer, "tracer is null"); this.warningCollector = requireNonNull(session.warningCollector, "warningCollector is null"); this.runtimeStats = requireNonNull(session.runtimeStats, "runtimeStats is null"); + this.queryType = 
requireNonNull(session.queryType, "queryType is null"); } public SessionBuilder setQueryId(QueryId queryId) @@ -821,11 +777,57 @@ public SessionBuilder setRuntimeStats(RuntimeStats runtimeStats) return this; } + public SessionBuilder setQueryType(Optional queryType) + { + this.queryType = requireNonNull(queryType, "queryType is null"); + return this; + } + public T getSystemProperty(String name, Class type) { return sessionPropertyManager.decodeSystemPropertyValue(name, systemProperties.get(name), type); } + public WarningCollector getWarningCollector() + { + return this.warningCollector; + } + + public Map getPreparedStatements() + { + return this.preparedStatements; + } + + public Identity getIdentity() + { + return this.identity; + } + + public Optional getSource() + { + return Optional.ofNullable(this.source); + } + + public Set getClientTags() + { + return this.clientTags; + } + + public Optional getClientInfo() + { + return Optional.ofNullable(this.clientInfo); + } + + public Map getSystemProperties() + { + return this.systemProperties; + } + + public Map> getUnprocessedCatalogProperties() + { + return this.catalogSessionProperties; + } + public Session build() { return new Session( @@ -853,7 +855,42 @@ public Session build() sessionFunctions, tracer, warningCollector, - runtimeStats); + runtimeStats, + queryType); + } + + public void applyDefaultProperties(SystemSessionPropertyConfiguration systemPropertyConfiguration, Map> catalogPropertyDefaults) + { + requireNonNull(systemPropertyConfiguration, "systemPropertyConfiguration is null"); + requireNonNull(catalogPropertyDefaults, "catalogPropertyDefaults is null"); + + // to remove this check properties must be authenticated and validated as in beginTransactionId + checkState( + this.transactionId == null && this.connectorProperties.isEmpty(), + "Session properties cannot be overridden once a transaction is active"); + + Map systemProperties = new HashMap<>(); + 
systemProperties.putAll(systemPropertyConfiguration.systemPropertyDefaults); + systemProperties.putAll(this.systemProperties); + systemProperties.putAll(systemPropertyConfiguration.systemPropertyOverrides); + this.systemProperties.putAll(systemProperties); + + Map> connectorProperties = catalogPropertyDefaults.entrySet().stream() + .map(entry -> Maps.immutableEntry(entry.getKey(), new HashMap<>(entry.getValue()))) + .collect(Collectors.toMap(Entry::getKey, Entry::getValue)); + for (Entry> catalogProperties : this.catalogSessionProperties.entrySet()) { + String catalog = catalogProperties.getKey(); + for (Entry entry : catalogProperties.getValue().entrySet()) { + connectorProperties.computeIfAbsent(catalog, id -> new HashMap<>()).put(entry.getKey(), entry.getValue()); + } + } + + for (Entry> catalogProperties : connectorProperties.entrySet()) { + String catalog = catalogProperties.getKey(); + for (Entry entry : catalogProperties.getValue().entrySet()) { + setCatalogSessionProperty(catalog, entry.getKey(), entry.getValue()); + } + } } } diff --git a/presto-main/src/main/java/com/facebook/presto/SessionRepresentation.java b/presto-main/src/main/java/com/facebook/presto/SessionRepresentation.java index 9089840b8263..7651ab2b9648 100644 --- a/presto-main/src/main/java/com/facebook/presto/SessionRepresentation.java +++ b/presto-main/src/main/java/com/facebook/presto/SessionRepresentation.java @@ -338,6 +338,7 @@ public Session toSession(SessionPropertyManager sessionPropertyManager, Map createQuery(QueryId queryId, String slug, int retryCo private void createQueryInternal(QueryId queryId, String slug, int retryCount, SessionContext sessionContext, String query, ResourceGroupManager resourceGroupManager) { Session session = null; + SessionBuilder sessionBuilder = null; PreparedQuery preparedQuery; try { if (query.length() > maxQueryLength) { @@ -268,16 +270,18 @@ private void createQueryInternal(QueryId queryId, String slug, int retryCoun } // decode session - session = 
sessionSupplier.createSession(queryId, sessionContext, warningCollectorFactory); + sessionBuilder = sessionSupplier.createSessionBuilder(queryId, sessionContext, warningCollectorFactory); + session = sessionBuilder.build(); // prepare query - AnalyzerOptions analyzerOptions = createAnalyzerOptions(session, session.getWarningCollector()); + AnalyzerOptions analyzerOptions = createAnalyzerOptions(session, sessionBuilder.getWarningCollector()); QueryPreparerProvider queryPreparerProvider = queryPreparerProviderManager.getQueryPreparerProvider(getAnalyzerType(session)); - preparedQuery = queryPreparerProvider.getQueryPreparer().prepareQuery(analyzerOptions, query, session.getPreparedStatements(), session.getWarningCollector()); + preparedQuery = queryPreparerProvider.getQueryPreparer().prepareQuery(analyzerOptions, query, sessionBuilder.getPreparedStatements(), sessionBuilder.getWarningCollector()); query = preparedQuery.getFormattedQuery().orElse(query); // select resource group Optional queryType = preparedQuery.getQueryType(); + sessionBuilder.setQueryType(queryType); SelectionContext selectionContext = resourceGroupManager.selectGroup(new SelectionCriteria( sessionContext.getIdentity().getPrincipal().isPresent(), sessionContext.getIdentity().getUser(), @@ -290,7 +294,12 @@ private void createQueryInternal(QueryId queryId, String slug, int retryCoun sessionContext.getIdentity().getPrincipal().map(Principal::getName))); // apply system default session properties (does not override user set properties) - session = sessionPropertyDefaults.newSessionWithDefaultProperties(session, queryType.map(Enum::name), Optional.of(selectionContext.getResourceGroupId())); + sessionPropertyDefaults.applyDefaultProperties(sessionBuilder, queryType.map(Enum::name), Optional.of(selectionContext.getResourceGroupId())); + + session = sessionBuilder.build(); + if (sessionContext.getTransactionId().isPresent()) { + session = session.beginTransactionId(sessionContext.getTransactionId().get(), 
transactionManager, accessControl); + } // mark existing transaction as active transactionManager.activateTransaction(session, preparedQuery.isTransactionControlStatement(), accessControl); diff --git a/presto-main/src/main/java/com/facebook/presto/security/AccessControlUtils.java b/presto-main/src/main/java/com/facebook/presto/security/AccessControlUtils.java index 9a0a3dd25cb0..c29c65d46023 100644 --- a/presto-main/src/main/java/com/facebook/presto/security/AccessControlUtils.java +++ b/presto-main/src/main/java/com/facebook/presto/security/AccessControlUtils.java @@ -46,7 +46,8 @@ public static void checkPermissions(AccessControl accessControl, SecurityConfig sessionContext.getClientTags(), Optional.ofNullable(sessionContext.getSource()), WarningCollector.NOOP, - sessionContext.getRuntimeStats()), + sessionContext.getRuntimeStats(), + Optional.empty()), identity.getPrincipal(), identity.getUser()); } @@ -71,7 +72,8 @@ public static Optional getAuthorizedIdentity(AccessControl a sessionContext.getClientTags(), Optional.ofNullable(sessionContext.getSource()), WarningCollector.NOOP, - sessionContext.getRuntimeStats()), + sessionContext.getRuntimeStats(), + Optional.empty()), identity.getUser(), sessionContext.getCertificates()); return Optional.of(authorizedIdentity); diff --git a/presto-main/src/main/java/com/facebook/presto/server/NoOpSessionSupplier.java b/presto-main/src/main/java/com/facebook/presto/server/NoOpSessionSupplier.java index 4b3ce0c3e7c0..a7717bda17c4 100644 --- a/presto-main/src/main/java/com/facebook/presto/server/NoOpSessionSupplier.java +++ b/presto-main/src/main/java/com/facebook/presto/server/NoOpSessionSupplier.java @@ -17,6 +17,8 @@ import com.facebook.presto.execution.warnings.WarningCollectorFactory; import com.facebook.presto.spi.QueryId; +import static com.facebook.presto.Session.SessionBuilder; + /** * Used on workers. 
*/ @@ -28,4 +30,10 @@ public Session createSession(QueryId queryId, SessionContext context, WarningCol { throw new UnsupportedOperationException(); } + + @Override + public SessionBuilder createSessionBuilder(QueryId queryId, SessionContext context, WarningCollectorFactory warningCollectorFactory) + { + throw new UnsupportedOperationException(); + } } diff --git a/presto-main/src/main/java/com/facebook/presto/server/QuerySessionSupplier.java b/presto-main/src/main/java/com/facebook/presto/server/QuerySessionSupplier.java index 2d4062f56abe..260611d19003 100644 --- a/presto-main/src/main/java/com/facebook/presto/server/QuerySessionSupplier.java +++ b/presto-main/src/main/java/com/facebook/presto/server/QuerySessionSupplier.java @@ -75,6 +75,16 @@ public QuerySessionSupplier( @Override public Session createSession(QueryId queryId, SessionContext context, WarningCollectorFactory warningCollectorFactory) + { + Session session = createSessionBuilder(queryId, context, warningCollectorFactory).build(); + if (context.getTransactionId().isPresent()) { + session = session.beginTransactionId(context.getTransactionId().get(), transactionManager, accessControl); + } + return session; + } + + @Override + public SessionBuilder createSessionBuilder(QueryId queryId, SessionContext context, WarningCollectorFactory warningCollectorFactory) { SessionBuilder sessionBuilder = Session.builder(sessionPropertyManager) .setQueryId(queryId) @@ -128,11 +138,7 @@ else if (context.getTimeZoneId() != null) { WarningCollector warningCollector = warningCollectorFactory.create(sessionBuilder.getSystemProperty(WARNING_HANDLING, WarningHandlingLevel.class)); sessionBuilder.setWarningCollector(warningCollector); - Session session = sessionBuilder.build(); - if (context.getTransactionId().isPresent()) { - session = session.beginTransactionId(context.getTransactionId().get(), transactionManager, accessControl); - } - return session; + return sessionBuilder; } private Identity 
authenticateIdentity(QueryId queryId, SessionContext context) diff --git a/presto-main/src/main/java/com/facebook/presto/server/SessionPropertyDefaults.java b/presto-main/src/main/java/com/facebook/presto/server/SessionPropertyDefaults.java index 4ec9069973d0..71a778646f2a 100644 --- a/presto-main/src/main/java/com/facebook/presto/server/SessionPropertyDefaults.java +++ b/presto-main/src/main/java/com/facebook/presto/server/SessionPropertyDefaults.java @@ -15,7 +15,6 @@ import com.facebook.airlift.log.Logger; import com.facebook.airlift.node.NodeInfo; -import com.facebook.presto.Session; import com.facebook.presto.client.NodeVersion; import com.facebook.presto.spi.resourceGroups.ResourceGroupId; import com.facebook.presto.spi.resourceGroups.SessionPropertyConfigurationManagerContext; @@ -35,6 +34,7 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicReference; +import static com.facebook.presto.Session.SessionBuilder; import static com.facebook.presto.util.PropertiesUtil.loadProperties; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkState; @@ -106,27 +106,27 @@ public void setConfigurationManager(String configManagerName, Map queryType, Optional resourceGroupId) { SessionPropertyConfigurationManager configurationManager = delegate.get(); if (configurationManager == null) { - return session; + return; } SessionConfigurationContext context = new SessionConfigurationContext( - session.getIdentity().getUser(), - session.getSource(), - session.getClientTags(), + sessionBuilder.getIdentity().getUser(), + sessionBuilder.getSource(), + sessionBuilder.getClientTags(), queryType, resourceGroupId, - session.getClientInfo(), + sessionBuilder.getClientInfo(), prestoServerVersion); SystemSessionPropertyConfiguration systemPropertyConfiguration = configurationManager.getSystemSessionProperties(context); Map> catalogPropertyOverrides = 
configurationManager.getCatalogSessionProperties(context); - return session.withDefaultProperties(systemPropertyConfiguration, catalogPropertyOverrides); + sessionBuilder.applyDefaultProperties(systemPropertyConfiguration, catalogPropertyOverrides); } } diff --git a/presto-main/src/main/java/com/facebook/presto/server/SessionSupplier.java b/presto-main/src/main/java/com/facebook/presto/server/SessionSupplier.java index d021f2572440..240439d67ce0 100644 --- a/presto-main/src/main/java/com/facebook/presto/server/SessionSupplier.java +++ b/presto-main/src/main/java/com/facebook/presto/server/SessionSupplier.java @@ -17,7 +17,11 @@ import com.facebook.presto.execution.warnings.WarningCollectorFactory; import com.facebook.presto.spi.QueryId; +import static com.facebook.presto.Session.SessionBuilder; + public interface SessionSupplier { Session createSession(QueryId queryId, SessionContext context, WarningCollectorFactory warningCollectorFactory); + + SessionBuilder createSessionBuilder(QueryId queryId, SessionContext context, WarningCollectorFactory warningCollectorFactory); } diff --git a/presto-main/src/main/java/com/facebook/presto/testing/LocalQueryRunner.java b/presto-main/src/main/java/com/facebook/presto/testing/LocalQueryRunner.java index 93a30a8664d1..4bc53439ed6d 100644 --- a/presto-main/src/main/java/com/facebook/presto/testing/LocalQueryRunner.java +++ b/presto-main/src/main/java/com/facebook/presto/testing/LocalQueryRunner.java @@ -549,7 +549,8 @@ private LocalQueryRunner(Session defaultSession, FeaturesConfig featuresConfig, defaultSession.getSessionFunctions(), defaultSession.getTracer(), defaultSession.getWarningCollector(), - defaultSession.getRuntimeStats()); + defaultSession.getRuntimeStats(), + defaultSession.getQueryType()); dataDefinitionTask = ImmutableMap., DataDefinitionTask>builder() .put(CreateTable.class, new CreateTableTask()) diff --git a/presto-main/src/test/java/com/facebook/presto/security/TestAccessControlManager.java 
b/presto-main/src/test/java/com/facebook/presto/security/TestAccessControlManager.java index e41cb1d588ca..7039ba042697 100644 --- a/presto-main/src/test/java/com/facebook/presto/security/TestAccessControlManager.java +++ b/presto-main/src/test/java/com/facebook/presto/security/TestAccessControlManager.java @@ -82,7 +82,7 @@ public void testInitializing() AccessControlManager accessControlManager = new AccessControlManager(createTestTransactionManager()); accessControlManager.checkCanSetUser( new Identity(USER_NAME, Optional.of(PRINCIPAL)), - new AccessControlContext(new QueryId(QUERY_ID), Optional.empty(), Collections.emptySet(), Optional.empty(), WarningCollector.NOOP, new RuntimeStats()), + new AccessControlContext(new QueryId(QUERY_ID), Optional.empty(), Collections.emptySet(), Optional.empty(), WarningCollector.NOOP, new RuntimeStats(), Optional.empty()), Optional.empty(), "foo"); } @@ -94,7 +94,7 @@ public void testNoneSystemAccessControl() accessControlManager.setSystemAccessControl(AllowAllSystemAccessControl.NAME, ImmutableMap.of()); accessControlManager.checkCanSetUser( new Identity(USER_NAME, Optional.of(PRINCIPAL)), - new AccessControlContext(new QueryId(QUERY_ID), Optional.empty(), Collections.emptySet(), Optional.empty(), WarningCollector.NOOP, new RuntimeStats()), + new AccessControlContext(new QueryId(QUERY_ID), Optional.empty(), Collections.emptySet(), Optional.empty(), WarningCollector.NOOP, new RuntimeStats(), Optional.empty()), Optional.empty(), USER_NAME); } @@ -106,7 +106,7 @@ public void testReadOnlySystemAccessControl() QualifiedObjectName tableName = new QualifiedObjectName("catalog", "schema", "table"); TransactionManager transactionManager = createTestTransactionManager(); AccessControlManager accessControlManager = new AccessControlManager(transactionManager); - AccessControlContext context = new AccessControlContext(new QueryId(QUERY_ID), Optional.empty(), Collections.emptySet(), Optional.empty(), WarningCollector.NOOP, new 
RuntimeStats()); + AccessControlContext context = new AccessControlContext(new QueryId(QUERY_ID), Optional.empty(), Collections.emptySet(), Optional.empty(), WarningCollector.NOOP, new RuntimeStats(), Optional.empty()); accessControlManager.setSystemAccessControl(ReadOnlySystemAccessControl.NAME, ImmutableMap.of()); accessControlManager.checkCanSetUser(identity, context, Optional.of(PRINCIPAL), USER_NAME); @@ -149,7 +149,7 @@ public void testSetAccessControl() accessControlManager.checkCanSetUser( new Identity(USER_NAME, Optional.of(PRINCIPAL)), - new AccessControlContext(new QueryId(QUERY_ID), Optional.empty(), Collections.emptySet(), Optional.empty(), WarningCollector.NOOP, new RuntimeStats()), + new AccessControlContext(new QueryId(QUERY_ID), Optional.empty(), Collections.emptySet(), Optional.empty(), WarningCollector.NOOP, new RuntimeStats(), Optional.empty()), Optional.of(PRINCIPAL), USER_NAME); assertEquals(accessControlFactory.getCheckedUserName(), USER_NAME); @@ -160,7 +160,7 @@ public void testSetAccessControl() public void testCheckQueryIntegrity() { AccessControlManager accessControlManager = new AccessControlManager(createTestTransactionManager()); - AccessControlContext context = new AccessControlContext(new QueryId(QUERY_ID), Optional.empty(), Collections.emptySet(), Optional.empty(), WarningCollector.NOOP, new RuntimeStats()); + AccessControlContext context = new AccessControlContext(new QueryId(QUERY_ID), Optional.empty(), Collections.emptySet(), Optional.empty(), WarningCollector.NOOP, new RuntimeStats(), Optional.empty()); TestSystemAccessControlFactory accessControlFactory = new TestSystemAccessControlFactory("test"); accessControlManager.addSystemAccessControlFactory(accessControlFactory); @@ -210,7 +210,7 @@ public void testNoCatalogAccessControl() transaction(transactionManager, accessControlManager) .execute(transactionId -> { accessControlManager.checkCanSelectFromColumns(transactionId, new Identity(USER_NAME, Optional.of(PRINCIPAL)), - new 
AccessControlContext(new QueryId(QUERY_ID), Optional.empty(), Collections.emptySet(), Optional.empty(), WarningCollector.NOOP, new RuntimeStats()), + new AccessControlContext(new QueryId(QUERY_ID), Optional.empty(), Collections.emptySet(), Optional.empty(), WarningCollector.NOOP, new RuntimeStats(), Optional.empty()), new QualifiedObjectName("catalog", "schema", "table"), ImmutableSet.of(new Subfield("column"))); }); } @@ -232,7 +232,7 @@ public void testDenyCatalogAccessControl() transaction(transactionManager, accessControlManager) .execute(transactionId -> { accessControlManager.checkCanSelectFromColumns(transactionId, new Identity(USER_NAME, Optional.of(PRINCIPAL)), - new AccessControlContext(new QueryId(QUERY_ID), Optional.empty(), Collections.emptySet(), Optional.empty(), WarningCollector.NOOP, new RuntimeStats()), + new AccessControlContext(new QueryId(QUERY_ID), Optional.empty(), Collections.emptySet(), Optional.empty(), WarningCollector.NOOP, new RuntimeStats(), Optional.empty()), new QualifiedObjectName("catalog", "schema", "table"), ImmutableSet.of(new Subfield("column"))); }); } @@ -254,7 +254,7 @@ public void testDenySystemAccessControl() transaction(transactionManager, accessControlManager) .execute(transactionId -> { accessControlManager.checkCanSelectFromColumns(transactionId, new Identity(USER_NAME, Optional.of(PRINCIPAL)), - new AccessControlContext(new QueryId(QUERY_ID), Optional.empty(), Collections.emptySet(), Optional.empty(), WarningCollector.NOOP, new RuntimeStats()), + new AccessControlContext(new QueryId(QUERY_ID), Optional.empty(), Collections.emptySet(), Optional.empty(), WarningCollector.NOOP, new RuntimeStats(), Optional.empty()), new QualifiedObjectName("secured_catalog", "schema", "table"), ImmutableSet.of(new Subfield("column"))); }); } diff --git a/presto-main/src/test/java/com/facebook/presto/security/TestFileBasedSystemAccessControl.java 
b/presto-main/src/test/java/com/facebook/presto/security/TestFileBasedSystemAccessControl.java index 5bd759aef186..9d0d574e7805 100644 --- a/presto-main/src/test/java/com/facebook/presto/security/TestFileBasedSystemAccessControl.java +++ b/presto-main/src/test/java/com/facebook/presto/security/TestFileBasedSystemAccessControl.java @@ -70,7 +70,7 @@ public class TestFileBasedSystemAccessControl private static final QualifiedObjectName aliceTable = new QualifiedObjectName("alice-catalog", "schema", "table"); private static final QualifiedObjectName aliceView = new QualifiedObjectName("alice-catalog", "schema", "view"); private static final CatalogSchemaName aliceSchema = new CatalogSchemaName("alice-catalog", "schema"); - private static final AccessControlContext context = new AccessControlContext(new QueryId("query_id"), Optional.empty(), Collections.emptySet(), Optional.empty(), WarningCollector.NOOP, new RuntimeStats()); + private static final AccessControlContext context = new AccessControlContext(new QueryId("query_id"), Optional.empty(), Collections.emptySet(), Optional.empty(), WarningCollector.NOOP, new RuntimeStats(), Optional.empty()); @Test public void testCanSetUserOperations() throws IOException { diff --git a/presto-main/src/test/java/com/facebook/presto/server/TestSessionPropertyDefaults.java b/presto-main/src/test/java/com/facebook/presto/server/TestSessionPropertyDefaults.java index 964183375bdb..034d81caaff7 100644 --- a/presto-main/src/test/java/com/facebook/presto/server/TestSessionPropertyDefaults.java +++ b/presto-main/src/test/java/com/facebook/presto/server/TestSessionPropertyDefaults.java @@ -28,6 +28,7 @@ import java.util.Optional; +import static com.facebook.presto.Session.SessionBuilder; import static com.facebook.presto.SystemSessionProperties.HASH_PARTITION_COUNT; import static com.facebook.presto.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE; import static com.facebook.presto.SystemSessionProperties.QUERY_MAX_MEMORY; @@ -59,33 +60,32 
@@ public void testApplyDefaultProperties() sessionPropertyDefaults.addConfigurationManagerFactory(factory); sessionPropertyDefaults.setConfigurationManager(factory.getName(), ImmutableMap.of()); - Session session = Session.builder(new SessionPropertyManager()) + SessionBuilder sessionBuilder = Session.builder(new SessionPropertyManager()) .setQueryId(new QueryId("test_query_id")) .setIdentity(new Identity("testUser", Optional.empty())) .setSystemProperty(QUERY_MAX_MEMORY, "1GB") .setSystemProperty(JOIN_DISTRIBUTION_TYPE, "partitioned") .setSystemProperty(HASH_PARTITION_COUNT, "43") .setSystemProperty("override", "should be overridden") - .setCatalogSessionProperty("testCatalog", "explicit_set", "explicit_set") - .build(); + .setCatalogSessionProperty("testCatalog", "explicit_set", "explicit_set"); - assertEquals(session.getSystemProperties(), ImmutableMap.builder() + assertEquals(sessionBuilder.getSystemProperties(), ImmutableMap.builder() .put(QUERY_MAX_MEMORY, "1GB") .put(JOIN_DISTRIBUTION_TYPE, "partitioned") .put(HASH_PARTITION_COUNT, "43") .put("override", "should be overridden") .build()); assertEquals( - session.getUnprocessedCatalogProperties(), + sessionBuilder.getUnprocessedCatalogProperties(), ImmutableMap.of( "testCatalog", ImmutableMap.builder() .put("explicit_set", "explicit_set") .build())); - session = sessionPropertyDefaults.newSessionWithDefaultProperties(session, Optional.empty(), Optional.of(TEST_RESOURCE_GROUP_ID)); + sessionPropertyDefaults.applyDefaultProperties(sessionBuilder, Optional.empty(), Optional.of(TEST_RESOURCE_GROUP_ID)); - assertEquals(session.getSystemProperties(), ImmutableMap.builder() + assertEquals(sessionBuilder.getSystemProperties(), ImmutableMap.builder() .put(QUERY_MAX_MEMORY, "1GB") .put(JOIN_DISTRIBUTION_TYPE, "partitioned") .put(HASH_PARTITION_COUNT, "43") @@ -93,7 +93,7 @@ public void testApplyDefaultProperties() .put("override", "overridden") .build()); assertEquals( - session.getUnprocessedCatalogProperties(), + 
sessionBuilder.getUnprocessedCatalogProperties(), ImmutableMap.of( "testCatalog", ImmutableMap.builder() diff --git a/presto-plugin-toolkit/src/test/java/com/facebook/presto/plugin/base/security/TestFileBasedAccessControl.java b/presto-plugin-toolkit/src/test/java/com/facebook/presto/plugin/base/security/TestFileBasedAccessControl.java index 70f39ed214ad..177a0b8ba79d 100644 --- a/presto-plugin-toolkit/src/test/java/com/facebook/presto/plugin/base/security/TestFileBasedAccessControl.java +++ b/presto-plugin-toolkit/src/test/java/com/facebook/presto/plugin/base/security/TestFileBasedAccessControl.java @@ -42,7 +42,7 @@ public class TestFileBasedAccessControl { public static final ConnectorTransactionHandle TRANSACTION_HANDLE = new ConnectorTransactionHandle() {}; - public static final AccessControlContext CONTEXT = new AccessControlContext(new QueryId("query_id"), Optional.empty(), Collections.emptySet(), Optional.empty(), WarningCollector.NOOP, new RuntimeStats()); + public static final AccessControlContext CONTEXT = new AccessControlContext(new QueryId("query_id"), Optional.empty(), Collections.emptySet(), Optional.empty(), WarningCollector.NOOP, new RuntimeStats(), Optional.empty()); @Test public void testSchemaRules() diff --git a/presto-spark-base/src/main/java/com/facebook/presto/spark/PrestoSparkQueryExecutionFactory.java b/presto-spark-base/src/main/java/com/facebook/presto/spark/PrestoSparkQueryExecutionFactory.java index cde967875c94..5f84fb6daea8 100644 --- a/presto-spark-base/src/main/java/com/facebook/presto/spark/PrestoSparkQueryExecutionFactory.java +++ b/presto-spark-base/src/main/java/com/facebook/presto/spark/PrestoSparkQueryExecutionFactory.java @@ -133,6 +133,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import static com.facebook.presto.Session.SessionBuilder; import static com.facebook.presto.SystemSessionProperties.getQueryMaxExecutionTime; import static 
com.facebook.presto.SystemSessionProperties.getQueryMaxRunTime; import static com.facebook.presto.execution.QueryState.FAILED; @@ -609,28 +610,26 @@ public IPrestoSparkQueryExecution create( credentialsProviders, authenticatorProviders); - Session session = sessionSupplier.createSession(queryId, sessionContext, warningCollectorFactory); - session = sessionPropertyDefaults.newSessionWithDefaultProperties(session, Optional.empty(), Optional.empty()); + SessionBuilder sessionBuilder = sessionSupplier.createSessionBuilder(queryId, sessionContext, warningCollectorFactory); + sessionPropertyDefaults.applyDefaultProperties(sessionBuilder, Optional.empty(), Optional.empty()); if (!executionStrategies.isEmpty()) { log.info("Going to run with following strategies: %s", executionStrategies); - PrestoSparkExecutionSettings prestoSparkExecutionSettings = getExecutionSettings(executionStrategies, session); + PrestoSparkExecutionSettings prestoSparkExecutionSettings = getExecutionSettings(executionStrategies, sessionBuilder.build()); // Update Spark setting in SparkConf, if present prestoSparkExecutionSettings.getSparkConfigProperties().forEach(sparkContext.conf()::set); // Update Presto settings in Session, if present - Session.SessionBuilder sessionBuilder = Session.builder(session); transferSessionPropertiesToSession(sessionBuilder, prestoSparkExecutionSettings.getPrestoSessionProperties()); - Set clientTags = new HashSet<>(session.getClientTags()); + Set clientTags = new HashSet<>(sessionBuilder.getClientTags()); executionStrategies.forEach(s -> clientTags.add(s.name())); sessionBuilder.setClientTags(clientTags); - - session = sessionBuilder.build(); } - WarningCollector warningCollector = session.getWarningCollector(); + WarningCollector warningCollector = sessionBuilder.getWarningCollector(); + Session session = sessionBuilder.build(); PlanAndMore planAndMore = null; try { diff --git a/presto-spi/src/main/java/com/facebook/presto/spi/security/AccessControlContext.java 
b/presto-spi/src/main/java/com/facebook/presto/spi/security/AccessControlContext.java index 7e28972b4560..3cc3a2a08a23 100644 --- a/presto-spi/src/main/java/com/facebook/presto/spi/security/AccessControlContext.java +++ b/presto-spi/src/main/java/com/facebook/presto/spi/security/AccessControlContext.java @@ -14,6 +14,7 @@ package com.facebook.presto.spi.security; import com.facebook.presto.common.RuntimeStats; +import com.facebook.presto.common.resourceGroups.QueryType; import com.facebook.presto.spi.QueryId; import com.facebook.presto.spi.WarningCollector; @@ -30,8 +31,9 @@ public class AccessControlContext private final Optional source; private final WarningCollector warningCollector; private final RuntimeStats runtimeStats; + private final Optional queryType; - public AccessControlContext(QueryId queryId, Optional clientInfo, Set clientTags, Optional source, WarningCollector warningCollector, RuntimeStats runtimeStats) + public AccessControlContext(QueryId queryId, Optional clientInfo, Set clientTags, Optional source, WarningCollector warningCollector, RuntimeStats runtimeStats, Optional queryType) { this.queryId = requireNonNull(queryId, "queryId is null"); this.clientInfo = requireNonNull(clientInfo, "clientInfo is null"); @@ -39,6 +41,7 @@ public AccessControlContext(QueryId queryId, Optional clientInfo, Set getQueryType() + { + return queryType; + } } diff --git a/presto-tests/src/main/java/com/facebook/presto/tests/AbstractTestQueries.java b/presto-tests/src/main/java/com/facebook/presto/tests/AbstractTestQueries.java index 1239ed45bb35..e043327d2f6f 100644 --- a/presto-tests/src/main/java/com/facebook/presto/tests/AbstractTestQueries.java +++ b/presto-tests/src/main/java/com/facebook/presto/tests/AbstractTestQueries.java @@ -3072,7 +3072,8 @@ public void testShowSession() ImmutableMap.of(), getSession().getTracer(), getSession().getWarningCollector(), - getSession().getRuntimeStats()); + getSession().getRuntimeStats(), + getSession().getQueryType()); 
MaterializedResult result = computeActual(session, "SHOW SESSION"); ImmutableMap properties = Maps.uniqueIndex(result.getMaterializedRows(), input -> { From 092ba79b7c38a5a1df6aaae3d25c7ae9a1f02d19 Mon Sep 17 00:00:00 2001 From: Steve Burnett Date: Fri, 27 Sep 2024 14:23:47 -0400 Subject: [PATCH 33/86] Add a PR link in release-0.282.rst --- presto-docs/src/main/sphinx/release/release-0.282.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-docs/src/main/sphinx/release/release-0.282.rst b/presto-docs/src/main/sphinx/release/release-0.282.rst index a59fb4af226f..fef4e3c48d5f 100644 --- a/presto-docs/src/main/sphinx/release/release-0.282.rst +++ b/presto-docs/src/main/sphinx/release/release-0.282.rst @@ -7,7 +7,7 @@ Release 0.282 General Changes _______________ -* Fix ``TEMPORARY`` definition :doc:`/sql/create-function` and :doc:`/sql/drop-function`. +* Fix ``TEMPORARY`` definition :doc:`/sql/create-function` and :doc:`/sql/drop-function`. :pr:`19429` * Fix a bug where ``cardinality(map_keys(x))`` and ``cardinality(map_values(x))`` would return wrong results. * Improve performance of ``Explain (TYPE VALIDATE)`` by returning immediately after analysis and ACL checks complete without executing a dummy query. The output column is now called ``result`` rather than ``valid``. * Improve error handling when using custom ``FunctionNamespaceManagers``. 
From ffb7643f0694a5828f3e37817827a6dc0f8afb56 Mon Sep 17 00:00:00 2001 From: Chen Yang Date: Wed, 18 Sep 2024 15:20:15 -0700 Subject: [PATCH 34/86] orc_output_buffer_chunk_size --- .../presto/orc/ChunkedSliceOutput.java | 4 +-- .../presto/orc/ColumnWriterOptions.java | 36 +++++++++++++++++++ .../facebook/presto/orc/OrcOutputBuffer.java | 9 ++--- .../com/facebook/presto/orc/OrcWriter.java | 2 ++ .../facebook/presto/orc/OrcWriterOptions.java | 36 +++++++++++++++++++ 5 files changed, 81 insertions(+), 6 deletions(-) diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/ChunkedSliceOutput.java b/presto-orc/src/main/java/com/facebook/presto/orc/ChunkedSliceOutput.java index 3677e4921631..e21b93e4273c 100644 --- a/presto-orc/src/main/java/com/facebook/presto/orc/ChunkedSliceOutput.java +++ b/presto-orc/src/main/java/com/facebook/presto/orc/ChunkedSliceOutput.java @@ -38,7 +38,7 @@ public final class ChunkedSliceOutput extends SliceOutput { private static final int INSTANCE_SIZE = ClassLayout.parseClass(ChunkedSliceOutput.class).instanceSize(); - private static final int MINIMUM_CHUNK_SIZE = 4096; + private static final int MINIMUM_CHUNK_SIZE = 256; private static final int MAXIMUM_CHUNK_SIZE = 16 * 1024 * 1024; // This must not be larger than MINIMUM_CHUNK_SIZE/2 private static final int MAX_UNUSED_BUFFER_SIZE = 128; @@ -371,8 +371,8 @@ public byte[] get() { byte[] buffer; if (bufferPool.isEmpty()) { - currentSize = min(multiplyExact(currentSize, 2), maxChunkSize); buffer = new byte[currentSize]; + currentSize = min(multiplyExact(currentSize, 2), maxChunkSize); } else { buffer = bufferPool.remove(0); diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/ColumnWriterOptions.java b/presto-orc/src/main/java/com/facebook/presto/orc/ColumnWriterOptions.java index 87bc8dd3ad25..86b3a6659364 100644 --- a/presto-orc/src/main/java/com/facebook/presto/orc/ColumnWriterOptions.java +++ b/presto-orc/src/main/java/com/facebook/presto/orc/ColumnWriterOptions.java @@ 
-24,7 +24,9 @@ import static com.facebook.presto.orc.OrcWriterOptions.DEFAULT_MAX_COMPRESSION_BUFFER_SIZE; import static com.facebook.presto.orc.OrcWriterOptions.DEFAULT_MAX_FLATTENED_MAP_KEY_COUNT; +import static com.facebook.presto.orc.OrcWriterOptions.DEFAULT_MAX_OUTPUT_BUFFER_CHUNK_SIZE; import static com.facebook.presto.orc.OrcWriterOptions.DEFAULT_MAX_STRING_STATISTICS_LIMIT; +import static com.facebook.presto.orc.OrcWriterOptions.DEFAULT_MIN_OUTPUT_BUFFER_CHUNK_SIZE; import static com.facebook.presto.orc.OrcWriterOptions.DEFAULT_PRESERVE_DIRECT_ENCODING_STRIPE_COUNT; import static com.google.common.base.Preconditions.checkArgument; import static io.airlift.units.DataSize.Unit.BYTE; @@ -36,6 +38,8 @@ public class ColumnWriterOptions private final CompressionKind compressionKind; private final OptionalInt compressionLevel; private final int compressionMaxBufferSize; + private final int minOutputBufferChunkSize; + private final int maxOutputBufferChunkSize; private final DataSize stringStatisticsLimit; private final boolean integerDictionaryEncodingEnabled; private final boolean stringDictionarySortingEnabled; @@ -51,6 +55,8 @@ public ColumnWriterOptions( CompressionKind compressionKind, OptionalInt compressionLevel, DataSize compressionMaxBufferSize, + DataSize minOutputBufferChunkSize, + DataSize maxOutputBufferChunkSize, DataSize stringStatisticsLimit, boolean integerDictionaryEncodingEnabled, boolean stringDictionarySortingEnabled, @@ -68,6 +74,8 @@ public ColumnWriterOptions( this.compressionKind = requireNonNull(compressionKind, "compressionKind is null"); this.compressionLevel = requireNonNull(compressionLevel, "compressionLevel is null"); this.compressionMaxBufferSize = toIntExact(compressionMaxBufferSize.toBytes()); + this.minOutputBufferChunkSize = toIntExact(minOutputBufferChunkSize.toBytes()); + this.maxOutputBufferChunkSize = toIntExact(maxOutputBufferChunkSize.toBytes()); this.stringStatisticsLimit = requireNonNull(stringStatisticsLimit, 
"stringStatisticsLimit is null"); this.integerDictionaryEncodingEnabled = integerDictionaryEncodingEnabled; this.stringDictionarySortingEnabled = stringDictionarySortingEnabled; @@ -95,6 +103,16 @@ public int getCompressionMaxBufferSize() return compressionMaxBufferSize; } + public int getMinOutputBufferChunkSize() + { + return minOutputBufferChunkSize; + } + + public int getMaxOutputBufferChunkSize() + { + return maxOutputBufferChunkSize; + } + public int getStringStatisticsLimit() { return toIntExact(stringStatisticsLimit.toBytes()); @@ -162,6 +180,8 @@ public Builder toBuilder() .setCompressionKind(getCompressionKind()) .setCompressionLevel(getCompressionLevel()) .setCompressionMaxBufferSize(new DataSize(getCompressionMaxBufferSize(), BYTE)) + .setMinOutputBufferChunkSize(new DataSize(getMinOutputBufferChunkSize(), BYTE)) + .setMaxOutputBufferChunkSize(new DataSize(getMaxOutputBufferChunkSize(), BYTE)) .setStringStatisticsLimit(new DataSize(getStringStatisticsLimit(), BYTE)) .setIntegerDictionaryEncodingEnabled(isIntegerDictionaryEncodingEnabled()) .setStringDictionarySortingEnabled(isStringDictionarySortingEnabled()) @@ -184,6 +204,8 @@ public static class Builder private CompressionKind compressionKind; private OptionalInt compressionLevel = OptionalInt.empty(); private DataSize compressionMaxBufferSize = DEFAULT_MAX_COMPRESSION_BUFFER_SIZE; + private DataSize minOutputBufferChunkSize = DEFAULT_MIN_OUTPUT_BUFFER_CHUNK_SIZE; + private DataSize maxOutputBufferChunkSize = DEFAULT_MAX_OUTPUT_BUFFER_CHUNK_SIZE; private DataSize stringStatisticsLimit = DEFAULT_MAX_STRING_STATISTICS_LIMIT; private boolean integerDictionaryEncodingEnabled; private boolean stringDictionarySortingEnabled = true; @@ -215,6 +237,18 @@ public Builder setCompressionMaxBufferSize(DataSize compressionMaxBufferSize) return this; } + public Builder setMinOutputBufferChunkSize(DataSize minOutputBufferChunkSize) + { + this.minOutputBufferChunkSize = minOutputBufferChunkSize; + return this; + } + 
+ public Builder setMaxOutputBufferChunkSize(DataSize maxOutputBufferChunkSize) + { + this.maxOutputBufferChunkSize = maxOutputBufferChunkSize; + return this; + } + public Builder setStringStatisticsLimit(DataSize stringStatisticsLimit) { this.stringStatisticsLimit = stringStatisticsLimit; @@ -281,6 +315,8 @@ public ColumnWriterOptions build() compressionKind, compressionLevel, compressionMaxBufferSize, + minOutputBufferChunkSize, + maxOutputBufferChunkSize, stringStatisticsLimit, integerDictionaryEncodingEnabled, stringDictionarySortingEnabled, diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/OrcOutputBuffer.java b/presto-orc/src/main/java/com/facebook/presto/orc/OrcOutputBuffer.java index a8a3d5f2589e..6c87f1c8a622 100644 --- a/presto-orc/src/main/java/com/facebook/presto/orc/OrcOutputBuffer.java +++ b/presto-orc/src/main/java/com/facebook/presto/orc/OrcOutputBuffer.java @@ -53,10 +53,9 @@ public class OrcOutputBuffer private static final int INSTANCE_SIZE = ClassLayout.parseClass(OrcOutputBuffer.class).instanceSize(); private static final int PAGE_HEADER_SIZE = 3; // ORC spec 3 byte header private static final int INITIAL_BUFFER_SIZE = 256; - private static final int MINIMUM_OUTPUT_BUFFER_CHUNK_SIZE = 4 * 1024; - private static final int MAXIMUM_OUTPUT_BUFFER_CHUNK_SIZE = 1024 * 1024; - private final int maxBufferSize; + private final int minOutputBufferChunkSize; + private final int maxOutputBufferChunkSize; private final int minCompressibleSize; private final CompressionBufferPool compressionBufferPool; @@ -86,6 +85,8 @@ public OrcOutputBuffer(ColumnWriterOptions columnWriterOptions, Optional Date: Fri, 27 Sep 2024 13:49:17 -0400 Subject: [PATCH 35/86] Add doc requirement for new methods to CONTRIBUTING.md --- CONTRIBUTING.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7dccbc7688ca..ff53c6523d5a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -82,6 +82,7 @@ To commit code, you should: 1. 
User friendliness 1. Config options have names and descriptions that can be understood by someone configuring Presto 1. All new language features, new functions, and major features have documentation added + 1. When adding a new method to [Plugin.java](https://github.com/prestodb/presto/blob/master/presto-spi/src/main/java/com/facebook/presto/spi/Plugin.java), include documentation for the new method in the [Presto Developer Guide](https://prestodb.io/docs/current/develop.html). 1. Release notes following the [Release Note Guidelines](https://github.com/prestodb/presto/wiki/Release-Notes-Guidelines) are added for user visible changes * For large features, discuss your design with relevant code owners before you start implementing it. From 9e2ed296ec08dababcd0d1625acbec3f25c68498 Mon Sep 17 00:00:00 2001 From: Amit Dutta Date: Sat, 28 Sep 2024 15:49:56 -0700 Subject: [PATCH 36/86] [native] Advance velox. --- presto-native-execution/velox | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-native-execution/velox b/presto-native-execution/velox index 272995b26bc8..42940c8bb790 160000 --- a/presto-native-execution/velox +++ b/presto-native-execution/velox @@ -1 +1 @@ -Subproject commit 272995b26bc887b224cbf4a131caf98ee15db328 +Subproject commit 42940c8bb790e354be124a1f059ed02f6cf28f1e From bd07a0e15f7ca5e35879b4ad847f3f73b7388c64 Mon Sep 17 00:00:00 2001 From: Elliotte Rusty Harold Date: Tue, 10 Sep 2024 14:40:33 -0400 Subject: [PATCH 37/86] Remove use of deprecated getTypeLength method --- .../main/java/com/facebook/presto/parquet/ParquetEncoding.java | 2 +- .../presto/parquet/batchreader/dictionary/Dictionaries.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetEncoding.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetEncoding.java index 2bacdacd55a7..460af9e7d978 100644 --- 
a/presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetEncoding.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetEncoding.java @@ -70,7 +70,7 @@ public ValuesReader getValuesReader(ColumnDescriptor descriptor, ValuesType valu case INT96: return new FixedLenByteArrayPlainValuesReader(INT96_TYPE_LENGTH); case FIXED_LEN_BYTE_ARRAY: - return new FixedLenByteArrayPlainValuesReader(descriptor.getTypeLength()); + return new FixedLenByteArrayPlainValuesReader(descriptor.getPrimitiveType().getTypeLength()); default: throw new ParquetDecodingException("Plain values reader does not support: " + descriptor.getType()); } diff --git a/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/dictionary/Dictionaries.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/dictionary/Dictionaries.java index 46ccdaca4b0f..64ccff17fbd3 100644 --- a/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/dictionary/Dictionaries.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/dictionary/Dictionaries.java @@ -44,7 +44,7 @@ public static Dictionary createDictionary(ColumnDescriptor columnDescriptor, Dic case BINARY: return new BinaryBatchDictionary(dictionaryPage); case FIXED_LEN_BYTE_ARRAY: - return new BinaryBatchDictionary(dictionaryPage, columnDescriptor.getTypeLength()); + return new BinaryBatchDictionary(dictionaryPage, columnDescriptor.getPrimitiveType().getTypeLength()); case BOOLEAN: default: break; From 24a488bb41b568e9323085873f24d82eaf861ade Mon Sep 17 00:00:00 2001 From: Xiaoxuan Meng Date: Mon, 30 Sep 2024 09:01:15 -0700 Subject: [PATCH 38/86] Allow task writer counts to be non-power of two for native engine task_partitioned_writer_count is used to configure the number of drivers at worker size which doesn't have to be power of two. 
In Prestissimo we need to configure this to avoid the number of table writer workers is a multiple of the number of table writer threads per worker. As Presto java engine still requires table writer threads to be power of two so keep the check for non-native engine scenario. --- .../src/main/sphinx/admin/properties.rst | 19 +++++++++++++++++++ .../presto/SystemSessionProperties.java | 4 ++-- .../presto/execution/TaskManagerConfig.java | 8 ++++---- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/presto-docs/src/main/sphinx/admin/properties.rst b/presto-docs/src/main/sphinx/admin/properties.rst index 0435d11c2966..6a9af7fc6682 100644 --- a/presto-docs/src/main/sphinx/admin/properties.rst +++ b/presto-docs/src/main/sphinx/admin/properties.rst @@ -53,6 +53,25 @@ output data set is not skewed in order to avoid the overhead of hashing and redistributing all the data across the network. This can also be specified on a per-query basis using the ``redistribute_writes`` session property. +``task_writer_count`` +^^^^^^^^^^^^^^^^^^^^^ + +* **Type:** ``integer`` +* **Default value:** ``1`` + +Default number of local parallel table writer threads per worker. It is required +to be a power of two for a Java query engine. + +``task_partitioned_writer_count`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* **Type:** ``integer`` +* **Default value:** ``task_writer_count`` + +Number of local parallel table writer threads per worker for partitioned writes. If not +set, the number set by ``task_writer_count`` will be used. It is required to be a power +of two for a Java query engine. + .. 
_tuning-memory: Memory Management Properties diff --git a/presto-main/src/main/java/com/facebook/presto/SystemSessionProperties.java b/presto-main/src/main/java/com/facebook/presto/SystemSessionProperties.java index a2a688ed4b2c..8bd8a3351efd 100644 --- a/presto-main/src/main/java/com/facebook/presto/SystemSessionProperties.java +++ b/presto-main/src/main/java/com/facebook/presto/SystemSessionProperties.java @@ -520,7 +520,7 @@ public SystemSessionProperties( Integer.class, taskManagerConfig.getWriterCount(), false, - value -> validateValueIsPowerOfTwo(requireNonNull(value, "value is null"), TASK_WRITER_COUNT), + featuresConfig.isNativeExecutionEnabled() ? value -> validateNullablePositiveIntegerValue(value, TASK_WRITER_COUNT) : value -> validateValueIsPowerOfTwo(value, TASK_WRITER_COUNT), value -> value), new PropertyMetadata<>( TASK_PARTITIONED_WRITER_COUNT, @@ -529,7 +529,7 @@ public SystemSessionProperties( Integer.class, taskManagerConfig.getPartitionedWriterCount(), false, - value -> validateValueIsPowerOfTwo(value, TASK_PARTITIONED_WRITER_COUNT), + featuresConfig.isNativeExecutionEnabled() ? value -> validateNullablePositiveIntegerValue(value, TASK_PARTITIONED_WRITER_COUNT) : value -> validateValueIsPowerOfTwo(value, TASK_PARTITIONED_WRITER_COUNT), value -> value), booleanProperty( REDISTRIBUTE_WRITES, diff --git a/presto-main/src/main/java/com/facebook/presto/execution/TaskManagerConfig.java b/presto-main/src/main/java/com/facebook/presto/execution/TaskManagerConfig.java index ea34e1d4cc36..24c87fc2095f 100644 --- a/presto-main/src/main/java/com/facebook/presto/execution/TaskManagerConfig.java +++ b/presto-main/src/main/java/com/facebook/presto/execution/TaskManagerConfig.java @@ -428,14 +428,14 @@ public TaskManagerConfig setInfoMaxAge(Duration infoMaxAge) } @Min(1) - @PowerOfTwo public int getWriterCount() { return writerCount; } + // NOTE: writer count needs to be a power of two for java query engine. 
@Config("task.writer-count") - @ConfigDescription("Number of writers per task") + @ConfigDescription("Number of writer threads per task") public TaskManagerConfig setWriterCount(int writerCount) { this.writerCount = writerCount; @@ -443,14 +443,14 @@ public TaskManagerConfig setWriterCount(int writerCount) } @Min(1) - @PowerOfTwo public Integer getPartitionedWriterCount() { return partitionedWriterCount; } + // NOTE: partitioned writer count needs to be a power of two for java query engine. @Config("task.partitioned-writer-count") - @ConfigDescription("Number of writers per task for partitioned writes. If not set, the number set by task.writer-count will be used") + @ConfigDescription("Number of writer threads per task for partitioned writes. If not set, the number set by task.writer-count will be used") public TaskManagerConfig setPartitionedWriterCount(Integer partitionedWriterCount) { this.partitionedWriterCount = partitionedWriterCount; From 83cb0f6ffbfeffa74a027ad5f5ec73b39adaf36c Mon Sep 17 00:00:00 2001 From: Rebecca Schlussel Date: Mon, 3 Jun 2024 15:54:58 -0400 Subject: [PATCH 39/86] Fix typo in TupleDomainFilterUtils --- .../presto/common/predicate/TupleDomainFilterUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-common/src/main/java/com/facebook/presto/common/predicate/TupleDomainFilterUtils.java b/presto-common/src/main/java/com/facebook/presto/common/predicate/TupleDomainFilterUtils.java index 016872a91c03..9530c743898f 100644 --- a/presto-common/src/main/java/com/facebook/presto/common/predicate/TupleDomainFilterUtils.java +++ b/presto-common/src/main/java/com/facebook/presto/common/predicate/TupleDomainFilterUtils.java @@ -151,7 +151,7 @@ public static TupleDomainFilter toFilter(Domain domain) } /** - * Returns true is ranges represent != or NOT IN filter for double, float or string column. + * Returns true if ranges represent != or NOT IN filter for double, float or string column. *

* The logic is to return true if ranges are next to each other, but don't include the touch value. */ From 266ccecd6aa89362699bf0815c85ed7d9833f35c Mon Sep 17 00:00:00 2001 From: kiersten-stokes Date: Fri, 27 Sep 2024 15:12:52 -0500 Subject: [PATCH 40/86] Add Iceberg REST auth server configuration property Add comment and source Add test --- .../src/main/sphinx/connector/iceberg.rst | 3 ++ .../rest/IcebergRestCatalogFactory.java | 6 ++++ .../iceberg/rest/IcebergRestConfig.java | 14 ++++++++++ .../iceberg/rest/TestIcebergRestConfig.java | 3 ++ .../iceberg/rest/TestIcebergSmokeRest.java | 28 ++++++++++++++++--- 5 files changed, 50 insertions(+), 4 deletions(-) diff --git a/presto-docs/src/main/sphinx/connector/iceberg.rst b/presto-docs/src/main/sphinx/connector/iceberg.rst index 3cad43804f08..731e1f602b36 100644 --- a/presto-docs/src/main/sphinx/connector/iceberg.rst +++ b/presto-docs/src/main/sphinx/connector/iceberg.rst @@ -210,6 +210,9 @@ Property Name Description Available values are ``NONE`` or ``OAUTH2`` (default: ``NONE``). ``OAUTH2`` requires either a credential or token. +``iceberg.rest.auth.oauth2.uri`` OAUTH2 server endpoint URI. + Example: ``https://localhost:9191`` + ``iceberg.rest.auth.oauth2.credential`` The credential to use for OAUTH2 authentication. 
Example: ``key:secret`` diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/rest/IcebergRestCatalogFactory.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/rest/IcebergRestCatalogFactory.java index ababf3b2e2cb..771b8338f781 100644 --- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/rest/IcebergRestCatalogFactory.java +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/rest/IcebergRestCatalogFactory.java @@ -35,6 +35,7 @@ import static java.util.Objects.requireNonNull; import static org.apache.iceberg.CatalogProperties.URI; import static org.apache.iceberg.rest.auth.OAuth2Properties.CREDENTIAL; +import static org.apache.iceberg.rest.auth.OAuth2Properties.OAUTH2_SERVER_URI; import static org.apache.iceberg.rest.auth.OAuth2Properties.TOKEN; public class IcebergRestCatalogFactory @@ -67,6 +68,11 @@ protected Map getCatalogProperties(ConnectorSession session) catalogConfig.getAuthenticationType().ifPresent(type -> { if (type == OAUTH2) { + // The oauth2/tokens endpoint of the REST catalog spec has been deprecated and will + // be removed in Iceberg 2.0 (https://github.com/apache/iceberg/pull/10603) + // TODO auth server URI will eventually need to be made a required property + catalogConfig.getAuthenticationServerUri().ifPresent(authServerUri -> properties.put(OAUTH2_SERVER_URI, authServerUri)); + if (!catalogConfig.credentialOrTokenExists()) { throw new IllegalStateException("iceberg.rest.auth.oauth2 requires either a credential or a token"); } diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/rest/IcebergRestConfig.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/rest/IcebergRestConfig.java index b00c68f09aca..fe0d1a5522cb 100644 --- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/rest/IcebergRestConfig.java +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/rest/IcebergRestConfig.java @@ -25,6 +25,7 @@ public class IcebergRestConfig private String 
serverUri; private SessionType sessionType; private AuthenticationType authenticationType; + private String authenticationServerUri; private String credential; private String token; @@ -68,6 +69,19 @@ public IcebergRestConfig setAuthenticationType(AuthenticationType authentication return this; } + public Optional getAuthenticationServerUri() + { + return Optional.ofNullable(authenticationServerUri); + } + + @Config("iceberg.rest.auth.oauth2.uri") + @ConfigDescription("The URI to connect to the OAUTH2 server") + public IcebergRestConfig setAuthenticationServerUri(String authServerUri) + { + this.authenticationServerUri = authServerUri; + return this; + } + public Optional getCredential() { return Optional.ofNullable(credential); diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/rest/TestIcebergRestConfig.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/rest/TestIcebergRestConfig.java index 730a58d862c5..a332a8f6e6fc 100644 --- a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/rest/TestIcebergRestConfig.java +++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/rest/TestIcebergRestConfig.java @@ -32,6 +32,7 @@ public void testDefaults() assertRecordedDefaults(ConfigAssertions.recordDefaults(IcebergRestConfig.class) .setServerUri(null) .setAuthenticationType(null) + .setAuthenticationServerUri(null) .setCredential(null) .setToken(null) .setSessionType(null)); @@ -43,6 +44,7 @@ public void testExplicitPropertyMappings() Map properties = ImmutableMap.builder() .put("iceberg.rest.uri", "http://localhost:xxx") .put("iceberg.rest.auth.type", "OAUTH2") + .put("iceberg.rest.auth.oauth2.uri", "http://localhost:yyy") .put("iceberg.rest.auth.oauth2.credential", "key:secret") .put("iceberg.rest.auth.oauth2.token", "SXVLUXUhIExFQ0tFUiEK") .put("iceberg.rest.session.type", "USER") @@ -51,6 +53,7 @@ public void testExplicitPropertyMappings() IcebergRestConfig expected = new IcebergRestConfig() 
.setServerUri("http://localhost:xxx") .setAuthenticationType(OAUTH2) + .setAuthenticationServerUri("http://localhost:yyy") .setCredential("key:secret") .setToken("SXVLUXUhIExFQ0tFUiEK") .setSessionType(USER); diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/rest/TestIcebergSmokeRest.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/rest/TestIcebergSmokeRest.java index 65c34bc31bbe..d74c11c4ad06 100644 --- a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/rest/TestIcebergSmokeRest.java +++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/rest/TestIcebergSmokeRest.java @@ -29,6 +29,7 @@ import com.facebook.presto.testing.QueryRunner; import com.google.common.collect.ImmutableMap; import org.apache.iceberg.Table; +import org.apache.iceberg.rest.RESTCatalog; import org.assertj.core.util.Files; import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; @@ -42,12 +43,15 @@ import static com.facebook.presto.iceberg.FileFormat.PARQUET; import static com.facebook.presto.iceberg.IcebergQueryRunner.ICEBERG_CATALOG; import static com.facebook.presto.iceberg.IcebergUtil.getNativeIcebergTable; +import static com.facebook.presto.iceberg.rest.AuthenticationType.OAUTH2; import static com.facebook.presto.iceberg.rest.IcebergRestTestUtil.getRestServer; import static com.facebook.presto.iceberg.rest.IcebergRestTestUtil.restConnectorProperties; import static com.google.common.io.MoreFiles.deleteRecursively; import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; import static java.lang.String.format; +import static org.apache.iceberg.rest.auth.OAuth2Properties.OAUTH2_SERVER_URI; import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.testng.Assert.assertEquals; @Test public class TestIcebergSmokeRest @@ -106,12 +110,11 @@ protected QueryRunner createQueryRunner() Optional.of(warehouseLocation.toPath())); } - protected IcebergNativeCatalogFactory 
getCatalogFactory() + protected IcebergNativeCatalogFactory getCatalogFactory(IcebergRestConfig restConfig) { IcebergConfig icebergConfig = new IcebergConfig() .setCatalogType(REST) - .setCatalogWarehouse(warehouseLocation.getAbsolutePath().toString()); - IcebergRestConfig restConfig = new IcebergRestConfig().setServerUri(serverUri); + .setCatalogWarehouse(warehouseLocation.getAbsolutePath()); return new IcebergRestCatalogFactory( icebergConfig, @@ -125,7 +128,8 @@ protected IcebergNativeCatalogFactory getCatalogFactory() @Override protected Table getIcebergTable(ConnectorSession session, String schema, String tableName) { - return getNativeIcebergTable(getCatalogFactory(), + IcebergRestConfig restConfig = new IcebergRestConfig().setServerUri(serverUri); + return getNativeIcebergTable(getCatalogFactory(restConfig), session, SchemaTableName.valueOf(schema + "." + tableName)); } @@ -192,4 +196,20 @@ public void testMetadataDeleteOnTableWithUnsupportedSpecsWhoseDataAllDeleted(Str super.testMetadataDeleteOnTableWithUnsupportedSpecsWhoseDataAllDeleted(version, mode); } } + + @Test + public void testSetOauth2ServerUriPropertyI() + { + String authEndpoint = "http://localhost:8888"; + IcebergRestConfig restConfig = new IcebergRestConfig() + .setServerUri(serverUri) + .setAuthenticationType(OAUTH2) + .setToken("SXVLUXUhIExFQ0tFUiEK") + .setAuthenticationServerUri(authEndpoint); + + IcebergRestCatalogFactory catalogFactory = (IcebergRestCatalogFactory) getCatalogFactory(restConfig); + RESTCatalog catalog = (RESTCatalog) catalogFactory.getCatalog(getSession().toConnectorSession()); + + assertEquals(catalog.properties().get(OAUTH2_SERVER_URI), authEndpoint); + } } From af8852255b1cb1eca69625434403b97c95eb4aeb Mon Sep 17 00:00:00 2001 From: Amit Dutta Date: Tue, 1 Oct 2024 08:46:05 -0700 Subject: [PATCH 41/86] [native] Advance velox. 
--- presto-native-execution/velox | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-native-execution/velox b/presto-native-execution/velox index 42940c8bb790..a8645d623ea6 160000 --- a/presto-native-execution/velox +++ b/presto-native-execution/velox @@ -1 +1 @@ -Subproject commit 42940c8bb790e354be124a1f059ed02f6cf28f1e +Subproject commit a8645d623ea67bd13dd83fd90f2f74a706f2aa69 From c4c02f8fe85cc1a8ceccf2cc7109388ba6e1d8b8 Mon Sep 17 00:00:00 2001 From: Xiaoxuan Meng Date: Tue, 1 Oct 2024 15:57:11 -0700 Subject: [PATCH 42/86] [native]Remove bucket property set in table write node --- .../presto_cpp/main/types/PrestoToVeloxQueryPlan.cpp | 8 -------- presto-native-execution/velox | 2 +- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxQueryPlan.cpp b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxQueryPlan.cpp index c4e96972c69b..9d3245244462 100644 --- a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxQueryPlan.cpp +++ b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxQueryPlan.cpp @@ -16,7 +16,6 @@ #include "presto_cpp/main/types/PrestoToVeloxConnector.h" #include "presto_cpp/main/types/PrestoToVeloxQueryPlan.h" #include -#include "velox/connectors/hive/HiveDataSink.h" #include "velox/core/QueryCtx.h" #include "velox/exec/HashPartitionFunction.h" #include "velox/exec/RoundRobinPartitionFunction.h" @@ -1317,12 +1316,6 @@ VeloxQueryPlanConverterBase::toVeloxQueryPlan( auto insertTableHandle = std::make_shared( connectorId, connectorInsertHandle); - bool hasBucketProperty{false}; - if (auto* HiveInsertTableHandle = - dynamic_cast( - connectorInsertHandle.get())) { - hasBucketProperty = HiveInsertTableHandle->bucketProperty() != nullptr; - } const auto outputType = toRowType( generateOutputVariables( @@ -1348,7 +1341,6 @@ VeloxQueryPlanConverterBase::toVeloxQueryPlan( std::move(aggregationNode), std::move(insertTableHandle), 
node->partitioningScheme != nullptr, - hasBucketProperty, outputType, getCommitStrategy(), sourceVeloxPlan); diff --git a/presto-native-execution/velox b/presto-native-execution/velox index a8645d623ea6..bfb8ebe9282a 160000 --- a/presto-native-execution/velox +++ b/presto-native-execution/velox @@ -1 +1 @@ -Subproject commit a8645d623ea67bd13dd83fd90f2f74a706f2aa69 +Subproject commit bfb8ebe9282a2665ef8edecde95fdb8b13b67080 From 176e8867cd254bb428a5ab8b58118a5be2b8b4d8 Mon Sep 17 00:00:00 2001 From: Jimmy Lu Date: Sun, 29 Sep 2024 17:44:49 -0700 Subject: [PATCH 43/86] [native] Push down FilterNode into TableScan Also add `$path` and `$bucket` in split info columns and fix split counts in coordinator UI. --- .../presto_cpp/main/PrestoTask.cpp | 16 +- .../main/types/PrestoToVeloxConnector.cpp | 14 +- .../main/types/PrestoToVeloxQueryPlan.cpp | 23 ++ .../main/types/tests/PlanConverterTest.cpp | 4 + .../types/tests/PrestoToVeloxSplitTest.cpp | 2 + .../main/types/tests/data/ScanAgg.json | 333 ++++++++++-------- 6 files changed, 238 insertions(+), 154 deletions(-) diff --git a/presto-native-execution/presto_cpp/main/PrestoTask.cpp b/presto-native-execution/presto_cpp/main/PrestoTask.cpp index ae84bff7ded8..44fac67de078 100644 --- a/presto-native-execution/presto_cpp/main/PrestoTask.cpp +++ b/presto-native-execution/presto_cpp/main/PrestoTask.cpp @@ -590,10 +590,20 @@ void PrestoTask::updateExecutionInfoLocked( prestoTaskStats.outputPositions = 0; prestoTaskStats.outputDataSizeInBytes = 0; - prestoTaskStats.queuedDrivers = veloxTaskStats.numQueuedDrivers; - prestoTaskStats.totalDrivers = veloxTaskStats.numTotalDrivers; + // Presto Java reports number of drivers to number of splits in Presto UI + // because split and driver are 1 to 1 mapping relationship. This is not true + // in Prestissimo where 1 driver handles many splits. 
In order to quickly + // unblock developers from viewing the correct progress of splits in + // Prestissimo's coordinator UI, we put number of splits in total, queued, and + // finished to indicate the progress of the query. Number of running drivers + // are passed as it is to have a proper running drivers count in UI. + // + // TODO: We should really extend the API (protocol::TaskStats and Presto + // coordinator UI) to have splits information as a proper fix. + prestoTaskStats.totalDrivers = veloxTaskStats.numTotalSplits; + prestoTaskStats.queuedDrivers = veloxTaskStats.numQueuedSplits; prestoTaskStats.runningDrivers = veloxTaskStats.numRunningDrivers; - prestoTaskStats.completedDrivers = veloxTaskStats.numCompletedDrivers; + prestoTaskStats.completedDrivers = veloxTaskStats.numFinishedSplits; prestoTaskStats.pipelines.resize(veloxTaskStats.pipelineStats.size()); for (int i = 0; i < veloxTaskStats.pipelineStats.size(); ++i) { diff --git a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp index faa4f93eaf67..a9a6f21ed25f 100644 --- a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp +++ b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp @@ -1109,13 +1109,15 @@ HivePrestoToVeloxConnector::toVeloxSplit( for (const auto& [key, value] : hiveSplit->storage.serdeParameters) { serdeParameters[key] = value; } - std::unordered_map infoColumns; - infoColumns.reserve(2); - infoColumns.insert( - {"$file_size", std::to_string(hiveSplit->fileSplit.fileSize)}); - infoColumns.insert( + std::unordered_map infoColumns = { + {"$path", hiveSplit->fileSplit.path}, + {"$file_size", std::to_string(hiveSplit->fileSplit.fileSize)}, {"$file_modified_time", - std::to_string(hiveSplit->fileSplit.fileModifiedTime)}); + std::to_string(hiveSplit->fileSplit.fileModifiedTime)}, + }; + if (hiveSplit->tableBucketNumber) { + infoColumns["$bucket"] = 
std::to_string(*hiveSplit->tableBucketNumber); + } auto veloxSplit = std::make_unique( catalogId, diff --git a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxQueryPlan.cpp b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxQueryPlan.cpp index 9d3245244462..28ca974c5b6a 100644 --- a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxQueryPlan.cpp +++ b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxQueryPlan.cpp @@ -606,6 +606,29 @@ core::PlanNodePtr VeloxQueryPlanConverterBase::toVeloxQueryPlan( left->outputType())); } + // For ScanFilter and ScanFilterProject, the planner sometimes put the + // remaining filter in a FilterNode after the TableScan. We need to put it + // back to TableScan so that Velox can leverage it to do stripe level + // skipping. Otherwise we only get row level skipping and lose some + // optimization opportunity in case of very low selectivity. + if (auto tableScan = std::dynamic_pointer_cast( + node->source)) { + if (auto* tableLayout = dynamic_cast( + tableScan->table.connectorTableLayout.get())) { + auto remainingFilter = + exprConverter_.toVeloxExpr(tableLayout->remainingPredicate); + if (auto* constant = dynamic_cast( + remainingFilter.get())) { + bool value = constant->value().value(); + // We should get empty values node instead of table scan if the + // remaining filter is constantly false. 
+ VELOX_CHECK(value, "Unexpected always-false remaining predicate"); + tableLayout->remainingPredicate = node->predicate; + return toVeloxQueryPlan(tableScan, tableWriteInfo, taskId); + } + } + } + return std::make_shared( node->id, exprConverter_.toVeloxExpr(node->predicate), diff --git a/presto-native-execution/presto_cpp/main/types/tests/PlanConverterTest.cpp b/presto-native-execution/presto_cpp/main/types/tests/PlanConverterTest.cpp index b72814044fb0..a29db9f97b17 100644 --- a/presto-native-execution/presto_cpp/main/types/tests/PlanConverterTest.cpp +++ b/presto-native-execution/presto_cpp/main/types/tests/PlanConverterTest.cpp @@ -143,6 +143,10 @@ TEST_F(PlanConverterTest, scanAgg) { ASSERT_EQ( tableHandle->dataColumns()->toString(), "ROW>>,comment:VARCHAR>"); + ASSERT_TRUE(tableHandle->remainingFilter()); + ASSERT_EQ( + tableHandle->remainingFilter()->toString(), + "presto.default.lt(presto.default.rand(),0.0001)"); auto tableParameters = tableHandle->tableParameters(); ASSERT_EQ(tableParameters.size(), 6); diff --git a/presto-native-execution/presto_cpp/main/types/tests/PrestoToVeloxSplitTest.cpp b/presto-native-execution/presto_cpp/main/types/tests/PrestoToVeloxSplitTest.cpp index 5e3a8f98c295..bdb08da2d6eb 100644 --- a/presto-native-execution/presto_cpp/main/types/tests/PrestoToVeloxSplitTest.cpp +++ b/presto-native-execution/presto_cpp/main/types/tests/PrestoToVeloxSplitTest.cpp @@ -157,6 +157,8 @@ TEST_F(PrestoToVeloxSplitTest, bucketConversion) { ASSERT_EQ(veloxHiveSplit.bucketConversion->tableBucketCount, 4096); ASSERT_EQ(veloxHiveSplit.bucketConversion->partitionBucketCount, 512); ASSERT_EQ(veloxHiveSplit.bucketConversion->bucketColumnHandles.size(), 1); + ASSERT_EQ(veloxHiveSplit.infoColumns.at("$path"), hiveSplit.fileSplit.path); + ASSERT_EQ(veloxHiveSplit.infoColumns.at("$bucket"), "42"); auto& veloxColumn = veloxHiveSplit.bucketConversion->bucketColumnHandles[0]; ASSERT_EQ(veloxColumn->name(), "c0"); ASSERT_EQ(*veloxColumn->dataType(), 
*BIGINT()); diff --git a/presto-native-execution/presto_cpp/main/types/tests/data/ScanAgg.json b/presto-native-execution/presto_cpp/main/types/tests/data/ScanAgg.json index cdda8bdb383d..1033e8a2617f 100644 --- a/presto-native-execution/presto_cpp/main/types/tests/data/ScanAgg.json +++ b/presto-native-execution/presto_cpp/main/types/tests/data/ScanAgg.json @@ -7,166 +7,209 @@ "@type":".ProjectNode", "id":"1", "source":{ - "@type":".TableScanNode", - "id":"0", - "table":{ - "connectorId":"hive", - "connectorHandle":{ - "@type":"hive", - "schemaName":"tpch", - "tableName":"nation" - }, - "transaction":{ - "@type":"hive", - "uuid":"7cc96264-a0fa-45e4-9042-62754ac3a5a0" - }, - "connectorTableLayout":{ - "@type":"hive", - "schemaTableName":{ - "schema":"tpch", - "table":"nation" + "@type" : ".FilterNode", + "id" : "449", + "source":{ + "@type":".TableScanNode", + "id":"0", + "table":{ + "connectorId":"hive", + "connectorHandle":{ + "@type":"hive", + "schemaName":"tpch", + "tableName":"nation" + }, + "transaction":{ + "@type":"hive", + "uuid":"7cc96264-a0fa-45e4-9042-62754ac3a5a0" }, - "tablePath":"a/path/to/a/table", - "partitionColumns":[ + "connectorTableLayout":{ + "@type":"hive", + "schemaTableName":{ + "schema":"tpch", + "table":"nation" + }, + "tablePath":"a/path/to/a/table", + "partitionColumns":[ - ], - "dataColumns":[ - { - "name":"nationkey", - "type":"bigint" + ], + "dataColumns":[ + { + "name":"nationkey", + "type":"bigint" + }, + { + "name":"name", + "type":"varchar(25)" + }, + { + "name":"regionkey", + "type":"bigint" + }, + { + "name":"complex_type", + "type":"array>>" + }, + { + "name":"comment", + "type":"varchar(152)" + } + ], + "tableParameters":{ + "presto_version":"testversion", + "presto_query_id":"20200908_214711_00000_7xpqg", + "numFiles":"1", + "numRows":"25", + "rawDataSize":"2734", + "totalSize":"1451" }, - { - "name":"name", - "type":"varchar(25)" + "domainPredicate":{ + "columnDomains":[ + { + "column":"psudo_bool_column", + "domain":{ + 
"values":{ + "@type":"sortable", + "type":"boolean", + "ranges":[ + { + "low":{ + "type":"boolean", + "bound":"ABOVE" + }, + "high":{ + "type":"boolean", + "valueBlock":"CgAAAEJZVEVfQVJSQVkBAAAAAAE=", + "bound":"BELOW" + } + }, + { + "low":{ + "type":"boolean", + "valueBlock":"CgAAAEJZVEVfQVJSQVkBAAAAAAE=", + "bound":"ABOVE" + }, + "high":{ + "type":"boolean", + "bound":"BELOW" + } + } + ] + }, + "nullAllowed":false + } + } + ] }, - { - "name":"regionkey", - "type":"bigint" + "remainingPredicate":{ + "@type":"constant", + "valueBlock":"CgAAAEJZVEVfQVJSQVkBAAAAAAE=", + "type":"boolean" }, - { - "name":"complex_type", - "type":"array>>" + "predicateColumns":{ + }, - { - "name":"comment", - "type":"varchar(152)" - } - ], - "tableParameters":{ - "presto_version":"testversion", - "presto_query_id":"20200908_214711_00000_7xpqg", - "numFiles":"1", - "numRows":"25", - "rawDataSize":"2734", - "totalSize":"1451" - }, - "domainPredicate":{ - "columnDomains":[ + "partitionColumnPredicate":{ + "columnDomains":[ + + ] + }, + "pushdownFilterEnabled":true, + "layoutString":"tpch.nation{}", + "requestedColumns":[ { - "column":"psudo_bool_column", - "domain":{ - "values":{ - "@type":"sortable", - "type":"boolean", - "ranges":[ - { - "low":{ - "type":"boolean", - "bound":"ABOVE" - }, - "high":{ - "type":"boolean", - "valueBlock":"CgAAAEJZVEVfQVJSQVkBAAAAAAE=", - "bound":"BELOW" - } - }, - { - "low":{ - "type":"boolean", - "valueBlock":"CgAAAEJZVEVfQVJSQVkBAAAAAAE=", - "bound":"ABOVE" - }, - "high":{ - "type":"boolean", - "bound":"BELOW" - } - } - ] - }, - "nullAllowed":false - } - } - ] - }, - "remainingPredicate":{ - "@type":"constant", - "valueBlock":"CgAAAEJZVEVfQVJSQVkBAAAAAAE=", - "type":"boolean" - }, - "predicateColumns":{ + "@type":"hive", + "name":"regionkey", + "hiveType":"bigint", + "typeSignature":"bigint", + "hiveColumnIndex":2, + "columnType":"REGULAR", + "requiredSubfields":[ + ] + } + ], + "partialAggregationsPushedDown":false, + "appendRowNumber":false, + 
"footerStatsUnreliable":false + } + }, + "outputVariables":[ + { + "@type":"variable", + "name":"regionkey", + "type":"bigint" }, - "partitionColumnPredicate":{ - "columnDomains":[ + { + "@type":"variable", + "name":"complex_type", + "type":"array(map(varchar, row(id bigint, description varchar)))" + } + ], + "assignments":{ + "regionkey":{ + "@type":"hive", + "name":"regionkey", + "hiveType":"bigint", + "typeSignature":"bigint", + "hiveColumnIndex":2, + "columnType":"REGULAR", + "requiredSubfields":[ ] }, - "pushdownFilterEnabled":true, - "layoutString":"tpch.nation{}", - "requestedColumns":[ - { - "@type":"hive", - "name":"regionkey", - "hiveType":"bigint", - "typeSignature":"bigint", - "hiveColumnIndex":2, - "columnType":"REGULAR", - "requiredSubfields":[ - - ] - } - ], - "partialAggregationsPushedDown":false, - "appendRowNumber":false, - "footerStatsUnreliable":false + "complex_type":{ + "@type":"hive", + "name":"complex_type", + "hiveType":"array>>", + "typeSignature":"array(map(varchar, row(id bigint, description varchar)))", + "hiveColumnIndex":3, + "columnType":"REGULAR", + "requiredSubfields":[ + "complex_type[1][\"foo\"].id", + "complex_type[2][\"bar\"].id" + ] + } } }, - "outputVariables":[ - { - "@type":"variable", - "name":"regionkey", - "type":"bigint" - }, - { - "@type":"variable", - "name":"complex_type", - "type":"array(map(varchar, row(id bigint, description varchar)))" - } - ], - "assignments":{ - "regionkey":{ - "@type":"hive", - "name":"regionkey", - "hiveType":"bigint", - "typeSignature":"bigint", - "hiveColumnIndex":2, - "columnType":"REGULAR", - "requiredSubfields":[ - - ] + "predicate": { + "@type" : "call", + "displayName" : "LESS_THAN", + "functionHandle" : { + "@type" : "$static", + "signature" : { + "name" : "presto.default.$operator$less_than", + "kind" : "SCALAR", + "typeVariableConstraints" : [ ], + "longVariableConstraints" : [ ], + "returnType" : "boolean", + "argumentTypes" : [ "double", "double" ], + "variableArity" : false + } 
}, - "complex_type":{ - "@type":"hive", - "name":"complex_type", - "hiveType":"array>>", - "typeSignature":"array(map(varchar, row(id bigint, description varchar)))", - "hiveColumnIndex":3, - "columnType":"REGULAR", - "requiredSubfields":[ - "complex_type[1][\"foo\"].id", - "complex_type[2][\"bar\"].id" - ] - } + "returnType" : "boolean", + "arguments" : [ { + "@type" : "call", + "displayName" : "rand", + "functionHandle" : { + "@type" : "$static", + "signature" : { + "name" : "presto.default.rand", + "kind" : "SCALAR", + "typeVariableConstraints" : [ ], + "longVariableConstraints" : [ ], + "returnType" : "double", + "argumentTypes" : [ ], + "variableArity" : false + } + }, + "returnType" : "double", + "arguments" : [ ] + }, { + "@type" : "constant", + "valueBlock" : "CgAAAExPTkdfQVJSQVkBAAAAAC1DHOviNho/", + "type" : "double" + } ] } }, "assignments":{ From 699a808d0a5ca3d4a7ad11e1b437ca5cb9950cdc Mon Sep 17 00:00:00 2001 From: Serge Druzkin Date: Tue, 1 Oct 2024 17:25:48 -0700 Subject: [PATCH 44/86] [presto-orc] Fix row index position when compression is disabled Summary: Fix a bug in position accounting in the OrcOutputBuffer when doing batched writeBytes. 
Differential Revision: D63733934 --- .../facebook/presto/orc/OrcOutputBuffer.java | 2 ++ .../presto/orc/TestOrcOutputBuffer.java | 21 ++++++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/OrcOutputBuffer.java b/presto-orc/src/main/java/com/facebook/presto/orc/OrcOutputBuffer.java index 6c87f1c8a622..0882a258f898 100644 --- a/presto-orc/src/main/java/com/facebook/presto/orc/OrcOutputBuffer.java +++ b/presto-orc/src/main/java/com/facebook/presto/orc/OrcOutputBuffer.java @@ -290,11 +290,13 @@ public void writeBytes(byte[] bytes, int bytesOffset, int length) // write maxBufferSize chunks directly to output if (length >= maxBufferSize) { flushBufferToOutputStream(); + int bytesOffsetBefore = bytesOffset; while (length >= maxBufferSize) { writeChunkToOutputStream(bytes, bytesOffset, maxBufferSize); length -= maxBufferSize; bytesOffset += maxBufferSize; } + bufferOffset += bytesOffset - bytesOffsetBefore; } // write the tail smaller than maxBufferSize to the buffer diff --git a/presto-orc/src/test/java/com/facebook/presto/orc/TestOrcOutputBuffer.java b/presto-orc/src/test/java/com/facebook/presto/orc/TestOrcOutputBuffer.java index 77d1af62e1f4..c63fa0cda73e 100644 --- a/presto-orc/src/test/java/com/facebook/presto/orc/TestOrcOutputBuffer.java +++ b/presto-orc/src/test/java/com/facebook/presto/orc/TestOrcOutputBuffer.java @@ -58,24 +58,43 @@ private enum WriteMode @Test public void testWriteHugeByteChucks() { + // compression buffer size should be at least 2x smaller than the size of test data int size = 1024 * 1024; byte[] largeByteArray = new byte[size]; Arrays.fill(largeByteArray, (byte) 0xA); - ColumnWriterOptions columnWriterOptions = ColumnWriterOptions.builder().setCompressionKind(CompressionKind.NONE).build(); + ColumnWriterOptions columnWriterOptions = ColumnWriterOptions.builder() + .setCompressionKind(CompressionKind.NONE) + .setCompressionMaxBufferSize(DataSize.valueOf("256kB")) + 
.build(); OrcOutputBuffer orcOutputBuffer = new OrcOutputBuffer(columnWriterOptions, Optional.empty()); + // write size-10 bytes from offset 10 DynamicSliceOutput output = new DynamicSliceOutput(size); orcOutputBuffer.writeBytes(largeByteArray, 10, size - 10); orcOutputBuffer.flush(); assertEquals(orcOutputBuffer.writeDataTo(output), size - 10); assertEquals(output.slice(), wrappedBuffer(largeByteArray, 10, size - 10)); + assertEquals(orcOutputBuffer.size(), size - 10); orcOutputBuffer.reset(); output.reset(); + + // write size-100 bytes from offset 100 orcOutputBuffer.writeBytes(wrappedBuffer(largeByteArray), 100, size - 100); orcOutputBuffer.flush(); assertEquals(orcOutputBuffer.writeDataTo(output), size - 100); assertEquals(output.slice(), wrappedBuffer(largeByteArray, 100, size - 100)); + assertEquals(orcOutputBuffer.size(), size - 100); + + orcOutputBuffer.reset(); + output.reset(); + + // write all bytes + orcOutputBuffer.writeBytes(wrappedBuffer(largeByteArray), 0, size); + orcOutputBuffer.flush(); + assertEquals(orcOutputBuffer.writeDataTo(output), size); + assertEquals(output.slice(), wrappedBuffer(largeByteArray)); + assertEquals(orcOutputBuffer.size(), size); } @Test From 8a026e5a734d01f69fae591983cbb364f29193ac Mon Sep 17 00:00:00 2001 From: Amit Dutta Date: Wed, 2 Oct 2024 09:53:39 -0700 Subject: [PATCH 45/86] Remove array_dupes and array_has_dupes alias. 
--- .../presto/operator/scalar/sql/ArraySqlFunctions.java | 4 ++-- .../presto/operator/scalar/sql/TestArraySqlFunctions.java | 6 ------ 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/presto-main/src/main/java/com/facebook/presto/operator/scalar/sql/ArraySqlFunctions.java b/presto-main/src/main/java/com/facebook/presto/operator/scalar/sql/ArraySqlFunctions.java index b51855cc9a2e..2fc1218ea240 100644 --- a/presto-main/src/main/java/com/facebook/presto/operator/scalar/sql/ArraySqlFunctions.java +++ b/presto-main/src/main/java/com/facebook/presto/operator/scalar/sql/ArraySqlFunctions.java @@ -69,7 +69,7 @@ public static String arrayFrequency() "m -> m)"; } - @SqlInvokedScalarFunction(value = "array_duplicates", alias = {"array_dupes"}, deterministic = true, calledOnNullInput = false) + @SqlInvokedScalarFunction(value = "array_duplicates", deterministic = true, calledOnNullInput = false) @Description("Returns set of elements that have duplicates") @SqlParameter(name = "input", type = "array(T)") @TypeParameter("T") @@ -81,7 +81,7 @@ public static String arrayDuplicates() "map_keys(map_filter(array_frequency(input), (k, v) -> v > 1)))"; } - @SqlInvokedScalarFunction(value = "array_has_duplicates", alias = {"array_has_dupes"}, deterministic = true, calledOnNullInput = false) + @SqlInvokedScalarFunction(value = "array_has_duplicates", deterministic = true, calledOnNullInput = false) @Description("Returns whether array has any duplicate element") @TypeParameter("T") @SqlParameter(name = "input", type = "array(T)") diff --git a/presto-main/src/test/java/com/facebook/presto/operator/scalar/sql/TestArraySqlFunctions.java b/presto-main/src/test/java/com/facebook/presto/operator/scalar/sql/TestArraySqlFunctions.java index bcc96309d96a..2e0e865f13be 100644 --- a/presto-main/src/test/java/com/facebook/presto/operator/scalar/sql/TestArraySqlFunctions.java +++ b/presto-main/src/test/java/com/facebook/presto/operator/scalar/sql/TestArraySqlFunctions.java @@ -196,9 
+196,6 @@ public void testArrayHasDuplicates() assertFunction("array_has_duplicates(array[0, null])", BOOLEAN, false); assertFunction("array_has_duplicates(array[0, null, null])", BOOLEAN, true); - // Test legacy name. - assertFunction("array_has_dupes(array[varchar 'a', varchar 'b', varchar 'a'])", BOOLEAN, true); - assertFunction("array_has_duplicates(array[array[1], array[2], array[]])", BOOLEAN, false); assertFunction("array_has_duplicates(array[array[1], array[2], array[2]])", BOOLEAN, true); assertFunction("array_has_duplicates(array[(1, 2), (1, 2)])", BOOLEAN, true); @@ -224,9 +221,6 @@ public void testArrayDuplicates() assertFunction("array_duplicates(array[0, null])", new ArrayType(INTEGER), ImmutableList.of()); assertFunction("array_duplicates(array[0, null, null])", new ArrayType(INTEGER), singletonList(null)); - // Test legacy name. - assertFunction("array_dupes(array[1, 2, 1])", new ArrayType(INTEGER), ImmutableList.of(1)); - RowType rowType = RowType.from(ImmutableList.of(RowType.field(INTEGER), RowType.field(INTEGER))); assertFunction("array_duplicates(array[array[1], array[2], array[]])", new ArrayType(new ArrayType(INTEGER)), ImmutableList.of()); assertFunction("array_duplicates(array[array[1], array[2], array[2]])", new ArrayType(new ArrayType(INTEGER)), ImmutableList.of(ImmutableList.of(2))); From 8c264599aa87b230c5b74d4548440afcb10d2248 Mon Sep 17 00:00:00 2001 From: Elliotte Rusty Harold Date: Tue, 23 Jul 2024 09:26:09 -0400 Subject: [PATCH 46/86] Expand block documentation --- .../facebook/presto/common/block/Block.java | 23 ++++++++++++++----- .../presto/common/block/DictionaryBlock.java | 18 +++++++++++++++ 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/presto-common/src/main/java/com/facebook/presto/common/block/Block.java b/presto-common/src/main/java/com/facebook/presto/common/block/Block.java index 42c33554d510..6b5379ba06eb 100644 --- a/presto-common/src/main/java/com/facebook/presto/common/block/Block.java +++ 
b/presto-common/src/main/java/com/facebook/presto/common/block/Block.java @@ -25,7 +25,7 @@ /** * A block packs positionCount values into a chunk of memory. How the values are packed, * whether compression is used, endianness, and other implementation details are up to the subclasses. - * However, for purposes of API, you can think of a Block as a sequence of values that + * However, for purposes of API, you can think of a Block as a sequence of zero-indexed values that * can be read by calling the getter methods in this interface. For instance, * you can read positionCount bytes by calling * block.getByte(0), block.getByte(1), ... block.getByte(positionCount - 1). @@ -51,6 +51,8 @@ default int getSliceLength(int position) /** * Gets a byte in the value at {@code position}. + * + * @throws IllegalArgumentException if position is negative or greater than or equal to the positionCount */ default byte getByte(int position) { @@ -59,6 +61,8 @@ default byte getByte(int position) /** * Gets a short in the value at {@code position}. + * + * @throws IllegalArgumentException if position is negative or greater than or equal to the positionCount */ default short getShort(int position) { @@ -67,6 +71,8 @@ default short getShort(int position) /** * Gets an int in the value at {@code position}. + * + * @throws IllegalArgumentException if position is negative or greater than or equal to the positionCount */ default int getInt(int position) { @@ -75,6 +81,8 @@ default int getInt(int position) /** * Gets a long in the value at {@code position}. + * + * @throws IllegalArgumentException if position is negative or greater than or equal to the positionCount */ default long getLong(int position) { @@ -99,7 +107,8 @@ default Slice getSlice(int position, int offset, int length) /** * Gets a block in the value at {@code position}. 
- * @return + * + * @throws IllegalArgumentException if position is negative or greater than or equal to the positionCount */ default Block getBlock(int position) { @@ -107,7 +116,7 @@ default Block getBlock(int position) } /** - * Is the byte sequences at {@code offset} in the value at {@code position} equal + * Is the byte sequence at {@code offset} in the value at {@code position} equal * to the byte sequence at {@code otherOffset} in {@code otherSlice}. * This method must be implemented if @{code getSlice} is implemented. */ @@ -147,7 +156,7 @@ default void writeBytesTo(int position, int offset, int length, SliceOutput slic } /** - * Appends the value at {@code position} to {@code blockBuilder} and close the entry. + * Appends the value at {@code position} to {@code blockBuilder} and closes the entry. */ void writePositionTo(int position, BlockBuilder blockBuilder); @@ -378,12 +387,14 @@ default Block getLoadedBlock() Block appendNull(); /** - * Returns the converted long value at {@code position} if the value ar {@code position} can be converted to long. - * @throws UnsupportedOperationException if value at {@code position} is not compatible to be converted to long. + * Returns the converted long value at {@code position} if the value at {@code position} can be converted to long. * * Difference between toLong() and getLong() is: * getLong() would only return value when the block is LongArrayBlock, otherwise it would throw exception. * toLong() would return value for compatible types: LongArrayBlock, IntArrayBlock, ByteArrayBlock and ShortArrayBlock. + * + * @throws UnsupportedOperationException if value at {@code position} is not able to be converted to long. 
+ * @throws IllegalArgumentException if position is negative or greater than or equal to the positionCount */ default long toLong(int position) { diff --git a/presto-common/src/main/java/com/facebook/presto/common/block/DictionaryBlock.java b/presto-common/src/main/java/com/facebook/presto/common/block/DictionaryBlock.java index 252f6db09eb1..9587f6c8bfc6 100644 --- a/presto-common/src/main/java/com/facebook/presto/common/block/DictionaryBlock.java +++ b/presto-common/src/main/java/com/facebook/presto/common/block/DictionaryBlock.java @@ -36,6 +36,20 @@ import static java.lang.String.format; import static java.util.Objects.requireNonNull; +/** + * A dictionary holds positionCount values of arbitrary types. Usually some of these values are repeated, + * and the block wraps an underlying delegate block with fewer or no repeated values. + * This delegate block is called the "dictionary". + * The ids array contains positionCount indexes into the underlying delegate block. + * When value N is requested from this block instead of returning the value directly, + * it looks up the index of value N at ids[N]; then it returns the value in dictionary[ids[N]]. + * This compresses data when the same value repeats at multiple locations. + * + * Not every id in the ids array is a valid position in the block. + * Specify an offset in the ids array to indicate that IDs are only stored from that position forward. + * If the ids array is longer than offset+positionCount, then extra values to the right are not valid. + * That is, IDs are stored in a range of the array from offset to offset+positionCount-1 (inclusive). 
+ */ public class DictionaryBlock implements Block { @@ -509,6 +523,10 @@ int[] getRawIds() return ids; } + /** + * @param position the position of the desired value in this block + * @return the position of the desired value in the underlying block this block wraps + */ public int getId(int position) { checkValidPosition(position, positionCount); From 8bd65b058e0bf7932c79ae2cf25a112106b448a8 Mon Sep 17 00:00:00 2001 From: Krishna Pai Date: Wed, 31 Jul 2024 09:39:29 -0700 Subject: [PATCH 47/86] Disable streaming aggregations. --- .../presto_cpp/main/types/PrestoToVeloxQueryPlan.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxQueryPlan.cpp b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxQueryPlan.cpp index 28ca974c5b6a..0ff1356872aa 100644 --- a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxQueryPlan.cpp +++ b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxQueryPlan.cpp @@ -983,6 +983,11 @@ VeloxQueryPlanConverterBase::toVeloxQueryPlan( std::vector aggregateNames; std::vector aggregates; + VELOX_CHECK(node->preGroupedVariables.empty() + && node->groupingSets.groupingSetCount == 1 + && node->groupingSets.globalGroupingSets.empty(), + "Streaming aggregation is disabled"); + std::vector outputVariables; for (auto it = node->aggregations.begin(); it != node->aggregations.end(); it++) { From ceeab87b46d3b6193b5c1dbaad1e96dac938e53a Mon Sep 17 00:00:00 2001 From: Krishna Pai Date: Fri, 20 Sep 2024 16:10:10 -0700 Subject: [PATCH 48/86] Register dwrf reader/writer factories. 
--- .../presto_cpp/main/tests/TaskManagerTest.cpp | 6 ++++++ .../presto_cpp/main/types/PrestoToVeloxQueryPlan.cpp | 5 ----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/presto-native-execution/presto_cpp/main/tests/TaskManagerTest.cpp b/presto-native-execution/presto_cpp/main/tests/TaskManagerTest.cpp index 928ed2d2ce9e..59c6d20d411a 100644 --- a/presto-native-execution/presto_cpp/main/tests/TaskManagerTest.cpp +++ b/presto-native-execution/presto_cpp/main/tests/TaskManagerTest.cpp @@ -30,6 +30,8 @@ #include "velox/dwio/common/WriterFactory.h" #include "velox/dwio/common/tests/utils/BatchMaker.h" #include "velox/dwio/dwrf/writer/Writer.h" +#include "velox/dwio/dwrf/RegisterDwrfReader.h" +#include "velox/dwio/dwrf/RegisterDwrfWriter.h" #include "velox/exec/Exchange.h" #include "velox/exec/Values.h" #include "velox/exec/tests/utils/PlanBuilder.h" @@ -215,6 +217,8 @@ class TaskManagerTest : public testing::Test { functions::prestosql::registerAllScalarFunctions(); aggregate::prestosql::registerAllAggregateFunctions(); parse::registerTypeResolver(); + dwrf::registerDwrfWriterFactory(); + dwrf::registerDwrfReaderFactory(); exec::ExchangeSource::registerFactory( [cpuExecutor = exchangeCpuExecutor_, ioExecutor = exchangeIoExecutor_, @@ -289,6 +293,8 @@ class TaskManagerTest : public testing::Test { connector::unregisterConnector(kHiveConnectorId); unregisterPrestoToVeloxConnector( connector::hive::HiveConnectorFactory::kHiveConnectorName); + dwrf::unregisterDwrfWriterFactory(); + dwrf::unregisterDwrfReaderFactory(); } std::vector makeVectors(int count, int rowsPerVector) { diff --git a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxQueryPlan.cpp b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxQueryPlan.cpp index 0ff1356872aa..28ca974c5b6a 100644 --- a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxQueryPlan.cpp +++ b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxQueryPlan.cpp @@ -983,11 +983,6 @@ 
VeloxQueryPlanConverterBase::toVeloxQueryPlan( std::vector aggregateNames; std::vector aggregates; - VELOX_CHECK(node->preGroupedVariables.empty() - && node->groupingSets.groupingSetCount == 1 - && node->groupingSets.globalGroupingSets.empty(), - "Streaming aggregation is disabled"); - std::vector outputVariables; for (auto it = node->aggregations.begin(); it != node->aggregations.end(); it++) { From 1a8c27d95ee2cef49d624474a7eba7056be078b0 Mon Sep 17 00:00:00 2001 From: Krishna Pai Date: Wed, 2 Oct 2024 14:48:06 -0700 Subject: [PATCH 49/86] Fix formatting problem. --- .../presto_cpp/main/tests/TaskManagerTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-native-execution/presto_cpp/main/tests/TaskManagerTest.cpp b/presto-native-execution/presto_cpp/main/tests/TaskManagerTest.cpp index 59c6d20d411a..8bd485bc2051 100644 --- a/presto-native-execution/presto_cpp/main/tests/TaskManagerTest.cpp +++ b/presto-native-execution/presto_cpp/main/tests/TaskManagerTest.cpp @@ -29,9 +29,9 @@ #include "velox/dwio/common/FileSink.h" #include "velox/dwio/common/WriterFactory.h" #include "velox/dwio/common/tests/utils/BatchMaker.h" -#include "velox/dwio/dwrf/writer/Writer.h" #include "velox/dwio/dwrf/RegisterDwrfReader.h" #include "velox/dwio/dwrf/RegisterDwrfWriter.h" +#include "velox/dwio/dwrf/writer/Writer.h" #include "velox/exec/Exchange.h" #include "velox/exec/Values.h" #include "velox/exec/tests/utils/PlanBuilder.h" From 64b5ebfef5f800d6e1a4594c7c7e5fb002284bcf Mon Sep 17 00:00:00 2001 From: kiersten-stokes Date: Wed, 25 Sep 2024 11:20:49 -0500 Subject: [PATCH 50/86] Fix REST session bug for Iceberg REST catalogs --- .../iceberg/IcebergNativeCatalogFactory.java | 8 +- .../rest/IcebergRestCatalogFactory.java | 154 ++++++++++++++++-- .../rest/TestIcebergDistributedRest.java | 35 +++- .../rest/IcebergRestCatalogServlet.java | 38 +++++ 4 files changed, 212 insertions(+), 23 deletions(-) diff --git 
a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergNativeCatalogFactory.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergNativeCatalogFactory.java index 3890649d3659..d0a361184af1 100644 --- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergNativeCatalogFactory.java +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergNativeCatalogFactory.java @@ -48,7 +48,7 @@ */ public class IcebergNativeCatalogFactory { - private final Cache catalogCache; + protected final Cache catalogCache; private final String catalogName; protected final CatalogType catalogType; private final String catalogWarehouse; @@ -99,7 +99,7 @@ public SupportsNamespaces getNamespaces(ConnectorSession session) throw new PrestoException(NOT_SUPPORTED, "Iceberg catalog of type " + catalogType + " does not support namespace operations"); } - private String getCacheKey(ConnectorSession session) + protected String getCacheKey(ConnectorSession session) { StringBuilder sb = new StringBuilder(); sb.append(catalogName); @@ -112,7 +112,7 @@ protected Optional getCatalogCacheKey(ConnectorSession session) return Optional.empty(); } - private Map getProperties(ConnectorSession session) + protected Map getProperties(ConnectorSession session) { Map properties = new HashMap<>(); if (icebergConfig.getManifestCachingEnabled()) { @@ -134,7 +134,7 @@ protected Map getCatalogProperties(ConnectorSession session) return ImmutableMap.of(); } - private Configuration getHadoopConfiguration() + protected Configuration getHadoopConfiguration() { Configuration configuration = new Configuration(false); diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/rest/IcebergRestCatalogFactory.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/rest/IcebergRestCatalogFactory.java index 771b8338f781..4a8be67c1c9d 100644 --- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/rest/IcebergRestCatalogFactory.java +++ 
b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/rest/IcebergRestCatalogFactory.java @@ -20,21 +20,35 @@ import com.facebook.presto.iceberg.IcebergConfig; import com.facebook.presto.iceberg.IcebergNativeCatalogFactory; import com.facebook.presto.spi.ConnectorSession; +import com.facebook.presto.spi.PrestoException; +import com.facebook.presto.spi.security.ConnectorIdentity; import com.google.common.collect.ImmutableMap; +import com.google.common.util.concurrent.UncheckedExecutionException; import io.jsonwebtoken.Jwts; -import org.apache.iceberg.rest.auth.OAuth2Properties; +import org.apache.iceberg.CatalogProperties; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.SessionCatalog.SessionContext; +import org.apache.iceberg.rest.HTTPClient; +import org.apache.iceberg.rest.RESTCatalog; import javax.inject.Inject; import java.util.Date; import java.util.Map; +import java.util.Optional; +import java.util.concurrent.ExecutionException; import static com.facebook.presto.iceberg.rest.AuthenticationType.OAUTH2; import static com.facebook.presto.iceberg.rest.SessionType.USER; +import static com.google.common.base.Throwables.throwIfInstanceOf; +import static com.google.common.base.Throwables.throwIfUnchecked; import static java.lang.String.format; import static java.util.Objects.requireNonNull; +import static java.util.UUID.randomUUID; import static org.apache.iceberg.CatalogProperties.URI; +import static org.apache.iceberg.CatalogUtil.configureHadoopConf; import static org.apache.iceberg.rest.auth.OAuth2Properties.CREDENTIAL; +import static org.apache.iceberg.rest.auth.OAuth2Properties.JWT_TOKEN_TYPE; import static org.apache.iceberg.rest.auth.OAuth2Properties.OAUTH2_SERVER_URI; import static org.apache.iceberg.rest.auth.OAuth2Properties.TOKEN; @@ -43,6 +57,7 @@ public class IcebergRestCatalogFactory { private final IcebergRestConfig catalogConfig; private final NodeVersion nodeVersion; + private final String catalogName; @Inject 
public IcebergRestCatalogFactory( @@ -56,6 +71,37 @@ public IcebergRestCatalogFactory( super(config, catalogName, s3ConfigurationUpdater, gcsConfigurationInitialize); this.catalogConfig = requireNonNull(catalogConfig, "catalogConfig is null"); this.nodeVersion = requireNonNull(nodeVersion, "nodeVersion is null"); + this.catalogName = requireNonNull(catalogName, "catalogName is null").getCatalogName(); + } + + @Override + public Catalog getCatalog(ConnectorSession session) + { + try { + return catalogCache.get(getCacheKey(session), () -> { + RESTCatalog catalog = new RESTCatalog( + convertSession(session), + config -> HTTPClient.builder(config).uri(config.get(URI)).build()); + + configureHadoopConf(catalog, getHadoopConfiguration()); + catalog.initialize(catalogName, getProperties(session)); + return catalog; + }); + } + catch (ExecutionException | UncheckedExecutionException e) { + throwIfInstanceOf(e.getCause(), PrestoException.class); + throwIfUnchecked(e); + throw new UncheckedExecutionException(e); + } + } + + @Override + protected Optional getCatalogCacheKey(ConnectorSession session) + { + StringBuilder sb = new StringBuilder(); + catalogConfig.getSessionType().filter(type -> type.equals(USER)) + .ifPresent(type -> sb.append(session.getUser())); + return Optional.of(sb.toString()); } @Override @@ -81,24 +127,96 @@ protected Map getCatalogProperties(ConnectorSession session) } }); - catalogConfig.getSessionType().ifPresent(type -> { - if (type == USER) { - properties.putAll(session.getIdentity().getExtraCredentials()); - - String sessionId = format("%s-%s", session.getUser(), session.getSource().orElse("default")); - String jwt = Jwts.builder() - .setId(sessionId) - .setSubject(session.getUser()) - .setIssuedAt(new Date()) - .setIssuer(nodeVersion.toString()) - .claim("user", session.getUser()) - .claim("source", session.getSource().orElse("")) - .compact(); - - properties.put(OAuth2Properties.JWT_TOKEN_TYPE, jwt); - } - }); + 
catalogConfig.getSessionType().filter(type -> type.equals(USER)) + .ifPresent(type -> properties.put(CatalogProperties.USER, session.getUser())); return properties.build(); } + + protected SessionContext convertSession(ConnectorSession session) + { + RestSessionBuilder sessionContextBuilder = catalogConfig.getSessionType() + .filter(type -> type.equals(USER)) + .map(type -> { + String sessionId = format("%s-%s", session.getUser(), session.getSource().orElse("default")); + Map properties = ImmutableMap.of( + "user", session.getUser(), + "source", session.getSource().orElse(""), + "version", nodeVersion.toString()); + + String jwt = Jwts.builder() + .setSubject(session.getUser()) + .setIssuer(nodeVersion.toString()) + .setIssuedAt(new Date()) + .claim("user", session.getUser()) + .claim("source", session.getSource().orElse("")) + .compact(); + + ImmutableMap.Builder credentials = ImmutableMap.builder(); + credentials.put(JWT_TOKEN_TYPE, jwt).putAll(session.getIdentity().getExtraCredentials()); + + return builder(session).setSessionId(sessionId) + .setIdentity(session.getUser()) + .setCredentials(credentials.build()) + .setProperties(properties); + }).orElse(builder(session).setSessionId(randomUUID().toString())); + return sessionContextBuilder.build(); + } + + protected static class RestSessionBuilder + { + private String sessionId; + private String identity; + private Map properties; + private Map credentials; + private final ConnectorIdentity wrappedIdentity; + + private RestSessionBuilder(ConnectorSession session) + { + sessionId = null; + identity = null; + credentials = null; + properties = ImmutableMap.of(); + wrappedIdentity = session.getIdentity(); + } + + protected RestSessionBuilder setSessionId(String sessionId) + { + this.sessionId = sessionId; + return this; + } + + protected RestSessionBuilder setIdentity(String identity) + { + this.identity = identity; + return this; + } + + protected RestSessionBuilder setCredentials(Map credentials) + { + 
this.credentials = credentials; + return this; + } + + protected RestSessionBuilder setProperties(Map properties) + { + this.properties = properties; + return this; + } + + protected SessionContext build() + { + return new SessionContext( + sessionId, + identity, + credentials, + properties, + wrappedIdentity); + } + } + + protected static RestSessionBuilder builder(ConnectorSession session) + { + return new RestSessionBuilder(session); + } } diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/rest/TestIcebergDistributedRest.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/rest/TestIcebergDistributedRest.java index 3d306dc56c5c..06965eb0e32e 100644 --- a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/rest/TestIcebergDistributedRest.java +++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/rest/TestIcebergDistributedRest.java @@ -14,7 +14,9 @@ package com.facebook.presto.iceberg.rest; import com.facebook.airlift.http.server.testing.TestingHttpServer; +import com.facebook.presto.Session; import com.facebook.presto.iceberg.IcebergDistributedTestBase; +import com.facebook.presto.spi.security.Identity; import com.facebook.presto.testing.QueryRunner; import com.google.common.collect.ImmutableMap; import org.assertj.core.util.Files; @@ -29,9 +31,11 @@ import static com.facebook.presto.iceberg.CatalogType.REST; import static com.facebook.presto.iceberg.FileFormat.PARQUET; +import static com.facebook.presto.iceberg.IcebergQueryRunner.ICEBERG_CATALOG; import static com.facebook.presto.iceberg.IcebergQueryRunner.createIcebergQueryRunner; import static com.facebook.presto.iceberg.rest.IcebergRestTestUtil.getRestServer; import static com.facebook.presto.iceberg.rest.IcebergRestTestUtil.restConnectorProperties; +import static com.facebook.presto.testing.TestingSession.testSessionBuilder; import static com.google.common.io.MoreFiles.deleteRecursively; import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; 
import static org.assertj.core.api.Assertions.assertThatThrownBy; @@ -83,9 +87,14 @@ public void tearDown() protected QueryRunner createQueryRunner() throws Exception { + Map connectorProperties = ImmutableMap.builder() + .putAll(restConnectorProperties(serverUri)) + .put("iceberg.rest.session.type", SessionType.USER.name()) + .build(); + return createIcebergQueryRunner( ImmutableMap.of(), - restConnectorProperties(serverUri), + connectorProperties, PARQUET, true, false, @@ -101,4 +110,28 @@ public void testDeleteOnV1Table() .isInstanceOf(RuntimeException.class) .hasMessageMatching("Cannot downgrade v2 table to v1"); } + + @Test + public void testRestUserSessionAuthorization() + { + // Query with default user should succeed + assertQuerySucceeds(getSession(), "SHOW SCHEMAS"); + + String unauthorizedUser = "unauthorized_user"; + Session unauthorizedUserSession = testSessionBuilder() + .setCatalog(ICEBERG_CATALOG) + .setUserAgent(unauthorizedUser) + .setIdentity(new Identity( + unauthorizedUser, + Optional.empty(), + ImmutableMap.of(), + ImmutableMap.of(), + ImmutableMap.of(), + Optional.of(unauthorizedUser), + Optional.empty())) + .build(); + + // Query with different user should fail + assertQueryFails(unauthorizedUserSession, "SHOW SCHEMAS", "Forbidden: User not authorized"); + } } diff --git a/presto-iceberg/src/test/java/org/apache/iceberg/rest/IcebergRestCatalogServlet.java b/presto-iceberg/src/test/java/org/apache/iceberg/rest/IcebergRestCatalogServlet.java index e9aa46a58968..d05eb23fcda2 100644 --- a/presto-iceberg/src/test/java/org/apache/iceberg/rest/IcebergRestCatalogServlet.java +++ b/presto-iceberg/src/test/java/org/apache/iceberg/rest/IcebergRestCatalogServlet.java @@ -14,6 +14,9 @@ package org.apache.iceberg.rest; import com.facebook.airlift.log.Logger; +import io.jsonwebtoken.Claims; +import io.jsonwebtoken.Jwts; +import io.jsonwebtoken.MalformedJwtException; import org.apache.hc.core5.http.ContentType; import org.apache.hc.core5.http.HttpHeaders; 
import org.apache.iceberg.exceptions.RESTException; @@ -94,6 +97,14 @@ protected void execute(ServletRequestContext context, HttpServletResponse respon response.setStatus(HttpServletResponse.SC_OK); responseHeaders.forEach(response::setHeader); + String token = context.headers.get("Authorization"); + if (token != null && isRestUserSessionToken(token) && !isAuthorizedRestUserSessionToken(token)) { + context.errorResponse = ErrorResponse.builder() + .responseCode(HttpServletResponse.SC_FORBIDDEN) + .withMessage("User not authorized") + .build(); + } + if (context.error().isPresent()) { response.setStatus(HttpServletResponse.SC_BAD_REQUEST); RESTObjectMapper.mapper().writeValue(response.getWriter(), context.error().get()); @@ -145,6 +156,33 @@ protected Consumer handle(HttpServletResponse response) }; } + protected Claims getTokenClaims(String token) + { + token = token.replaceAll("Bearer token-exchange-token:sub=", ""); + return Jwts.parserBuilder().build().parseClaimsJwt(token).getBody(); + } + + protected boolean isRestUserSessionToken(String token) + { + try { + getTokenClaims(token); + } + catch (MalformedJwtException mje) { + // Not a json web token + return false; + } + return true; + } + + protected boolean isAuthorizedRestUserSessionToken(String jwt) + { + Claims jwtClaims = getTokenClaims(jwt); + return jwtClaims.getSubject().equals("user") && + jwtClaims.getIssuer().equals("testversion") && + jwtClaims.get("user").equals("user") && + jwtClaims.get("source").equals("test"); + } + public static class ServletRequestContext { private HTTPMethod method; From 751768a544ec65ce4bbfd474907b01afab2dff1c Mon Sep 17 00:00:00 2001 From: Serge Druzkin Date: Tue, 1 Oct 2024 12:50:47 -0700 Subject: [PATCH 51/86] Fix some toString methods in presto-orc (#23750) Summary: Pull Request resolved: https://github.com/prestodb/presto/pull/23750 Differential Revision: D63643493 --- .../main/java/com/facebook/presto/orc/ChunkedSliceOutput.java | 4 ++-- 
.../main/java/com/facebook/presto/orc/OrcOutputBuffer.java | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/ChunkedSliceOutput.java b/presto-orc/src/main/java/com/facebook/presto/orc/ChunkedSliceOutput.java index e21b93e4273c..e5142bc7a25e 100644 --- a/presto-orc/src/main/java/com/facebook/presto/orc/ChunkedSliceOutput.java +++ b/presto-orc/src/main/java/com/facebook/presto/orc/ChunkedSliceOutput.java @@ -303,9 +303,9 @@ public String toString(Charset charset) @Override public String toString() { - StringBuilder builder = new StringBuilder("OutputStreamSliceOutputAdapter{"); + StringBuilder builder = new StringBuilder("ChunkedSliceOutput{"); builder.append("position=").append(size()); - builder.append("bufferSize=").append(slice.length()); + builder.append(", bufferSize=").append(slice.length()); builder.append('}'); return builder.toString(); } diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/OrcOutputBuffer.java b/presto-orc/src/main/java/com/facebook/presto/orc/OrcOutputBuffer.java index 0882a258f898..60b5dd97f955 100644 --- a/presto-orc/src/main/java/com/facebook/presto/orc/OrcOutputBuffer.java +++ b/presto-orc/src/main/java/com/facebook/presto/orc/OrcOutputBuffer.java @@ -423,9 +423,9 @@ public String toString(Charset charset) @Override public String toString() { - StringBuilder builder = new StringBuilder("OutputStreamSliceOutputAdapter{"); + StringBuilder builder = new StringBuilder("OrcOutputBuffer{"); builder.append("outputStream=").append(compressedOutputStream); - builder.append("bufferSize=").append(slice.length()); + builder.append(", bufferSize=").append(slice.length()); builder.append('}'); return builder.toString(); } From 7c814aeeb5ab35e351f181fc465a92cca59d66de Mon Sep 17 00:00:00 2001 From: Elliotte Rusty Harold Date: Sun, 22 Sep 2024 17:00:53 -0400 Subject: [PATCH 52/86] Don't generate code for Parquet readers; Make batchreader code available and findable in the 
source repository --- presto-parquet/pom.xml | 43 --- .../batchreader/BooleanFlatBatchReader.java | 257 +++++++++++++++++ .../batchreader/BooleanNestedBatchReader.java | 144 ++++++++++ .../batchreader/Int64FlatBatchReader.java | 257 +++++++++++++++++ .../batchreader/Int64NestedBatchReader.java | 144 ++++++++++ ...TimeAndTimestampMicrosFlatBatchReader.java | 257 +++++++++++++++++ ...meAndTimestampMicrosNestedBatchReader.java | 144 ++++++++++ .../LongDecimalFlatBatchReader.java | 258 ++++++++++++++++++ .../ShortDecimalFlatBatchReader.java | 257 +++++++++++++++++ .../batchreader/TimestampFlatBatchReader.java | 257 +++++++++++++++++ .../TimestampNestedBatchReader.java | 144 ++++++++++ 11 files changed, 2119 insertions(+), 43 deletions(-) create mode 100644 presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/BooleanFlatBatchReader.java create mode 100644 presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/BooleanNestedBatchReader.java create mode 100644 presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/Int64FlatBatchReader.java create mode 100644 presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/Int64NestedBatchReader.java create mode 100644 presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/Int64TimeAndTimestampMicrosFlatBatchReader.java create mode 100644 presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/Int64TimeAndTimestampMicrosNestedBatchReader.java create mode 100644 presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/LongDecimalFlatBatchReader.java create mode 100644 presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/ShortDecimalFlatBatchReader.java create mode 100644 presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/TimestampFlatBatchReader.java create mode 100644 presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/TimestampNestedBatchReader.java diff --git a/presto-parquet/pom.xml 
b/presto-parquet/pom.xml index 09c9a45cfc47..7f2caa8e0b12 100644 --- a/presto-parquet/pom.xml +++ b/presto-parquet/pom.xml @@ -227,49 +227,6 @@ - - com.googlecode.fmpp-maven-plugin - fmpp-maven-plugin - 1.0 - - - net.sourceforge.fmpp - fmpp - 0.9.15 - - - - ${project.basedir}/src/main/resources/freemarker/config.fmpp - ${project.build.directory}/generated-sources/java/ - ${project.basedir}/src/main/resources/freemarker/templates - - - - generate-sources - - generate - - - - - - org.codehaus.mojo - build-helper-maven-plugin - - - add-source - generate-sources - - add-source - - - - ${project.build.directory}/generated-sources/java/ - - - - - diff --git a/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/BooleanFlatBatchReader.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/BooleanFlatBatchReader.java new file mode 100644 index 000000000000..a16af1f2cc1e --- /dev/null +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/BooleanFlatBatchReader.java @@ -0,0 +1,257 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.parquet.batchreader; + +import com.facebook.presto.common.block.Block; +import com.facebook.presto.common.block.ByteArrayBlock; +import com.facebook.presto.common.block.RunLengthEncodedBlock; +import com.facebook.presto.parquet.ColumnReader; +import com.facebook.presto.parquet.DataPage; +import com.facebook.presto.parquet.DictionaryPage; +import com.facebook.presto.parquet.Field; +import com.facebook.presto.parquet.RichColumnDescriptor; +import com.facebook.presto.parquet.batchreader.decoders.Decoders.FlatDecoders; +import com.facebook.presto.parquet.batchreader.decoders.FlatDefinitionLevelDecoder; +import com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BooleanValuesDecoder; +import com.facebook.presto.parquet.batchreader.dictionary.Dictionaries; +import com.facebook.presto.parquet.dictionary.Dictionary; +import com.facebook.presto.parquet.reader.ColumnChunk; +import com.facebook.presto.parquet.reader.PageReader; +import com.facebook.presto.spi.PrestoException; +import org.apache.parquet.internal.filter2.columnindex.RowRanges; +import org.apache.parquet.io.ParquetDecodingException; +import org.openjdk.jol.info.ClassLayout; + +import java.io.IOException; +import java.util.Optional; + +import static com.facebook.presto.parquet.ParquetErrorCode.PARQUET_IO_READ_ERROR; +import static com.facebook.presto.parquet.batchreader.decoders.Decoders.readFlatPage; +import static com.google.common.base.Preconditions.checkArgument; +import static java.lang.String.format; +import static java.util.Objects.requireNonNull; + +public class BooleanFlatBatchReader + implements ColumnReader +{ + private static final int INSTANCE_SIZE = ClassLayout.parseClass(BooleanFlatBatchReader.class).instanceSize(); + + private final RichColumnDescriptor columnDescriptor; + + protected Field field; + protected int nextBatchSize; + protected FlatDefinitionLevelDecoder definitionLevelDecoder; + protected BooleanValuesDecoder valuesDecoder; + protected int 
remainingCountInPage; + + private Dictionary dictionary; + private int readOffset; + private PageReader pageReader; + + public BooleanFlatBatchReader(RichColumnDescriptor columnDescriptor) + { + this.columnDescriptor = requireNonNull(columnDescriptor, "columnDescriptor is null"); + } + + @Override + public boolean isInitialized() + { + return pageReader != null && field != null; + } + + @Override + public void init(PageReader pageReader, Field field, RowRanges rowRanges) + { + checkArgument(!isInitialized(), "Parquet batch reader already initialized"); + this.pageReader = requireNonNull(pageReader, "pageReader is null"); + checkArgument(pageReader.getValueCountInColumnChunk() > 0, "page is empty"); + this.field = requireNonNull(field, "field is null"); + + DictionaryPage dictionaryPage = pageReader.readDictionaryPage(); + if (dictionaryPage != null) { + dictionary = Dictionaries.createDictionary(columnDescriptor, dictionaryPage); + } + } + + @Override + public void prepareNextRead(int batchSize) + { + readOffset = readOffset + nextBatchSize; + nextBatchSize = batchSize; + } + + @Override + public ColumnChunk readNext() + { + ColumnChunk columnChunk = null; + try { + seek(); + if (field.isRequired()) { + columnChunk = readWithoutNull(); + } + else { + columnChunk = readWithNull(); + } + } + catch (IOException exception) { + throw new PrestoException(PARQUET_IO_READ_ERROR, "Error reading Parquet column " + columnDescriptor, exception); + } + + readOffset = 0; + nextBatchSize = 0; + return columnChunk; + } + + @Override + public long getRetainedSizeInBytes() + { + return INSTANCE_SIZE + + (definitionLevelDecoder == null ? 0 : definitionLevelDecoder.getRetainedSizeInBytes()) + + (valuesDecoder == null ? 0 : valuesDecoder.getRetainedSizeInBytes()) + + (dictionary == null ? 0 : dictionary.getRetainedSizeInBytes()) + + (pageReader == null ? 
0 : pageReader.getRetainedSizeInBytes()); + } + + protected boolean readNextPage() + { + definitionLevelDecoder = null; + valuesDecoder = null; + remainingCountInPage = 0; + + DataPage page = pageReader.readPage(); + if (page == null) { + return false; + } + + FlatDecoders flatDecoders = readFlatPage(page, columnDescriptor, dictionary); + definitionLevelDecoder = flatDecoders.getDefinitionLevelDecoder(); + valuesDecoder = (BooleanValuesDecoder) flatDecoders.getValuesDecoder(); + + remainingCountInPage = page.getValueCount(); + return true; + } + + private ColumnChunk readWithNull() + throws IOException + { + byte[] values = new byte[nextBatchSize]; + boolean[] isNull = new boolean[nextBatchSize]; + + int totalNonNullCount = 0; + int remainingInBatch = nextBatchSize; + int startOffset = 0; + while (remainingInBatch > 0) { + if (remainingCountInPage == 0) { + if (!readNextPage()) { + break; + } + } + + int chunkSize = Math.min(remainingCountInPage, remainingInBatch); + int nonNullCount = definitionLevelDecoder.readNext(isNull, startOffset, chunkSize); + totalNonNullCount += nonNullCount; + + if (nonNullCount > 0) { + valuesDecoder.readNext(values, startOffset, nonNullCount); + + int valueDestinationIndex = startOffset + chunkSize - 1; + int valueSourceIndex = startOffset + nonNullCount - 1; + + while (valueDestinationIndex >= startOffset) { + if (!isNull[valueDestinationIndex]) { + values[valueDestinationIndex] = values[valueSourceIndex]; + valueSourceIndex--; + } + valueDestinationIndex--; + } + } + + startOffset += chunkSize; + remainingInBatch -= chunkSize; + remainingCountInPage -= chunkSize; + } + + if (remainingInBatch != 0) { + throw new ParquetDecodingException("Still remaining to be read in current batch."); + } + + if (totalNonNullCount == 0) { + Block block = RunLengthEncodedBlock.create(field.getType(), null, nextBatchSize); + return new ColumnChunk(block, new int[0], new int[0]); + } + + boolean hasNoNull = totalNonNullCount == nextBatchSize; + Block 
block = new ByteArrayBlock(nextBatchSize, hasNoNull ? Optional.empty() : Optional.of(isNull), values); + return new ColumnChunk(block, new int[0], new int[0]); + } + + private ColumnChunk readWithoutNull() + throws IOException + { + byte[] values = new byte[nextBatchSize]; + int remainingInBatch = nextBatchSize; + int startOffset = 0; + while (remainingInBatch > 0) { + if (remainingCountInPage == 0) { + if (!readNextPage()) { + break; + } + } + + int chunkSize = Math.min(remainingCountInPage, remainingInBatch); + + valuesDecoder.readNext(values, startOffset, chunkSize); + startOffset += chunkSize; + remainingInBatch -= chunkSize; + remainingCountInPage -= chunkSize; + } + + if (remainingInBatch != 0) { + throw new ParquetDecodingException(format("Corrupted Parquet file: extra %d values to be consumed when scanning current batch", remainingInBatch)); + } + + Block block = new ByteArrayBlock(nextBatchSize, Optional.empty(), values); + return new ColumnChunk(block, new int[0], new int[0]); + } + + private void seek() + throws IOException + { + if (readOffset == 0) { + return; + } + + int remainingInBatch = readOffset; + int startOffset = 0; + while (remainingInBatch > 0) { + if (remainingCountInPage == 0) { + if (!readNextPage()) { + break; + } + } + + int chunkSize = Math.min(remainingCountInPage, remainingInBatch); + int skipSize = chunkSize; + if (!columnDescriptor.isRequired()) { + boolean[] isNull = new boolean[readOffset]; + int nonNullCount = definitionLevelDecoder.readNext(isNull, startOffset, chunkSize); + skipSize = nonNullCount; + startOffset += chunkSize; + } + valuesDecoder.skip(skipSize); + remainingInBatch -= chunkSize; + remainingCountInPage -= chunkSize; + } + } +} diff --git a/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/BooleanNestedBatchReader.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/BooleanNestedBatchReader.java new file mode 100644 index 000000000000..f89ea02b2189 --- /dev/null +++ 
b/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/BooleanNestedBatchReader.java @@ -0,0 +1,144 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.parquet.batchreader; + +import com.facebook.presto.common.block.Block; +import com.facebook.presto.common.block.ByteArrayBlock; +import com.facebook.presto.common.block.RunLengthEncodedBlock; +import com.facebook.presto.parquet.RichColumnDescriptor; +import com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BooleanValuesDecoder; +import com.facebook.presto.parquet.reader.ColumnChunk; + +import java.io.IOException; +import java.util.Optional; + +public class BooleanNestedBatchReader + extends AbstractNestedBatchReader +{ + public BooleanNestedBatchReader(RichColumnDescriptor columnDescriptor) + { + super(columnDescriptor); + } + + @Override + protected ColumnChunk readNestedWithNull() + throws IOException + { + int maxDefinitionLevel = columnDescriptor.getMaxDefinitionLevel(); + RepetitionLevelDecodingContext repetitionLevelDecodingContext = readRepetitionLevels(nextBatchSize); + DefinitionLevelDecodingContext definitionLevelDecodingContext = readDefinitionLevels(repetitionLevelDecodingContext.getDLValuesDecoderContexts(), repetitionLevelDecodingContext.getRepetitionLevels().length); + + int[] definitionLevels = definitionLevelDecodingContext.getDefinitionLevels(); + int newBatchSize = 0; + int batchNonNullCount = 0; + for (ValuesDecoderContext 
valuesDecoderContext : definitionLevelDecodingContext.getValuesDecoderContexts()) { + int nonNullCount = 0; + int valueCount = 0; + for (int i = valuesDecoderContext.getStart(); i < valuesDecoderContext.getEnd(); i++) { + nonNullCount += (definitionLevels[i] == maxDefinitionLevel ? 1 : 0); + valueCount += (definitionLevels[i] >= maxDefinitionLevel - 1 ? 1 : 0); + } + batchNonNullCount += nonNullCount; + newBatchSize += valueCount; + valuesDecoderContext.setNonNullCount(nonNullCount); + valuesDecoderContext.setValueCount(valueCount); + } + + if (batchNonNullCount == 0) { + Block block = RunLengthEncodedBlock.create(field.getType(), null, newBatchSize); + return new ColumnChunk(block, definitionLevels, repetitionLevelDecodingContext.getRepetitionLevels()); + } + + byte[] values = new byte[newBatchSize]; + boolean[] isNull = new boolean[newBatchSize]; + int offset = 0; + for (ValuesDecoderContext valuesDecoderContext : definitionLevelDecodingContext.getValuesDecoderContexts()) { + ((BooleanValuesDecoder) valuesDecoderContext.getValuesDecoder()).readNext(values, offset, valuesDecoderContext.getNonNullCount()); + + int valueDestinationIndex = offset + valuesDecoderContext.getValueCount() - 1; + int valueSourceIndex = offset + valuesDecoderContext.getNonNullCount() - 1; + int definitionLevelIndex = valuesDecoderContext.getEnd() - 1; + + while (valueDestinationIndex >= offset) { + if (definitionLevels[definitionLevelIndex] == maxDefinitionLevel) { + values[valueDestinationIndex--] = values[valueSourceIndex--]; + } + else if (definitionLevels[definitionLevelIndex] == maxDefinitionLevel - 1) { + values[valueDestinationIndex] = 0; + isNull[valueDestinationIndex] = true; + valueDestinationIndex--; + } + definitionLevelIndex--; + } + offset += valuesDecoderContext.getValueCount(); + } + + boolean hasNoNull = batchNonNullCount == newBatchSize; + Block block = new ByteArrayBlock(newBatchSize, hasNoNull ? 
Optional.empty() : Optional.of(isNull), values); + return new ColumnChunk(block, definitionLevels, repetitionLevelDecodingContext.getRepetitionLevels()); + } + + @Override + protected ColumnChunk readNestedNoNull() + throws IOException + { + int maxDefinitionLevel = columnDescriptor.getMaxDefinitionLevel(); + RepetitionLevelDecodingContext repetitionLevelDecodingContext = readRepetitionLevels(nextBatchSize); + DefinitionLevelDecodingContext definitionLevelDecodingContext = readDefinitionLevels(repetitionLevelDecodingContext.getDLValuesDecoderContexts(), repetitionLevelDecodingContext.getRepetitionLevels().length); + + int[] definitionLevels = definitionLevelDecodingContext.getDefinitionLevels(); + int newBatchSize = 0; + for (ValuesDecoderContext valuesDecoderContext : definitionLevelDecodingContext.getValuesDecoderContexts()) { + int valueCount = 0; + for (int i = valuesDecoderContext.getStart(); i < valuesDecoderContext.getEnd(); i++) { + valueCount += (definitionLevels[i] == maxDefinitionLevel ? 
1 : 0); + } + newBatchSize += valueCount; + valuesDecoderContext.setNonNullCount(valueCount); + valuesDecoderContext.setValueCount(valueCount); + } + + byte[] values = new byte[newBatchSize]; + int offset = 0; + for (ValuesDecoderContext valuesDecoderContext : definitionLevelDecodingContext.getValuesDecoderContexts()) { + ((BooleanValuesDecoder) valuesDecoderContext.getValuesDecoder()).readNext(values, offset, valuesDecoderContext.getNonNullCount()); + offset += valuesDecoderContext.getValueCount(); + } + + Block block = new ByteArrayBlock(newBatchSize, Optional.empty(), values); + return new ColumnChunk(block, definitionLevels, repetitionLevelDecodingContext.getRepetitionLevels()); + } + + @Override + protected void seek() + throws IOException + { + if (readOffset == 0) { + return; + } + int maxDefinitionLevel = columnDescriptor.getMaxDefinitionLevel(); + RepetitionLevelDecodingContext repetitionLevelDecodingContext = readRepetitionLevels(readOffset); + DefinitionLevelDecodingContext definitionLevelDecodingContext = readDefinitionLevels(repetitionLevelDecodingContext.getDLValuesDecoderContexts(), repetitionLevelDecodingContext.getRepetitionLevels().length); + + int[] definitionLevels = definitionLevelDecodingContext.getDefinitionLevels(); + for (ValuesDecoderContext valuesDecoderContext : definitionLevelDecodingContext.getValuesDecoderContexts()) { + int valueCount = 0; + for (int i = valuesDecoderContext.getStart(); i < valuesDecoderContext.getEnd(); i++) { + valueCount += (definitionLevels[i] == maxDefinitionLevel ? 
1 : 0); + } + BooleanValuesDecoder intValuesDecoder = (BooleanValuesDecoder) valuesDecoderContext.getValuesDecoder(); + intValuesDecoder.skip(valueCount); + } + } +} diff --git a/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/Int64FlatBatchReader.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/Int64FlatBatchReader.java new file mode 100644 index 000000000000..2c97056ddda9 --- /dev/null +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/Int64FlatBatchReader.java @@ -0,0 +1,257 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.parquet.batchreader; + +import com.facebook.presto.common.block.Block; +import com.facebook.presto.common.block.LongArrayBlock; +import com.facebook.presto.common.block.RunLengthEncodedBlock; +import com.facebook.presto.parquet.ColumnReader; +import com.facebook.presto.parquet.DataPage; +import com.facebook.presto.parquet.DictionaryPage; +import com.facebook.presto.parquet.Field; +import com.facebook.presto.parquet.RichColumnDescriptor; +import com.facebook.presto.parquet.batchreader.decoders.Decoders.FlatDecoders; +import com.facebook.presto.parquet.batchreader.decoders.FlatDefinitionLevelDecoder; +import com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.Int64ValuesDecoder; +import com.facebook.presto.parquet.batchreader.dictionary.Dictionaries; +import com.facebook.presto.parquet.dictionary.Dictionary; +import com.facebook.presto.parquet.reader.ColumnChunk; +import com.facebook.presto.parquet.reader.PageReader; +import com.facebook.presto.spi.PrestoException; +import org.apache.parquet.internal.filter2.columnindex.RowRanges; +import org.apache.parquet.io.ParquetDecodingException; +import org.openjdk.jol.info.ClassLayout; + +import java.io.IOException; +import java.util.Optional; + +import static com.facebook.presto.parquet.ParquetErrorCode.PARQUET_IO_READ_ERROR; +import static com.facebook.presto.parquet.batchreader.decoders.Decoders.readFlatPage; +import static com.google.common.base.Preconditions.checkArgument; +import static java.lang.String.format; +import static java.util.Objects.requireNonNull; + +public class Int64FlatBatchReader + implements ColumnReader +{ + private static final int INSTANCE_SIZE = ClassLayout.parseClass(Int64FlatBatchReader.class).instanceSize(); + + private final RichColumnDescriptor columnDescriptor; + + protected Field field; + protected int nextBatchSize; + protected FlatDefinitionLevelDecoder definitionLevelDecoder; + protected Int64ValuesDecoder valuesDecoder; + protected int 
remainingCountInPage; + + private Dictionary dictionary; + private int readOffset; + private PageReader pageReader; + + public Int64FlatBatchReader(RichColumnDescriptor columnDescriptor) + { + this.columnDescriptor = requireNonNull(columnDescriptor, "columnDescriptor is null"); + } + + @Override + public boolean isInitialized() + { + return pageReader != null && field != null; + } + + @Override + public void init(PageReader pageReader, Field field, RowRanges rowRanges) + { + checkArgument(!isInitialized(), "Parquet batch reader already initialized"); + this.pageReader = requireNonNull(pageReader, "pageReader is null"); + checkArgument(pageReader.getValueCountInColumnChunk() > 0, "page is empty"); + this.field = requireNonNull(field, "field is null"); + + DictionaryPage dictionaryPage = pageReader.readDictionaryPage(); + if (dictionaryPage != null) { + dictionary = Dictionaries.createDictionary(columnDescriptor, dictionaryPage); + } + } + + @Override + public void prepareNextRead(int batchSize) + { + readOffset = readOffset + nextBatchSize; + nextBatchSize = batchSize; + } + + @Override + public ColumnChunk readNext() + { + ColumnChunk columnChunk = null; + try { + seek(); + if (field.isRequired()) { + columnChunk = readWithoutNull(); + } + else { + columnChunk = readWithNull(); + } + } + catch (IOException exception) { + throw new PrestoException(PARQUET_IO_READ_ERROR, "Error reading Parquet column " + columnDescriptor, exception); + } + + readOffset = 0; + nextBatchSize = 0; + return columnChunk; + } + + @Override + public long getRetainedSizeInBytes() + { + return INSTANCE_SIZE + + (definitionLevelDecoder == null ? 0 : definitionLevelDecoder.getRetainedSizeInBytes()) + + (valuesDecoder == null ? 0 : valuesDecoder.getRetainedSizeInBytes()) + + (dictionary == null ? 0 : dictionary.getRetainedSizeInBytes()) + + (pageReader == null ? 
0 : pageReader.getRetainedSizeInBytes()); + } + + protected boolean readNextPage() + { + definitionLevelDecoder = null; + valuesDecoder = null; + remainingCountInPage = 0; + + DataPage page = pageReader.readPage(); + if (page == null) { + return false; + } + + FlatDecoders flatDecoders = readFlatPage(page, columnDescriptor, dictionary); + definitionLevelDecoder = flatDecoders.getDefinitionLevelDecoder(); + valuesDecoder = (Int64ValuesDecoder) flatDecoders.getValuesDecoder(); + + remainingCountInPage = page.getValueCount(); + return true; + } + + private ColumnChunk readWithNull() + throws IOException + { + long[] values = new long[nextBatchSize]; + boolean[] isNull = new boolean[nextBatchSize]; + + int totalNonNullCount = 0; + int remainingInBatch = nextBatchSize; + int startOffset = 0; + while (remainingInBatch > 0) { + if (remainingCountInPage == 0) { + if (!readNextPage()) { + break; + } + } + + int chunkSize = Math.min(remainingCountInPage, remainingInBatch); + int nonNullCount = definitionLevelDecoder.readNext(isNull, startOffset, chunkSize); + totalNonNullCount += nonNullCount; + + if (nonNullCount > 0) { + valuesDecoder.readNext(values, startOffset, nonNullCount); + + int valueDestinationIndex = startOffset + chunkSize - 1; + int valueSourceIndex = startOffset + nonNullCount - 1; + + while (valueDestinationIndex >= startOffset) { + if (!isNull[valueDestinationIndex]) { + values[valueDestinationIndex] = values[valueSourceIndex]; + valueSourceIndex--; + } + valueDestinationIndex--; + } + } + + startOffset += chunkSize; + remainingInBatch -= chunkSize; + remainingCountInPage -= chunkSize; + } + + if (remainingInBatch != 0) { + throw new ParquetDecodingException("Still remaining to be read in current batch."); + } + + if (totalNonNullCount == 0) { + Block block = RunLengthEncodedBlock.create(field.getType(), null, nextBatchSize); + return new ColumnChunk(block, new int[0], new int[0]); + } + + boolean hasNoNull = totalNonNullCount == nextBatchSize; + Block block 
= new LongArrayBlock(nextBatchSize, hasNoNull ? Optional.empty() : Optional.of(isNull), values); + return new ColumnChunk(block, new int[0], new int[0]); + } + + private ColumnChunk readWithoutNull() + throws IOException + { + long[] values = new long[nextBatchSize]; + int remainingInBatch = nextBatchSize; + int startOffset = 0; + while (remainingInBatch > 0) { + if (remainingCountInPage == 0) { + if (!readNextPage()) { + break; + } + } + + int chunkSize = Math.min(remainingCountInPage, remainingInBatch); + + valuesDecoder.readNext(values, startOffset, chunkSize); + startOffset += chunkSize; + remainingInBatch -= chunkSize; + remainingCountInPage -= chunkSize; + } + + if (remainingInBatch != 0) { + throw new ParquetDecodingException(format("Corrupted Parquet file: extra %d values to be consumed when scanning current batch", remainingInBatch)); + } + + Block block = new LongArrayBlock(nextBatchSize, Optional.empty(), values); + return new ColumnChunk(block, new int[0], new int[0]); + } + + private void seek() + throws IOException + { + if (readOffset == 0) { + return; + } + + int remainingInBatch = readOffset; + int startOffset = 0; + while (remainingInBatch > 0) { + if (remainingCountInPage == 0) { + if (!readNextPage()) { + break; + } + } + + int chunkSize = Math.min(remainingCountInPage, remainingInBatch); + int skipSize = chunkSize; + if (!columnDescriptor.isRequired()) { + boolean[] isNull = new boolean[readOffset]; + int nonNullCount = definitionLevelDecoder.readNext(isNull, startOffset, chunkSize); + skipSize = nonNullCount; + startOffset += chunkSize; + } + valuesDecoder.skip(skipSize); + remainingInBatch -= chunkSize; + remainingCountInPage -= chunkSize; + } + } +} diff --git a/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/Int64NestedBatchReader.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/Int64NestedBatchReader.java new file mode 100644 index 000000000000..fea6186ad612 --- /dev/null +++ 
// File: presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/Int64NestedBatchReader.java
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.parquet.batchreader;

import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.block.LongArrayBlock;
import com.facebook.presto.common.block.RunLengthEncodedBlock;
import com.facebook.presto.parquet.RichColumnDescriptor;
import com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.Int64ValuesDecoder;
import com.facebook.presto.parquet.reader.ColumnChunk;

import java.io.IOException;
import java.util.Optional;

/**
 * Batch reader for INT64 leaf columns that sit inside a nested (repeated/optional)
 * schema. Repetition and definition levels are decoded first; values are then read
 * densely and expanded in place, with null slots flagged where the definition level
 * indicates a present-but-null leaf.
 */
public class Int64NestedBatchReader
        extends AbstractNestedBatchReader
{
    public Int64NestedBatchReader(RichColumnDescriptor columnDescriptor)
    {
        super(columnDescriptor);
    }

    @Override
    protected ColumnChunk readNestedWithNull()
            throws IOException
    {
        int maxDefinitionLevel = columnDescriptor.getMaxDefinitionLevel();
        RepetitionLevelDecodingContext repetitionContext = readRepetitionLevels(nextBatchSize);
        DefinitionLevelDecodingContext definitionContext = readDefinitionLevels(repetitionContext.getDLValuesDecoderContexts(), repetitionContext.getRepetitionLevels().length);

        int[] definitionLevels = definitionContext.getDefinitionLevels();
        int expandedBatchSize = 0;
        int totalNonNullCount = 0;
        // First pass: per decoder context, count the non-null leaves and the total
        // output slots. A level of maxDefinitionLevel - 1 marks a null leaf slot.
        for (ValuesDecoderContext context : definitionContext.getValuesDecoderContexts()) {
            int nonNulls = 0;
            int slots = 0;
            for (int i = context.getStart(); i < context.getEnd(); i++) {
                if (definitionLevels[i] == maxDefinitionLevel) {
                    nonNulls++;
                }
                if (definitionLevels[i] >= maxDefinitionLevel - 1) {
                    slots++;
                }
            }
            totalNonNullCount += nonNulls;
            expandedBatchSize += slots;
            context.setNonNullCount(nonNulls);
            context.setValueCount(slots);
        }

        if (totalNonNullCount == 0) {
            // Every slot is null: represent the batch as a run-length-encoded null block.
            Block nullBlock = RunLengthEncodedBlock.create(field.getType(), null, expandedBatchSize);
            return new ColumnChunk(nullBlock, definitionLevels, repetitionContext.getRepetitionLevels());
        }

        long[] values = new long[expandedBatchSize];
        boolean[] isNull = new boolean[expandedBatchSize];
        int offset = 0;
        for (ValuesDecoderContext context : definitionContext.getValuesDecoderContexts()) {
            ((Int64ValuesDecoder) context.getValuesDecoder()).readNext(values, offset, context.getNonNullCount());

            // Walk backwards, spreading the densely packed values out to their final
            // slots and flagging null positions along the way.
            int destinationIndex = offset + context.getValueCount() - 1;
            int sourceIndex = offset + context.getNonNullCount() - 1;
            int levelIndex = context.getEnd() - 1;

            while (destinationIndex >= offset) {
                if (definitionLevels[levelIndex] == maxDefinitionLevel) {
                    values[destinationIndex--] = values[sourceIndex--];
                }
                else if (definitionLevels[levelIndex] == maxDefinitionLevel - 1) {
                    values[destinationIndex] = 0;
                    isNull[destinationIndex] = true;
                    destinationIndex--;
                }
                levelIndex--;
            }
            offset += context.getValueCount();
        }

        boolean hasNoNull = totalNonNullCount == expandedBatchSize;
        Block block = new LongArrayBlock(expandedBatchSize, hasNoNull ? Optional.empty() : Optional.of(isNull), values);
        return new ColumnChunk(block, definitionLevels, repetitionContext.getRepetitionLevels());
    }

    @Override
    protected ColumnChunk readNestedNoNull()
            throws IOException
    {
        int maxDefinitionLevel = columnDescriptor.getMaxDefinitionLevel();
        RepetitionLevelDecodingContext repetitionContext = readRepetitionLevels(nextBatchSize);
        DefinitionLevelDecodingContext definitionContext = readDefinitionLevels(repetitionContext.getDLValuesDecoderContexts(), repetitionContext.getRepetitionLevels().length);

        int[] definitionLevels = definitionContext.getDefinitionLevels();
        int expandedBatchSize = 0;
        // Required leaves: only slots at exactly maxDefinitionLevel carry a value.
        for (ValuesDecoderContext context : definitionContext.getValuesDecoderContexts()) {
            int slots = 0;
            for (int i = context.getStart(); i < context.getEnd(); i++) {
                if (definitionLevels[i] == maxDefinitionLevel) {
                    slots++;
                }
            }
            expandedBatchSize += slots;
            context.setNonNullCount(slots);
            context.setValueCount(slots);
        }

        long[] values = new long[expandedBatchSize];
        int offset = 0;
        for (ValuesDecoderContext context : definitionContext.getValuesDecoderContexts()) {
            ((Int64ValuesDecoder) context.getValuesDecoder()).readNext(values, offset, context.getNonNullCount());
            offset += context.getValueCount();
        }

        Block block = new LongArrayBlock(expandedBatchSize, Optional.empty(), values);
        return new ColumnChunk(block, definitionLevels, repetitionContext.getRepetitionLevels());
    }

    @Override
    protected void seek()
            throws IOException
    {
        if (readOffset == 0) {
            return;
        }
        int maxDefinitionLevel = columnDescriptor.getMaxDefinitionLevel();
        RepetitionLevelDecodingContext repetitionContext = readRepetitionLevels(readOffset);
        DefinitionLevelDecodingContext definitionContext = readDefinitionLevels(repetitionContext.getDLValuesDecoderContexts(), repetitionContext.getRepetitionLevels().length);

        int[] definitionLevels = definitionContext.getDefinitionLevels();
        for (ValuesDecoderContext context : definitionContext.getValuesDecoderContexts()) {
            // Skip exactly the stored (non-null) values covered by this context.
            int nonNulls = 0;
            for (int i = context.getStart(); i < context.getEnd(); i++) {
                if (definitionLevels[i] == maxDefinitionLevel) {
                    nonNulls++;
                }
            }
            ((Int64ValuesDecoder) context.getValuesDecoder()).skip(nonNulls);
        }
    }
}
+ */ +package com.facebook.presto.parquet.batchreader; + +import com.facebook.presto.common.block.Block; +import com.facebook.presto.common.block.LongArrayBlock; +import com.facebook.presto.common.block.RunLengthEncodedBlock; +import com.facebook.presto.parquet.ColumnReader; +import com.facebook.presto.parquet.DataPage; +import com.facebook.presto.parquet.DictionaryPage; +import com.facebook.presto.parquet.Field; +import com.facebook.presto.parquet.RichColumnDescriptor; +import com.facebook.presto.parquet.batchreader.decoders.Decoders.FlatDecoders; +import com.facebook.presto.parquet.batchreader.decoders.FlatDefinitionLevelDecoder; +import com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.Int64TimeAndTimestampMicrosValuesDecoder; +import com.facebook.presto.parquet.batchreader.dictionary.Dictionaries; +import com.facebook.presto.parquet.dictionary.Dictionary; +import com.facebook.presto.parquet.reader.ColumnChunk; +import com.facebook.presto.parquet.reader.PageReader; +import com.facebook.presto.spi.PrestoException; +import org.apache.parquet.internal.filter2.columnindex.RowRanges; +import org.apache.parquet.io.ParquetDecodingException; +import org.openjdk.jol.info.ClassLayout; + +import java.io.IOException; +import java.util.Optional; + +import static com.facebook.presto.parquet.ParquetErrorCode.PARQUET_IO_READ_ERROR; +import static com.facebook.presto.parquet.batchreader.decoders.Decoders.readFlatPage; +import static com.google.common.base.Preconditions.checkArgument; +import static java.lang.String.format; +import static java.util.Objects.requireNonNull; + +public class Int64TimeAndTimestampMicrosFlatBatchReader + implements ColumnReader +{ + private static final int INSTANCE_SIZE = ClassLayout.parseClass(Int64TimeAndTimestampMicrosFlatBatchReader.class).instanceSize(); + + private final RichColumnDescriptor columnDescriptor; + + protected Field field; + protected int nextBatchSize; + protected FlatDefinitionLevelDecoder definitionLevelDecoder; + 
protected Int64TimeAndTimestampMicrosValuesDecoder valuesDecoder; + protected int remainingCountInPage; + + private Dictionary dictionary; + private int readOffset; + private PageReader pageReader; + + public Int64TimeAndTimestampMicrosFlatBatchReader(RichColumnDescriptor columnDescriptor) + { + this.columnDescriptor = requireNonNull(columnDescriptor, "columnDescriptor is null"); + } + + @Override + public boolean isInitialized() + { + return pageReader != null && field != null; + } + + @Override + public void init(PageReader pageReader, Field field, RowRanges rowRanges) + { + checkArgument(!isInitialized(), "Parquet batch reader already initialized"); + this.pageReader = requireNonNull(pageReader, "pageReader is null"); + checkArgument(pageReader.getValueCountInColumnChunk() > 0, "page is empty"); + this.field = requireNonNull(field, "field is null"); + + DictionaryPage dictionaryPage = pageReader.readDictionaryPage(); + if (dictionaryPage != null) { + dictionary = Dictionaries.createDictionary(columnDescriptor, dictionaryPage); + } + } + + @Override + public void prepareNextRead(int batchSize) + { + readOffset = readOffset + nextBatchSize; + nextBatchSize = batchSize; + } + + @Override + public ColumnChunk readNext() + { + ColumnChunk columnChunk = null; + try { + seek(); + if (field.isRequired()) { + columnChunk = readWithoutNull(); + } + else { + columnChunk = readWithNull(); + } + } + catch (IOException exception) { + throw new PrestoException(PARQUET_IO_READ_ERROR, "Error reading Parquet column " + columnDescriptor, exception); + } + + readOffset = 0; + nextBatchSize = 0; + return columnChunk; + } + + @Override + public long getRetainedSizeInBytes() + { + return INSTANCE_SIZE + + (definitionLevelDecoder == null ? 0 : definitionLevelDecoder.getRetainedSizeInBytes()) + + (valuesDecoder == null ? 0 : valuesDecoder.getRetainedSizeInBytes()) + + (dictionary == null ? 0 : dictionary.getRetainedSizeInBytes()) + + (pageReader == null ? 
0 : pageReader.getRetainedSizeInBytes()); + } + + protected boolean readNextPage() + { + definitionLevelDecoder = null; + valuesDecoder = null; + remainingCountInPage = 0; + + DataPage page = pageReader.readPage(); + if (page == null) { + return false; + } + + FlatDecoders flatDecoders = readFlatPage(page, columnDescriptor, dictionary); + definitionLevelDecoder = flatDecoders.getDefinitionLevelDecoder(); + valuesDecoder = (Int64TimeAndTimestampMicrosValuesDecoder) flatDecoders.getValuesDecoder(); + + remainingCountInPage = page.getValueCount(); + return true; + } + + private ColumnChunk readWithNull() + throws IOException + { + long[] values = new long[nextBatchSize]; + boolean[] isNull = new boolean[nextBatchSize]; + + int totalNonNullCount = 0; + int remainingInBatch = nextBatchSize; + int startOffset = 0; + while (remainingInBatch > 0) { + if (remainingCountInPage == 0) { + if (!readNextPage()) { + break; + } + } + + int chunkSize = Math.min(remainingCountInPage, remainingInBatch); + int nonNullCount = definitionLevelDecoder.readNext(isNull, startOffset, chunkSize); + totalNonNullCount += nonNullCount; + + if (nonNullCount > 0) { + valuesDecoder.readNext(values, startOffset, nonNullCount); + + int valueDestinationIndex = startOffset + chunkSize - 1; + int valueSourceIndex = startOffset + nonNullCount - 1; + + while (valueDestinationIndex >= startOffset) { + if (!isNull[valueDestinationIndex]) { + values[valueDestinationIndex] = values[valueSourceIndex]; + valueSourceIndex--; + } + valueDestinationIndex--; + } + } + + startOffset += chunkSize; + remainingInBatch -= chunkSize; + remainingCountInPage -= chunkSize; + } + + if (remainingInBatch != 0) { + throw new ParquetDecodingException("Still remaining to be read in current batch."); + } + + if (totalNonNullCount == 0) { + Block block = RunLengthEncodedBlock.create(field.getType(), null, nextBatchSize); + return new ColumnChunk(block, new int[0], new int[0]); + } + + boolean hasNoNull = totalNonNullCount == 
nextBatchSize; + Block block = new LongArrayBlock(nextBatchSize, hasNoNull ? Optional.empty() : Optional.of(isNull), values); + return new ColumnChunk(block, new int[0], new int[0]); + } + + private ColumnChunk readWithoutNull() + throws IOException + { + long[] values = new long[nextBatchSize]; + int remainingInBatch = nextBatchSize; + int startOffset = 0; + while (remainingInBatch > 0) { + if (remainingCountInPage == 0) { + if (!readNextPage()) { + break; + } + } + + int chunkSize = Math.min(remainingCountInPage, remainingInBatch); + + valuesDecoder.readNext(values, startOffset, chunkSize); + startOffset += chunkSize; + remainingInBatch -= chunkSize; + remainingCountInPage -= chunkSize; + } + + if (remainingInBatch != 0) { + throw new ParquetDecodingException(format("Corrupted Parquet file: extra %d values to be consumed when scanning current batch", remainingInBatch)); + } + + Block block = new LongArrayBlock(nextBatchSize, Optional.empty(), values); + return new ColumnChunk(block, new int[0], new int[0]); + } + + private void seek() + throws IOException + { + if (readOffset == 0) { + return; + } + + int remainingInBatch = readOffset; + int startOffset = 0; + while (remainingInBatch > 0) { + if (remainingCountInPage == 0) { + if (!readNextPage()) { + break; + } + } + + int chunkSize = Math.min(remainingCountInPage, remainingInBatch); + int skipSize = chunkSize; + if (!columnDescriptor.isRequired()) { + boolean[] isNull = new boolean[readOffset]; + int nonNullCount = definitionLevelDecoder.readNext(isNull, startOffset, chunkSize); + skipSize = nonNullCount; + startOffset += chunkSize; + } + valuesDecoder.skip(skipSize); + remainingInBatch -= chunkSize; + remainingCountInPage -= chunkSize; + } + } +} diff --git a/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/Int64TimeAndTimestampMicrosNestedBatchReader.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/Int64TimeAndTimestampMicrosNestedBatchReader.java new file mode 
// File: presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/Int64TimeAndTimestampMicrosNestedBatchReader.java
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.parquet.batchreader;

import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.block.LongArrayBlock;
import com.facebook.presto.common.block.RunLengthEncodedBlock;
import com.facebook.presto.parquet.RichColumnDescriptor;
import com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.Int64TimeAndTimestampMicrosValuesDecoder;
import com.facebook.presto.parquet.reader.ColumnChunk;

import java.io.IOException;
import java.util.Optional;

/**
 * Batch reader for nested INT64 TIME/TIMESTAMP(MICROS) leaf columns. Decodes
 * repetition and definition levels, then materializes the long values, expanding
 * nulls in place where the definition level indicates a present-but-null leaf.
 */
public class Int64TimeAndTimestampMicrosNestedBatchReader
        extends AbstractNestedBatchReader
{
    public Int64TimeAndTimestampMicrosNestedBatchReader(RichColumnDescriptor columnDescriptor)
    {
        super(columnDescriptor);
    }

    @Override
    protected ColumnChunk readNestedWithNull()
            throws IOException
    {
        int maxDefinitionLevel = columnDescriptor.getMaxDefinitionLevel();
        RepetitionLevelDecodingContext repetitionContext = readRepetitionLevels(nextBatchSize);
        DefinitionLevelDecodingContext definitionContext = readDefinitionLevels(repetitionContext.getDLValuesDecoderContexts(), repetitionContext.getRepetitionLevels().length);

        int[] definitionLevels = definitionContext.getDefinitionLevels();
        int expandedBatchSize = 0;
        int totalNonNullCount = 0;
        // First pass: per decoder context, count non-null leaves and total output
        // slots (a level of maxDefinitionLevel - 1 marks a null leaf slot).
        for (ValuesDecoderContext context : definitionContext.getValuesDecoderContexts()) {
            int nonNulls = 0;
            int slots = 0;
            for (int i = context.getStart(); i < context.getEnd(); i++) {
                if (definitionLevels[i] == maxDefinitionLevel) {
                    nonNulls++;
                }
                if (definitionLevels[i] >= maxDefinitionLevel - 1) {
                    slots++;
                }
            }
            totalNonNullCount += nonNulls;
            expandedBatchSize += slots;
            context.setNonNullCount(nonNulls);
            context.setValueCount(slots);
        }

        if (totalNonNullCount == 0) {
            // Every slot is null: emit a run-length-encoded all-null block.
            Block nullBlock = RunLengthEncodedBlock.create(field.getType(), null, expandedBatchSize);
            return new ColumnChunk(nullBlock, definitionLevels, repetitionContext.getRepetitionLevels());
        }

        long[] values = new long[expandedBatchSize];
        boolean[] isNull = new boolean[expandedBatchSize];
        int offset = 0;
        for (ValuesDecoderContext context : definitionContext.getValuesDecoderContexts()) {
            ((Int64TimeAndTimestampMicrosValuesDecoder) context.getValuesDecoder()).readNext(values, offset, context.getNonNullCount());

            // Walk backwards, spreading the densely packed values out to their final
            // slots and flagging null positions along the way.
            int destinationIndex = offset + context.getValueCount() - 1;
            int sourceIndex = offset + context.getNonNullCount() - 1;
            int levelIndex = context.getEnd() - 1;

            while (destinationIndex >= offset) {
                if (definitionLevels[levelIndex] == maxDefinitionLevel) {
                    values[destinationIndex--] = values[sourceIndex--];
                }
                else if (definitionLevels[levelIndex] == maxDefinitionLevel - 1) {
                    values[destinationIndex] = 0;
                    isNull[destinationIndex] = true;
                    destinationIndex--;
                }
                levelIndex--;
            }
            offset += context.getValueCount();
        }

        boolean hasNoNull = totalNonNullCount == expandedBatchSize;
        Block block = new LongArrayBlock(expandedBatchSize, hasNoNull ? Optional.empty() : Optional.of(isNull), values);
        return new ColumnChunk(block, definitionLevels, repetitionContext.getRepetitionLevels());
    }

    @Override
    protected ColumnChunk readNestedNoNull()
            throws IOException
    {
        int maxDefinitionLevel = columnDescriptor.getMaxDefinitionLevel();
        RepetitionLevelDecodingContext repetitionContext = readRepetitionLevels(nextBatchSize);
        DefinitionLevelDecodingContext definitionContext = readDefinitionLevels(repetitionContext.getDLValuesDecoderContexts(), repetitionContext.getRepetitionLevels().length);

        int[] definitionLevels = definitionContext.getDefinitionLevels();
        int expandedBatchSize = 0;
        // Required leaves: only slots at exactly maxDefinitionLevel carry a value.
        for (ValuesDecoderContext context : definitionContext.getValuesDecoderContexts()) {
            int slots = 0;
            for (int i = context.getStart(); i < context.getEnd(); i++) {
                if (definitionLevels[i] == maxDefinitionLevel) {
                    slots++;
                }
            }
            expandedBatchSize += slots;
            context.setNonNullCount(slots);
            context.setValueCount(slots);
        }

        long[] values = new long[expandedBatchSize];
        int offset = 0;
        for (ValuesDecoderContext context : definitionContext.getValuesDecoderContexts()) {
            ((Int64TimeAndTimestampMicrosValuesDecoder) context.getValuesDecoder()).readNext(values, offset, context.getNonNullCount());
            offset += context.getValueCount();
        }

        Block block = new LongArrayBlock(expandedBatchSize, Optional.empty(), values);
        return new ColumnChunk(block, definitionLevels, repetitionContext.getRepetitionLevels());
    }

    @Override
    protected void seek()
            throws IOException
    {
        if (readOffset == 0) {
            return;
        }
        int maxDefinitionLevel = columnDescriptor.getMaxDefinitionLevel();
        RepetitionLevelDecodingContext repetitionContext = readRepetitionLevels(readOffset);
        DefinitionLevelDecodingContext definitionContext = readDefinitionLevels(repetitionContext.getDLValuesDecoderContexts(), repetitionContext.getRepetitionLevels().length);

        int[] definitionLevels = definitionContext.getDefinitionLevels();
        for (ValuesDecoderContext context : definitionContext.getValuesDecoderContexts()) {
            // Skip exactly the stored (non-null) values covered by this context.
            int nonNulls = 0;
            for (int i = context.getStart(); i < context.getEnd(); i++) {
                if (definitionLevels[i] == maxDefinitionLevel) {
                    nonNulls++;
                }
            }
            ((Int64TimeAndTimestampMicrosValuesDecoder) context.getValuesDecoder()).skip(nonNulls);
        }
    }
}
+ */ +package com.facebook.presto.parquet.batchreader; + +import com.facebook.presto.common.block.Block; +import com.facebook.presto.common.block.Int128ArrayBlock; +import com.facebook.presto.common.block.RunLengthEncodedBlock; +import com.facebook.presto.parquet.ColumnReader; +import com.facebook.presto.parquet.DataPage; +import com.facebook.presto.parquet.DictionaryPage; +import com.facebook.presto.parquet.Field; +import com.facebook.presto.parquet.RichColumnDescriptor; +import com.facebook.presto.parquet.batchreader.decoders.Decoders.FlatDecoders; +import com.facebook.presto.parquet.batchreader.decoders.FlatDefinitionLevelDecoder; +import com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.LongDecimalValuesDecoder; +import com.facebook.presto.parquet.batchreader.dictionary.Dictionaries; +import com.facebook.presto.parquet.dictionary.Dictionary; +import com.facebook.presto.parquet.reader.ColumnChunk; +import com.facebook.presto.parquet.reader.PageReader; +import com.facebook.presto.spi.PrestoException; +import org.apache.parquet.internal.filter2.columnindex.RowRanges; +import org.apache.parquet.io.ParquetDecodingException; +import org.openjdk.jol.info.ClassLayout; + +import java.io.IOException; +import java.util.Optional; + +import static com.facebook.presto.parquet.ParquetErrorCode.PARQUET_IO_READ_ERROR; +import static com.facebook.presto.parquet.batchreader.decoders.Decoders.readFlatPage; +import static com.google.common.base.Preconditions.checkArgument; +import static java.lang.String.format; +import static java.util.Objects.requireNonNull; + +public class LongDecimalFlatBatchReader + implements ColumnReader +{ + private static final int INSTANCE_SIZE = ClassLayout.parseClass(LongDecimalFlatBatchReader.class).instanceSize(); + + private final RichColumnDescriptor columnDescriptor; + + protected Field field; + protected int nextBatchSize; + protected FlatDefinitionLevelDecoder definitionLevelDecoder; + protected LongDecimalValuesDecoder 
valuesDecoder; + protected int remainingCountInPage; + + private Dictionary dictionary; + private int readOffset; + private PageReader pageReader; + + public LongDecimalFlatBatchReader(RichColumnDescriptor columnDescriptor) + { + this.columnDescriptor = requireNonNull(columnDescriptor, "columnDescriptor is null"); + } + + @Override + public boolean isInitialized() + { + return pageReader != null && field != null; + } + + @Override + public void init(PageReader pageReader, Field field, RowRanges rowRanges) + { + checkArgument(!isInitialized(), "Parquet batch reader already initialized"); + this.pageReader = requireNonNull(pageReader, "pageReader is null"); + checkArgument(pageReader.getValueCountInColumnChunk() > 0, "page is empty"); + this.field = requireNonNull(field, "field is null"); + + DictionaryPage dictionaryPage = pageReader.readDictionaryPage(); + if (dictionaryPage != null) { + dictionary = Dictionaries.createDictionary(columnDescriptor, dictionaryPage); + } + } + + @Override + public void prepareNextRead(int batchSize) + { + readOffset = readOffset + nextBatchSize; + nextBatchSize = batchSize; + } + + @Override + public ColumnChunk readNext() + { + ColumnChunk columnChunk = null; + try { + seek(); + if (field.isRequired()) { + columnChunk = readWithoutNull(); + } + else { + columnChunk = readWithNull(); + } + } + catch (IOException exception) { + throw new PrestoException(PARQUET_IO_READ_ERROR, "Error reading Parquet column " + columnDescriptor, exception); + } + + readOffset = 0; + nextBatchSize = 0; + return columnChunk; + } + + @Override + public long getRetainedSizeInBytes() + { + return INSTANCE_SIZE + + (definitionLevelDecoder == null ? 0 : definitionLevelDecoder.getRetainedSizeInBytes()) + + (valuesDecoder == null ? 0 : valuesDecoder.getRetainedSizeInBytes()) + + (dictionary == null ? 0 : dictionary.getRetainedSizeInBytes()) + + (pageReader == null ? 
0 : pageReader.getRetainedSizeInBytes()); + } + + protected boolean readNextPage() + { + definitionLevelDecoder = null; + valuesDecoder = null; + remainingCountInPage = 0; + + DataPage page = pageReader.readPage(); + if (page == null) { + return false; + } + + FlatDecoders flatDecoders = readFlatPage(page, columnDescriptor, dictionary); + definitionLevelDecoder = flatDecoders.getDefinitionLevelDecoder(); + valuesDecoder = (LongDecimalValuesDecoder) flatDecoders.getValuesDecoder(); + + remainingCountInPage = page.getValueCount(); + return true; + } + + private ColumnChunk readWithNull() + throws IOException + { + long[] values = new long[nextBatchSize * 2]; + boolean[] isNull = new boolean[nextBatchSize]; + + int totalNonNullCount = 0; + int remainingInBatch = nextBatchSize; + int startOffset = 0; + while (remainingInBatch > 0) { + if (remainingCountInPage == 0) { + if (!readNextPage()) { + break; + } + } + + int chunkSize = Math.min(remainingCountInPage, remainingInBatch); + int nonNullCount = definitionLevelDecoder.readNext(isNull, startOffset, chunkSize); + totalNonNullCount += nonNullCount; + + if (nonNullCount > 0) { + valuesDecoder.readNext(values, startOffset, nonNullCount); + + int valueDestinationIndex = startOffset + chunkSize - 1; + int valueSourceIndex = startOffset + nonNullCount - 1; + + while (valueDestinationIndex >= startOffset) { + if (!isNull[valueDestinationIndex]) { + values[valueDestinationIndex * 2 + 1] = values[valueSourceIndex * 2 + 1]; + values[valueDestinationIndex * 2] = values[valueSourceIndex * 2]; + valueSourceIndex--; + } + valueDestinationIndex--; + } + } + + startOffset += chunkSize; + remainingInBatch -= chunkSize; + remainingCountInPage -= chunkSize; + } + + if (remainingInBatch != 0) { + throw new ParquetDecodingException("Still remaining to be read in current batch."); + } + + if (totalNonNullCount == 0) { + Block block = RunLengthEncodedBlock.create(field.getType(), null, nextBatchSize); + return new ColumnChunk(block, new 
int[0], new int[0]); + } + + boolean hasNoNull = totalNonNullCount == nextBatchSize; + Block block = new Int128ArrayBlock(nextBatchSize, hasNoNull ? Optional.empty() : Optional.of(isNull), values); + return new ColumnChunk(block, new int[0], new int[0]); + } + + private ColumnChunk readWithoutNull() + throws IOException + { + long[] values = new long[nextBatchSize * 2]; + int remainingInBatch = nextBatchSize; + int startOffset = 0; + while (remainingInBatch > 0) { + if (remainingCountInPage == 0) { + if (!readNextPage()) { + break; + } + } + + int chunkSize = Math.min(remainingCountInPage, remainingInBatch); + + valuesDecoder.readNext(values, startOffset, chunkSize); + startOffset += chunkSize; + remainingInBatch -= chunkSize; + remainingCountInPage -= chunkSize; + } + + if (remainingInBatch != 0) { + throw new ParquetDecodingException(format("Corrupted Parquet file: extra %d values to be consumed when scanning current batch", remainingInBatch)); + } + + Block block = new Int128ArrayBlock(nextBatchSize, Optional.empty(), values); + return new ColumnChunk(block, new int[0], new int[0]); + } + + private void seek() + throws IOException + { + if (readOffset == 0) { + return; + } + + int remainingInBatch = readOffset; + int startOffset = 0; + while (remainingInBatch > 0) { + if (remainingCountInPage == 0) { + if (!readNextPage()) { + break; + } + } + + int chunkSize = Math.min(remainingCountInPage, remainingInBatch); + int skipSize = chunkSize; + if (!columnDescriptor.isRequired()) { + boolean[] isNull = new boolean[readOffset]; + int nonNullCount = definitionLevelDecoder.readNext(isNull, startOffset, chunkSize); + skipSize = nonNullCount; + startOffset += chunkSize; + } + valuesDecoder.skip(skipSize); + remainingInBatch -= chunkSize; + remainingCountInPage -= chunkSize; + } + } +} diff --git a/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/ShortDecimalFlatBatchReader.java 
b/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/ShortDecimalFlatBatchReader.java new file mode 100644 index 000000000000..db3dff192a24 --- /dev/null +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/ShortDecimalFlatBatchReader.java @@ -0,0 +1,257 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.parquet.batchreader; + +import com.facebook.presto.common.block.Block; +import com.facebook.presto.common.block.LongArrayBlock; +import com.facebook.presto.common.block.RunLengthEncodedBlock; +import com.facebook.presto.parquet.ColumnReader; +import com.facebook.presto.parquet.DataPage; +import com.facebook.presto.parquet.DictionaryPage; +import com.facebook.presto.parquet.Field; +import com.facebook.presto.parquet.RichColumnDescriptor; +import com.facebook.presto.parquet.batchreader.decoders.Decoders.FlatDecoders; +import com.facebook.presto.parquet.batchreader.decoders.FlatDefinitionLevelDecoder; +import com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.ShortDecimalValuesDecoder; +import com.facebook.presto.parquet.batchreader.dictionary.Dictionaries; +import com.facebook.presto.parquet.dictionary.Dictionary; +import com.facebook.presto.parquet.reader.ColumnChunk; +import com.facebook.presto.parquet.reader.PageReader; +import com.facebook.presto.spi.PrestoException; +import org.apache.parquet.internal.filter2.columnindex.RowRanges; +import 
org.apache.parquet.io.ParquetDecodingException; +import org.openjdk.jol.info.ClassLayout; + +import java.io.IOException; +import java.util.Optional; + +import static com.facebook.presto.parquet.ParquetErrorCode.PARQUET_IO_READ_ERROR; +import static com.facebook.presto.parquet.batchreader.decoders.Decoders.readFlatPage; +import static com.google.common.base.Preconditions.checkArgument; +import static java.lang.String.format; +import static java.util.Objects.requireNonNull; + +public class ShortDecimalFlatBatchReader + implements ColumnReader +{ + private static final int INSTANCE_SIZE = ClassLayout.parseClass(ShortDecimalFlatBatchReader.class).instanceSize(); + + private final RichColumnDescriptor columnDescriptor; + + protected Field field; + protected int nextBatchSize; + protected FlatDefinitionLevelDecoder definitionLevelDecoder; + protected ShortDecimalValuesDecoder valuesDecoder; + protected int remainingCountInPage; + + private Dictionary dictionary; + private int readOffset; + private PageReader pageReader; + + public ShortDecimalFlatBatchReader(RichColumnDescriptor columnDescriptor) + { + this.columnDescriptor = requireNonNull(columnDescriptor, "columnDescriptor is null"); + } + + @Override + public boolean isInitialized() + { + return pageReader != null && field != null; + } + + @Override + public void init(PageReader pageReader, Field field, RowRanges rowRanges) + { + checkArgument(!isInitialized(), "Parquet batch reader already initialized"); + this.pageReader = requireNonNull(pageReader, "pageReader is null"); + checkArgument(pageReader.getValueCountInColumnChunk() > 0, "page is empty"); + this.field = requireNonNull(field, "field is null"); + + DictionaryPage dictionaryPage = pageReader.readDictionaryPage(); + if (dictionaryPage != null) { + dictionary = Dictionaries.createDictionary(columnDescriptor, dictionaryPage); + } + } + + @Override + public void prepareNextRead(int batchSize) + { + readOffset = readOffset + nextBatchSize; + nextBatchSize = 
batchSize; + } + + @Override + public ColumnChunk readNext() + { + ColumnChunk columnChunk = null; + try { + seek(); + if (field.isRequired()) { + columnChunk = readWithoutNull(); + } + else { + columnChunk = readWithNull(); + } + } + catch (IOException exception) { + throw new PrestoException(PARQUET_IO_READ_ERROR, "Error reading Parquet column " + columnDescriptor, exception); + } + + readOffset = 0; + nextBatchSize = 0; + return columnChunk; + } + + @Override + public long getRetainedSizeInBytes() + { + return INSTANCE_SIZE + + (definitionLevelDecoder == null ? 0 : definitionLevelDecoder.getRetainedSizeInBytes()) + + (valuesDecoder == null ? 0 : valuesDecoder.getRetainedSizeInBytes()) + + (dictionary == null ? 0 : dictionary.getRetainedSizeInBytes()) + + (pageReader == null ? 0 : pageReader.getRetainedSizeInBytes()); + } + + protected boolean readNextPage() + { + definitionLevelDecoder = null; + valuesDecoder = null; + remainingCountInPage = 0; + + DataPage page = pageReader.readPage(); + if (page == null) { + return false; + } + + FlatDecoders flatDecoders = readFlatPage(page, columnDescriptor, dictionary); + definitionLevelDecoder = flatDecoders.getDefinitionLevelDecoder(); + valuesDecoder = (ShortDecimalValuesDecoder) flatDecoders.getValuesDecoder(); + + remainingCountInPage = page.getValueCount(); + return true; + } + + private ColumnChunk readWithNull() + throws IOException + { + long[] values = new long[nextBatchSize]; + boolean[] isNull = new boolean[nextBatchSize]; + + int totalNonNullCount = 0; + int remainingInBatch = nextBatchSize; + int startOffset = 0; + while (remainingInBatch > 0) { + if (remainingCountInPage == 0) { + if (!readNextPage()) { + break; + } + } + + int chunkSize = Math.min(remainingCountInPage, remainingInBatch); + int nonNullCount = definitionLevelDecoder.readNext(isNull, startOffset, chunkSize); + totalNonNullCount += nonNullCount; + + if (nonNullCount > 0) { + valuesDecoder.readNext(values, startOffset, nonNullCount); + + int 
valueDestinationIndex = startOffset + chunkSize - 1; + int valueSourceIndex = startOffset + nonNullCount - 1; + + while (valueDestinationIndex >= startOffset) { + if (!isNull[valueDestinationIndex]) { + values[valueDestinationIndex] = values[valueSourceIndex]; + valueSourceIndex--; + } + valueDestinationIndex--; + } + } + + startOffset += chunkSize; + remainingInBatch -= chunkSize; + remainingCountInPage -= chunkSize; + } + + if (remainingInBatch != 0) { + throw new ParquetDecodingException("Still remaining to be read in current batch."); + } + + if (totalNonNullCount == 0) { + Block block = RunLengthEncodedBlock.create(field.getType(), null, nextBatchSize); + return new ColumnChunk(block, new int[0], new int[0]); + } + + boolean hasNoNull = totalNonNullCount == nextBatchSize; + Block block = new LongArrayBlock(nextBatchSize, hasNoNull ? Optional.empty() : Optional.of(isNull), values); + return new ColumnChunk(block, new int[0], new int[0]); + } + + private ColumnChunk readWithoutNull() + throws IOException + { + long[] values = new long[nextBatchSize]; + int remainingInBatch = nextBatchSize; + int startOffset = 0; + while (remainingInBatch > 0) { + if (remainingCountInPage == 0) { + if (!readNextPage()) { + break; + } + } + + int chunkSize = Math.min(remainingCountInPage, remainingInBatch); + + valuesDecoder.readNext(values, startOffset, chunkSize); + startOffset += chunkSize; + remainingInBatch -= chunkSize; + remainingCountInPage -= chunkSize; + } + + if (remainingInBatch != 0) { + throw new ParquetDecodingException(format("Corrupted Parquet file: extra %d values to be consumed when scanning current batch", remainingInBatch)); + } + + Block block = new LongArrayBlock(nextBatchSize, Optional.empty(), values); + return new ColumnChunk(block, new int[0], new int[0]); + } + + private void seek() + throws IOException + { + if (readOffset == 0) { + return; + } + + int remainingInBatch = readOffset; + int startOffset = 0; + while (remainingInBatch > 0) { + if 
(remainingCountInPage == 0) { + if (!readNextPage()) { + break; + } + } + + int chunkSize = Math.min(remainingCountInPage, remainingInBatch); + int skipSize = chunkSize; + if (!columnDescriptor.isRequired()) { + boolean[] isNull = new boolean[readOffset]; + int nonNullCount = definitionLevelDecoder.readNext(isNull, startOffset, chunkSize); + skipSize = nonNullCount; + startOffset += chunkSize; + } + valuesDecoder.skip(skipSize); + remainingInBatch -= chunkSize; + remainingCountInPage -= chunkSize; + } + } +} diff --git a/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/TimestampFlatBatchReader.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/TimestampFlatBatchReader.java new file mode 100644 index 000000000000..7ac7bef95043 --- /dev/null +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/TimestampFlatBatchReader.java @@ -0,0 +1,257 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.parquet.batchreader; + +import com.facebook.presto.common.block.Block; +import com.facebook.presto.common.block.LongArrayBlock; +import com.facebook.presto.common.block.RunLengthEncodedBlock; +import com.facebook.presto.parquet.ColumnReader; +import com.facebook.presto.parquet.DataPage; +import com.facebook.presto.parquet.DictionaryPage; +import com.facebook.presto.parquet.Field; +import com.facebook.presto.parquet.RichColumnDescriptor; +import com.facebook.presto.parquet.batchreader.decoders.Decoders.FlatDecoders; +import com.facebook.presto.parquet.batchreader.decoders.FlatDefinitionLevelDecoder; +import com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.TimestampValuesDecoder; +import com.facebook.presto.parquet.batchreader.dictionary.Dictionaries; +import com.facebook.presto.parquet.dictionary.Dictionary; +import com.facebook.presto.parquet.reader.ColumnChunk; +import com.facebook.presto.parquet.reader.PageReader; +import com.facebook.presto.spi.PrestoException; +import org.apache.parquet.internal.filter2.columnindex.RowRanges; +import org.apache.parquet.io.ParquetDecodingException; +import org.openjdk.jol.info.ClassLayout; + +import java.io.IOException; +import java.util.Optional; + +import static com.facebook.presto.parquet.ParquetErrorCode.PARQUET_IO_READ_ERROR; +import static com.facebook.presto.parquet.batchreader.decoders.Decoders.readFlatPage; +import static com.google.common.base.Preconditions.checkArgument; +import static java.lang.String.format; +import static java.util.Objects.requireNonNull; + +public class TimestampFlatBatchReader + implements ColumnReader +{ + private static final int INSTANCE_SIZE = ClassLayout.parseClass(TimestampFlatBatchReader.class).instanceSize(); + + private final RichColumnDescriptor columnDescriptor; + + protected Field field; + protected int nextBatchSize; + protected FlatDefinitionLevelDecoder definitionLevelDecoder; + protected TimestampValuesDecoder valuesDecoder; + 
protected int remainingCountInPage; + + private Dictionary dictionary; + private int readOffset; + private PageReader pageReader; + + public TimestampFlatBatchReader(RichColumnDescriptor columnDescriptor) + { + this.columnDescriptor = requireNonNull(columnDescriptor, "columnDescriptor is null"); + } + + @Override + public boolean isInitialized() + { + return pageReader != null && field != null; + } + + @Override + public void init(PageReader pageReader, Field field, RowRanges rowRanges) + { + checkArgument(!isInitialized(), "Parquet batch reader already initialized"); + this.pageReader = requireNonNull(pageReader, "pageReader is null"); + checkArgument(pageReader.getValueCountInColumnChunk() > 0, "page is empty"); + this.field = requireNonNull(field, "field is null"); + + DictionaryPage dictionaryPage = pageReader.readDictionaryPage(); + if (dictionaryPage != null) { + dictionary = Dictionaries.createDictionary(columnDescriptor, dictionaryPage); + } + } + + @Override + public void prepareNextRead(int batchSize) + { + readOffset = readOffset + nextBatchSize; + nextBatchSize = batchSize; + } + + @Override + public ColumnChunk readNext() + { + ColumnChunk columnChunk = null; + try { + seek(); + if (field.isRequired()) { + columnChunk = readWithoutNull(); + } + else { + columnChunk = readWithNull(); + } + } + catch (IOException exception) { + throw new PrestoException(PARQUET_IO_READ_ERROR, "Error reading Parquet column " + columnDescriptor, exception); + } + + readOffset = 0; + nextBatchSize = 0; + return columnChunk; + } + + @Override + public long getRetainedSizeInBytes() + { + return INSTANCE_SIZE + + (definitionLevelDecoder == null ? 0 : definitionLevelDecoder.getRetainedSizeInBytes()) + + (valuesDecoder == null ? 0 : valuesDecoder.getRetainedSizeInBytes()) + + (dictionary == null ? 0 : dictionary.getRetainedSizeInBytes()) + + (pageReader == null ? 
0 : pageReader.getRetainedSizeInBytes()); + } + + protected boolean readNextPage() + { + definitionLevelDecoder = null; + valuesDecoder = null; + remainingCountInPage = 0; + + DataPage page = pageReader.readPage(); + if (page == null) { + return false; + } + + FlatDecoders flatDecoders = readFlatPage(page, columnDescriptor, dictionary); + definitionLevelDecoder = flatDecoders.getDefinitionLevelDecoder(); + valuesDecoder = (TimestampValuesDecoder) flatDecoders.getValuesDecoder(); + + remainingCountInPage = page.getValueCount(); + return true; + } + + private ColumnChunk readWithNull() + throws IOException + { + long[] values = new long[nextBatchSize]; + boolean[] isNull = new boolean[nextBatchSize]; + + int totalNonNullCount = 0; + int remainingInBatch = nextBatchSize; + int startOffset = 0; + while (remainingInBatch > 0) { + if (remainingCountInPage == 0) { + if (!readNextPage()) { + break; + } + } + + int chunkSize = Math.min(remainingCountInPage, remainingInBatch); + int nonNullCount = definitionLevelDecoder.readNext(isNull, startOffset, chunkSize); + totalNonNullCount += nonNullCount; + + if (nonNullCount > 0) { + valuesDecoder.readNext(values, startOffset, nonNullCount); + + int valueDestinationIndex = startOffset + chunkSize - 1; + int valueSourceIndex = startOffset + nonNullCount - 1; + + while (valueDestinationIndex >= startOffset) { + if (!isNull[valueDestinationIndex]) { + values[valueDestinationIndex] = values[valueSourceIndex]; + valueSourceIndex--; + } + valueDestinationIndex--; + } + } + + startOffset += chunkSize; + remainingInBatch -= chunkSize; + remainingCountInPage -= chunkSize; + } + + if (remainingInBatch != 0) { + throw new ParquetDecodingException("Still remaining to be read in current batch."); + } + + if (totalNonNullCount == 0) { + Block block = RunLengthEncodedBlock.create(field.getType(), null, nextBatchSize); + return new ColumnChunk(block, new int[0], new int[0]); + } + + boolean hasNoNull = totalNonNullCount == nextBatchSize; + Block 
block = new LongArrayBlock(nextBatchSize, hasNoNull ? Optional.empty() : Optional.of(isNull), values); + return new ColumnChunk(block, new int[0], new int[0]); + } + + private ColumnChunk readWithoutNull() + throws IOException + { + long[] values = new long[nextBatchSize]; + int remainingInBatch = nextBatchSize; + int startOffset = 0; + while (remainingInBatch > 0) { + if (remainingCountInPage == 0) { + if (!readNextPage()) { + break; + } + } + + int chunkSize = Math.min(remainingCountInPage, remainingInBatch); + + valuesDecoder.readNext(values, startOffset, chunkSize); + startOffset += chunkSize; + remainingInBatch -= chunkSize; + remainingCountInPage -= chunkSize; + } + + if (remainingInBatch != 0) { + throw new ParquetDecodingException(format("Corrupted Parquet file: extra %d values to be consumed when scanning current batch", remainingInBatch)); + } + + Block block = new LongArrayBlock(nextBatchSize, Optional.empty(), values); + return new ColumnChunk(block, new int[0], new int[0]); + } + + private void seek() + throws IOException + { + if (readOffset == 0) { + return; + } + + int remainingInBatch = readOffset; + int startOffset = 0; + while (remainingInBatch > 0) { + if (remainingCountInPage == 0) { + if (!readNextPage()) { + break; + } + } + + int chunkSize = Math.min(remainingCountInPage, remainingInBatch); + int skipSize = chunkSize; + if (!columnDescriptor.isRequired()) { + boolean[] isNull = new boolean[readOffset]; + int nonNullCount = definitionLevelDecoder.readNext(isNull, startOffset, chunkSize); + skipSize = nonNullCount; + startOffset += chunkSize; + } + valuesDecoder.skip(skipSize); + remainingInBatch -= chunkSize; + remainingCountInPage -= chunkSize; + } + } +} diff --git a/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/TimestampNestedBatchReader.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/TimestampNestedBatchReader.java new file mode 100644 index 000000000000..e5c867a4d989 --- /dev/null +++ 
b/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/TimestampNestedBatchReader.java @@ -0,0 +1,144 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.parquet.batchreader; + +import com.facebook.presto.common.block.Block; +import com.facebook.presto.common.block.LongArrayBlock; +import com.facebook.presto.common.block.RunLengthEncodedBlock; +import com.facebook.presto.parquet.RichColumnDescriptor; +import com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.TimestampValuesDecoder; +import com.facebook.presto.parquet.reader.ColumnChunk; + +import java.io.IOException; +import java.util.Optional; + +public class TimestampNestedBatchReader + extends AbstractNestedBatchReader +{ + public TimestampNestedBatchReader(RichColumnDescriptor columnDescriptor) + { + super(columnDescriptor); + } + + @Override + protected ColumnChunk readNestedWithNull() + throws IOException + { + int maxDefinitionLevel = columnDescriptor.getMaxDefinitionLevel(); + RepetitionLevelDecodingContext repetitionLevelDecodingContext = readRepetitionLevels(nextBatchSize); + DefinitionLevelDecodingContext definitionLevelDecodingContext = readDefinitionLevels(repetitionLevelDecodingContext.getDLValuesDecoderContexts(), repetitionLevelDecodingContext.getRepetitionLevels().length); + + int[] definitionLevels = definitionLevelDecodingContext.getDefinitionLevels(); + int newBatchSize = 0; + int batchNonNullCount = 0; + for 
(ValuesDecoderContext valuesDecoderContext : definitionLevelDecodingContext.getValuesDecoderContexts()) { + int nonNullCount = 0; + int valueCount = 0; + for (int i = valuesDecoderContext.getStart(); i < valuesDecoderContext.getEnd(); i++) { + nonNullCount += (definitionLevels[i] == maxDefinitionLevel ? 1 : 0); + valueCount += (definitionLevels[i] >= maxDefinitionLevel - 1 ? 1 : 0); + } + batchNonNullCount += nonNullCount; + newBatchSize += valueCount; + valuesDecoderContext.setNonNullCount(nonNullCount); + valuesDecoderContext.setValueCount(valueCount); + } + + if (batchNonNullCount == 0) { + Block block = RunLengthEncodedBlock.create(field.getType(), null, newBatchSize); + return new ColumnChunk(block, definitionLevels, repetitionLevelDecodingContext.getRepetitionLevels()); + } + + long[] values = new long[newBatchSize]; + boolean[] isNull = new boolean[newBatchSize]; + int offset = 0; + for (ValuesDecoderContext valuesDecoderContext : definitionLevelDecodingContext.getValuesDecoderContexts()) { + ((TimestampValuesDecoder) valuesDecoderContext.getValuesDecoder()).readNext(values, offset, valuesDecoderContext.getNonNullCount()); + + int valueDestinationIndex = offset + valuesDecoderContext.getValueCount() - 1; + int valueSourceIndex = offset + valuesDecoderContext.getNonNullCount() - 1; + int definitionLevelIndex = valuesDecoderContext.getEnd() - 1; + + while (valueDestinationIndex >= offset) { + if (definitionLevels[definitionLevelIndex] == maxDefinitionLevel) { + values[valueDestinationIndex--] = values[valueSourceIndex--]; + } + else if (definitionLevels[definitionLevelIndex] == maxDefinitionLevel - 1) { + values[valueDestinationIndex] = 0; + isNull[valueDestinationIndex] = true; + valueDestinationIndex--; + } + definitionLevelIndex--; + } + offset += valuesDecoderContext.getValueCount(); + } + + boolean hasNoNull = batchNonNullCount == newBatchSize; + Block block = new LongArrayBlock(newBatchSize, hasNoNull ? 
Optional.empty() : Optional.of(isNull), values); + return new ColumnChunk(block, definitionLevels, repetitionLevelDecodingContext.getRepetitionLevels()); + } + + @Override + protected ColumnChunk readNestedNoNull() + throws IOException + { + int maxDefinitionLevel = columnDescriptor.getMaxDefinitionLevel(); + RepetitionLevelDecodingContext repetitionLevelDecodingContext = readRepetitionLevels(nextBatchSize); + DefinitionLevelDecodingContext definitionLevelDecodingContext = readDefinitionLevels(repetitionLevelDecodingContext.getDLValuesDecoderContexts(), repetitionLevelDecodingContext.getRepetitionLevels().length); + + int[] definitionLevels = definitionLevelDecodingContext.getDefinitionLevels(); + int newBatchSize = 0; + for (ValuesDecoderContext valuesDecoderContext : definitionLevelDecodingContext.getValuesDecoderContexts()) { + int valueCount = 0; + for (int i = valuesDecoderContext.getStart(); i < valuesDecoderContext.getEnd(); i++) { + valueCount += (definitionLevels[i] == maxDefinitionLevel ? 
1 : 0); + } + newBatchSize += valueCount; + valuesDecoderContext.setNonNullCount(valueCount); + valuesDecoderContext.setValueCount(valueCount); + } + + long[] values = new long[newBatchSize]; + int offset = 0; + for (ValuesDecoderContext valuesDecoderContext : definitionLevelDecodingContext.getValuesDecoderContexts()) { + ((TimestampValuesDecoder) valuesDecoderContext.getValuesDecoder()).readNext(values, offset, valuesDecoderContext.getNonNullCount()); + offset += valuesDecoderContext.getValueCount(); + } + + Block block = new LongArrayBlock(newBatchSize, Optional.empty(), values); + return new ColumnChunk(block, definitionLevels, repetitionLevelDecodingContext.getRepetitionLevels()); + } + + @Override + protected void seek() + throws IOException + { + if (readOffset == 0) { + return; + } + int maxDefinitionLevel = columnDescriptor.getMaxDefinitionLevel(); + RepetitionLevelDecodingContext repetitionLevelDecodingContext = readRepetitionLevels(readOffset); + DefinitionLevelDecodingContext definitionLevelDecodingContext = readDefinitionLevels(repetitionLevelDecodingContext.getDLValuesDecoderContexts(), repetitionLevelDecodingContext.getRepetitionLevels().length); + + int[] definitionLevels = definitionLevelDecodingContext.getDefinitionLevels(); + for (ValuesDecoderContext valuesDecoderContext : definitionLevelDecodingContext.getValuesDecoderContexts()) { + int valueCount = 0; + for (int i = valuesDecoderContext.getStart(); i < valuesDecoderContext.getEnd(); i++) { + valueCount += (definitionLevels[i] == maxDefinitionLevel ? 1 : 0); + } + TimestampValuesDecoder intValuesDecoder = (TimestampValuesDecoder) valuesDecoderContext.getValuesDecoder(); + intValuesDecoder.skip(valueCount); + } + } +} From 832b071b3c74f65a6fe8386dc296b61f17192a7a Mon Sep 17 00:00:00 2001 From: Amit Dutta Date: Thu, 3 Oct 2024 21:21:49 -0700 Subject: [PATCH 53/86] [native] Advance velox. 
--- presto-native-execution/velox | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-native-execution/velox b/presto-native-execution/velox index bfb8ebe9282a..fef4915eff3e 160000 --- a/presto-native-execution/velox +++ b/presto-native-execution/velox @@ -1 +1 @@ -Subproject commit bfb8ebe9282a2665ef8edecde95fdb8b13b67080 +Subproject commit fef4915eff3e9f7e970621a88c54f98c516f3e6f From 88c03ab6ffb3f10eba7caa3c2bab692106d0b599 Mon Sep 17 00:00:00 2001 From: abhibongale Date: Thu, 26 Sep 2024 19:34:15 +0100 Subject: [PATCH 54/86] Fix distinct operator for UUID Cherry-pick of https://github.com/trinodb/trino/pull/1180 by guyco33 Co-authored-by: Guy Cohen --- .../java/com/facebook/presto/type/UuidOperators.java | 2 +- .../presto/sql/query/TestDistinctAggregations.java | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/presto-main/src/main/java/com/facebook/presto/type/UuidOperators.java b/presto-main/src/main/java/com/facebook/presto/type/UuidOperators.java index e81d14bcc633..d7236a659ccf 100644 --- a/presto-main/src/main/java/com/facebook/presto/type/UuidOperators.java +++ b/presto-main/src/main/java/com/facebook/presto/type/UuidOperators.java @@ -199,7 +199,7 @@ public static boolean isDistinctFrom( if (left.isNull(leftPosition)) { return false; } - return left.compareTo(leftPosition, 0, UUID.getFixedSize(), right, rightPosition, 0, UUID.getFixedSize()) != 0; + return !UUID.equalTo(left, leftPosition, right, rightPosition); } } diff --git a/presto-main/src/test/java/com/facebook/presto/sql/query/TestDistinctAggregations.java b/presto-main/src/test/java/com/facebook/presto/sql/query/TestDistinctAggregations.java index eeda5789acfd..26c8955ea39c 100644 --- a/presto-main/src/test/java/com/facebook/presto/sql/query/TestDistinctAggregations.java +++ b/presto-main/src/test/java/com/facebook/presto/sql/query/TestDistinctAggregations.java @@ -272,4 +272,14 @@ public void testMixedDistinctWithFilter() "FROM (VALUES (1, 3), (2, 
4), (2, 4), (4, 5)) t (x, y)", "VALUES (BIGINT '0', CAST(NULL AS BIGINT))"); } + + @Test + public void testUuidDistinct() + { + assertions.assertQuery( + "SELECT DISTINCT uuid_col " + + "FROM (VALUES (UUID'be0b0518-35a1-4d10-b7f1-1b61355fa741')," + + " (UUID'be0b0518-35a1-4d10-b7f1-1b61355fa741')) AS t (uuid_col)", + "VALUES UUID'be0b0518-35a1-4d10-b7f1-1b61355fa741'"); + } } From 6f603558869153bd249836004e9c78af7d3fd5fd Mon Sep 17 00:00:00 2001 From: wangd Date: Sun, 29 Sep 2024 01:47:31 +0800 Subject: [PATCH 55/86] Fix the length of offsets array in parquet long decimal value decoders --- .../decoders/delta/BinaryLongDecimalDeltaValuesDecoder.java | 2 +- .../decoders/plain/BinaryLongDecimalPlainValuesDecoder.java | 2 +- .../decoders/rle/LongDecimalRLEDictionaryValuesDecoder.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/decoders/delta/BinaryLongDecimalDeltaValuesDecoder.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/decoders/delta/BinaryLongDecimalDeltaValuesDecoder.java index 676ca0fadceb..fb08cd047e0c 100644 --- a/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/decoders/delta/BinaryLongDecimalDeltaValuesDecoder.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/decoders/delta/BinaryLongDecimalDeltaValuesDecoder.java @@ -49,7 +49,7 @@ public void readNext(long[] values, int offset, int length) BinaryValuesDecoder.ValueBuffer valueBuffer = delegate.readNext(length); int bufferSize = valueBuffer.getBufferSize(); byte[] byteBuffer = new byte[bufferSize]; - int[] offsets = new int[bufferSize + 1]; + int[] offsets = new int[length + 1]; delegate.readIntoBuffer(byteBuffer, 0, offsets, 0, valueBuffer); for (int i = 0; i < length; i++) { diff --git a/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/decoders/plain/BinaryLongDecimalPlainValuesDecoder.java 
b/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/decoders/plain/BinaryLongDecimalPlainValuesDecoder.java index 81f86f376c42..4af38a23018c 100644 --- a/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/decoders/plain/BinaryLongDecimalPlainValuesDecoder.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/decoders/plain/BinaryLongDecimalPlainValuesDecoder.java @@ -45,7 +45,7 @@ public void readNext(long[] values, int offset, int length) PlainValueBuffer valueBuffer = (PlainValueBuffer) delegate.readNext(length); int bufferSize = valueBuffer.getBufferSize(); byte[] byteBuffer = new byte[bufferSize]; - int[] offsets = new int[bufferSize + 1]; + int[] offsets = new int[length + 1]; delegate.readIntoBuffer(byteBuffer, 0, offsets, 0, valueBuffer); for (int i = 0; i < length; i++) { diff --git a/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/decoders/rle/LongDecimalRLEDictionaryValuesDecoder.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/decoders/rle/LongDecimalRLEDictionaryValuesDecoder.java index b130aa416719..05ced8bc8010 100644 --- a/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/decoders/rle/LongDecimalRLEDictionaryValuesDecoder.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/decoders/rle/LongDecimalRLEDictionaryValuesDecoder.java @@ -46,7 +46,7 @@ public void readNext(long[] values, int offset, int length) ValueBuffer valueBuffer = delegate.readNext(length); int bufferSize = valueBuffer.getBufferSize(); byte[] byteBuffer = new byte[bufferSize]; - int[] offsets = new int[bufferSize + 1]; + int[] offsets = new int[length + 1]; delegate.readIntoBuffer(byteBuffer, 0, offsets, 0, valueBuffer); for (int i = 0; i < length; i++) { From 152e64dbebf2d8c9deedaae833d4ebfcaa4a531f Mon Sep 17 00:00:00 2001 From: jaystarshot Date: Fri, 4 Oct 2024 10:51:46 -0700 Subject: [PATCH 56/86] Turn on 
optimized_scale_writer_producer_buffer by default --- .../com/facebook/presto/sql/analyzer/FeaturesConfig.java | 2 +- .../facebook/presto/sql/analyzer/TestFeaturesConfig.java | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/presto-main/src/main/java/com/facebook/presto/sql/analyzer/FeaturesConfig.java b/presto-main/src/main/java/com/facebook/presto/sql/analyzer/FeaturesConfig.java index c88271e97dd5..1bb1ea8894bd 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/analyzer/FeaturesConfig.java +++ b/presto-main/src/main/java/com/facebook/presto/sql/analyzer/FeaturesConfig.java @@ -106,7 +106,7 @@ public class FeaturesConfig private boolean redistributeWrites = true; private boolean scaleWriters; private DataSize writerMinSize = new DataSize(32, MEGABYTE); - private boolean optimizedScaleWriterProducerBuffer; + private boolean optimizedScaleWriterProducerBuffer = true; private boolean optimizeMetadataQueries; private boolean optimizeMetadataQueriesIgnoreStats; private int optimizeMetadataQueriesCallThreshold = 100; diff --git a/presto-main/src/test/java/com/facebook/presto/sql/analyzer/TestFeaturesConfig.java b/presto-main/src/test/java/com/facebook/presto/sql/analyzer/TestFeaturesConfig.java index 01ddec6dee15..c78e4835c354 100644 --- a/presto-main/src/test/java/com/facebook/presto/sql/analyzer/TestFeaturesConfig.java +++ b/presto-main/src/test/java/com/facebook/presto/sql/analyzer/TestFeaturesConfig.java @@ -89,7 +89,7 @@ public void testDefaults() .setRedistributeWrites(true) .setScaleWriters(false) .setWriterMinSize(new DataSize(32, MEGABYTE)) - .setOptimizedScaleWriterProducerBuffer(false) + .setOptimizedScaleWriterProducerBuffer(true) .setOptimizeMetadataQueries(false) .setOptimizeMetadataQueriesIgnoreStats(false) .setOptimizeMetadataQueriesCallThreshold(100) @@ -306,7 +306,7 @@ public void testExplicitPropertyMappings() .put("redistribute-writes", "false") .put("scale-writers", "true") .put("writer-min-size", "42GB") - 
.put("optimized-scale-writer-producer-buffer", "true") + .put("optimized-scale-writer-producer-buffer", "false") .put("optimizer.optimize-metadata-queries", "true") .put("optimizer.optimize-metadata-queries-ignore-stats", "true") .put("optimizer.optimize-metadata-queries-call-threshold", "200") @@ -506,7 +506,7 @@ public void testExplicitPropertyMappings() .setRedistributeWrites(false) .setScaleWriters(true) .setWriterMinSize(new DataSize(42, GIGABYTE)) - .setOptimizedScaleWriterProducerBuffer(true) + .setOptimizedScaleWriterProducerBuffer(false) .setOptimizeMetadataQueries(true) .setOptimizeMetadataQueriesIgnoreStats(true) .setOptimizeMetadataQueriesCallThreshold(200) From 6d096e5ee93a99a6573b32c63aa7dabfff4186f0 Mon Sep 17 00:00:00 2001 From: Bikramjeet Vig Date: Fri, 4 Oct 2024 10:33:47 -0700 Subject: [PATCH 57/86] [native] Minor refactor of PeriodicMemoryChecker Minor refactor that allows the memory pushback mechanism in PeriodicMemoryChecker to be overloaded by its child class. --- .../presto_cpp/main/PeriodicMemoryChecker.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/presto-native-execution/presto_cpp/main/PeriodicMemoryChecker.h b/presto-native-execution/presto_cpp/main/PeriodicMemoryChecker.h index cd704d4b7254..239637b02069 100644 --- a/presto-native-execution/presto_cpp/main/PeriodicMemoryChecker.h +++ b/presto-native-execution/presto_cpp/main/PeriodicMemoryChecker.h @@ -97,6 +97,10 @@ class PeriodicMemoryChecker { /// Returns true if dump is successful. virtual void removeDumpFile(const std::string& filePath) const = 0; + /// Invoked by the periodic checker when 'Config::systemMemPushbackEnabled' + /// is true and system memory usage is above 'Config::systemMemLimitBytes'. 
+ virtual void pushbackMemory(); + const Config config_; private: @@ -119,10 +123,6 @@ class PeriodicMemoryChecker { } }; - // Invoked by the periodic checker when 'Config::systemMemPushbackEnabled' - // is true and system memory usage is above 'Config::systemMemLimitBytes'. - void pushbackMemory(); - // Invoked by the periodic checker when 'Config::mallocMemHeapDumpEnabled' is // true. void maybeDumpHeap(); From 66f34de3fb919e84514544f45d47b43b677d90a3 Mon Sep 17 00:00:00 2001 From: Amit Dutta Date: Mon, 7 Oct 2024 10:19:21 -0700 Subject: [PATCH 58/86] [native] Advance velox. --- presto-native-execution/velox | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-native-execution/velox b/presto-native-execution/velox index fef4915eff3e..4f1140247e2f 160000 --- a/presto-native-execution/velox +++ b/presto-native-execution/velox @@ -1 +1 @@ -Subproject commit fef4915eff3e9f7e970621a88c54f98c516f3e6f +Subproject commit 4f1140247e2f5059abd8f71b2ae54e3d1dcf1be2 From 11926329467d60f71c3c4747b9a40f83afad4ee8 Mon Sep 17 00:00:00 2001 From: pratyakshsharma Date: Fri, 27 Sep 2024 00:47:52 +0530 Subject: [PATCH 59/86] Read dataDirectory from system/env variables Refactor IcebergQueryRunner and HiveQueryRunner to read the data directory from system variables rather than program arguments. This is done to ensure these runners are in sync with HiveExternalWorkerQueryRunner. 
--- .../facebook/presto/hive/HiveQueryRunner.java | 36 ++++------------ .../facebook/presto/hive/HiveTestUtils.java | 41 +++++++++++++++++++ presto-iceberg/pom.xml | 8 +++- .../presto/iceberg/IcebergQueryRunner.java | 33 +++------------ 4 files changed, 61 insertions(+), 57 deletions(-) diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/HiveQueryRunner.java b/presto-hive/src/test/java/com/facebook/presto/hive/HiveQueryRunner.java index a6e842b29b62..1ceece114b01 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/HiveQueryRunner.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/HiveQueryRunner.java @@ -60,6 +60,7 @@ import static com.facebook.presto.SystemSessionProperties.GROUPED_EXECUTION; import static com.facebook.presto.SystemSessionProperties.HASH_PARTITION_COUNT; import static com.facebook.presto.SystemSessionProperties.PARTITIONING_PROVIDER_CATALOG; +import static com.facebook.presto.hive.HiveTestUtils.getDataDirectoryPath; import static com.facebook.presto.spi.security.SelectedRole.Type.ROLE; import static com.facebook.presto.testing.TestingSession.testSessionBuilder; import static com.facebook.presto.tests.QueryAssertions.copyTables; @@ -441,42 +442,19 @@ public static void main(String[] args) throws Exception { // You need to add "--user user" to your CLI for your queries to work - Logging.initialize(); - - Optional dataDirectory = Optional.empty(); + setupLogging(); + Optional dataDirectory; if (args.length > 0) { if (args.length != 1) { log.error("usage: HiveQueryRunner [dataDirectory]\n"); log.error(" [dataDirectory] is a local directory under which you want the hive_data directory to be created.]\n"); System.exit(1); } - - File dataDirectoryFile = new File(args[0]); - if (dataDirectoryFile.exists()) { - if (!dataDirectoryFile.isDirectory()) { - log.error("Error: " + dataDirectoryFile.getAbsolutePath() + " is not a directory."); - System.exit(1); - } - else if (!dataDirectoryFile.canRead() || 
!dataDirectoryFile.canWrite()) { - log.error("Error: " + dataDirectoryFile.getAbsolutePath() + " is not readable/writable."); - System.exit(1); - } - } - else { - // For user supplied path like [path_exists_but_is_not_readable_or_writable]/[paths_do_not_exist], the hadoop file system won't - // be able to create directory for it. e.g. "/aaa/bbb" is not creatable because path "/" is not writable. - while (!dataDirectoryFile.exists()) { - dataDirectoryFile = dataDirectoryFile.getParentFile(); - } - if (!dataDirectoryFile.canRead() || !dataDirectoryFile.canWrite()) { - log.error("Error: The ancestor directory " + dataDirectoryFile.getAbsolutePath() + " is not readable/writable."); - System.exit(1); - } - } - - dataDirectory = Optional.of(dataDirectoryFile.toPath()); + dataDirectory = getDataDirectoryPath(Optional.of(args[0])); + } + else { + dataDirectory = getDataDirectoryPath(Optional.empty()); } - DistributedQueryRunner queryRunner = createQueryRunner(TpchTable.getTables(), getAllTpcdsTableNames(), ImmutableMap.of("http-server.http.port", "8080"), dataDirectory); try { diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/HiveTestUtils.java b/presto-hive/src/test/java/com/facebook/presto/hive/HiveTestUtils.java index e20dbcaa0675..a6fdaa9f7367 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/HiveTestUtils.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/HiveTestUtils.java @@ -15,6 +15,7 @@ import com.facebook.airlift.json.JsonCodec; import com.facebook.airlift.json.smile.SmileCodec; +import com.facebook.airlift.log.Logger; import com.facebook.presto.PagesIndexPageSorter; import com.facebook.presto.cache.CacheConfig; import com.facebook.presto.common.block.BlockEncodingManager; @@ -79,7 +80,9 @@ import com.google.common.collect.ImmutableSet; import io.airlift.slice.Slice; +import java.io.File; import java.math.BigDecimal; +import java.nio.file.Path; import java.util.ArrayList; import java.util.List; import java.util.Optional; @@ 
-94,6 +97,8 @@ public final class HiveTestUtils { + private static final Logger log = Logger.get(HiveTestUtils.class); + private HiveTestUtils() { } @@ -312,6 +317,42 @@ public static Optional getProperty(String name) } } + public static Optional getDataDirectoryPath(Optional suppliedDataDirectoryPath) + { + Optional dataDirectory = Optional.empty(); + if (!suppliedDataDirectoryPath.isPresent()) { + //in case the path is not supplied as program argument, read it from env variable. + suppliedDataDirectoryPath = getProperty("DATA_DIR"); + } + if (suppliedDataDirectoryPath.isPresent()) { + File dataDirectoryFile = new File(suppliedDataDirectoryPath.get()); + if (dataDirectoryFile.exists()) { + if (!dataDirectoryFile.isDirectory()) { + log.error("Error: " + dataDirectoryFile.getAbsolutePath() + " is not a directory."); + System.exit(1); + } + else if (!dataDirectoryFile.canRead() || !dataDirectoryFile.canWrite()) { + log.error("Error: " + dataDirectoryFile.getAbsolutePath() + " is not readable/writable."); + System.exit(1); + } + } + else { + // For user supplied path like [path_exists_but_is_not_readable_or_writable]/[paths_do_not_exist], the hadoop file system won't + // be able to create directory for it. e.g. "/aaa/bbb" is not creatable because path "/" is not writable. 
+ while (!dataDirectoryFile.exists()) { + dataDirectoryFile = dataDirectoryFile.getParentFile(); + } + if (!dataDirectoryFile.canRead() || !dataDirectoryFile.canWrite()) { + log.error("Error: The ancestor directory " + dataDirectoryFile.getAbsolutePath() + " is not readable/writable."); + System.exit(1); + } + } + + dataDirectory = Optional.of(dataDirectoryFile.toPath()); + } + return dataDirectory; + } + public static List> getAllSessionProperties(HiveClientConfig hiveClientConfig, HiveCommonClientConfig hiveCommonClientConfig) { return getAllSessionProperties(hiveClientConfig, new ParquetFileWriterConfig(), hiveCommonClientConfig); diff --git a/presto-iceberg/pom.xml b/presto-iceberg/pom.xml index 6071bab59cbd..783d9d7a50bd 100644 --- a/presto-iceberg/pom.xml +++ b/presto-iceberg/pom.xml @@ -92,7 +92,6 @@ animal-sniffer-annotations - compile @@ -538,6 +537,13 @@ test + + com.facebook.presto + presto-hive + test-jar + test + + com.facebook.presto presto-tests diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/IcebergQueryRunner.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/IcebergQueryRunner.java index e1163b801c5a..f5621e76fd5c 100644 --- a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/IcebergQueryRunner.java +++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/IcebergQueryRunner.java @@ -38,7 +38,6 @@ import com.google.common.collect.ImmutableSet; import io.airlift.tpch.TpchTable; -import java.io.File; import java.io.IOException; import java.net.URI; import java.nio.file.Files; @@ -51,6 +50,7 @@ import static com.facebook.airlift.log.Level.ERROR; import static com.facebook.airlift.log.Level.WARN; +import static com.facebook.presto.hive.HiveTestUtils.getDataDirectoryPath; import static com.facebook.presto.iceberg.CatalogType.HIVE; import static com.facebook.presto.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR; import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED; @@ -282,38 +282,17 @@ 
public static void main(String[] args) throws Exception { setupLogging(); - Optional dataDirectory = Optional.empty(); + Optional dataDirectory; if (args.length > 0) { if (args.length != 1) { log.error("usage: IcebergQueryRunner [dataDirectory]\n"); log.error(" [dataDirectory] is a local directory under which you want the iceberg_data directory to be created.]\n"); System.exit(1); } - - File dataDirectoryFile = new File(args[0]); - if (dataDirectoryFile.exists()) { - if (!dataDirectoryFile.isDirectory()) { - log.error("Error: " + dataDirectoryFile.getAbsolutePath() + " is not a directory."); - System.exit(1); - } - else if (!dataDirectoryFile.canRead() || !dataDirectoryFile.canWrite()) { - log.error("Error: " + dataDirectoryFile.getAbsolutePath() + " is not readable/writable."); - System.exit(1); - } - } - else { - // For user supplied path like [path_exists_but_is_not_readable_or_writable]/[paths_do_not_exist], the hadoop file system won't - // be able to create directory for it. e.g. "/aaa/bbb" is not creatable because path "/" is not writable. 
- while (!dataDirectoryFile.exists()) { - dataDirectoryFile = dataDirectoryFile.getParentFile(); - } - if (!dataDirectoryFile.canRead() || !dataDirectoryFile.canWrite()) { - log.error("Error: The ancestor directory " + dataDirectoryFile.getAbsolutePath() + " is not readable/writable."); - System.exit(1); - } - } - - dataDirectory = Optional.of(dataDirectoryFile.toPath()); + dataDirectory = getDataDirectoryPath(Optional.of(args[0])); + } + else { + dataDirectory = getDataDirectoryPath(Optional.empty()); } Map properties = ImmutableMap.of("http-server.http.port", "8080"); From 64be0e3188921ed5e5dc110f6d4bceaec352bb12 Mon Sep 17 00:00:00 2001 From: Jialiang Tan Date: Mon, 7 Oct 2024 16:10:02 -0700 Subject: [PATCH 60/86] Add trace session properties to presto native --- .../presto/SystemSessionProperties.java | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/presto-main/src/main/java/com/facebook/presto/SystemSessionProperties.java b/presto-main/src/main/java/com/facebook/presto/SystemSessionProperties.java index 8bd8a3351efd..1b923594a652 100644 --- a/presto-main/src/main/java/com/facebook/presto/SystemSessionProperties.java +++ b/presto-main/src/main/java/com/facebook/presto/SystemSessionProperties.java @@ -356,10 +356,15 @@ public final class SystemSessionProperties public static final String NATIVE_DEBUG_DISABLE_EXPRESSION_WITH_MEMOIZATION = "native_debug_disable_expression_with_memoization"; public static final String NATIVE_DEBUG_DISABLE_EXPRESSION_WITH_LAZY_INPUTS = "native_debug_disable_expression_with_lazy_inputs"; public static final String NATIVE_SELECTIVE_NIMBLE_READER_ENABLED = "native_selective_nimble_reader_enabled"; - public static final String NATIVE_MAX_PARTIAL_AGGREGATION_MEMORY = "native_max_partial_aggregation_memory"; public static final String NATIVE_MAX_EXTENDED_PARTIAL_AGGREGATION_MEMORY = "native_max_extended_partial_aggregation_memory"; public static final String NATIVE_MAX_SPILL_BYTES = 
"native_max_spill_bytes"; + public static final String NATIVE_QUERY_TRACE_ENABLED = "native_query_trace_enabled"; + public static final String NATIVE_QUERY_TRACE_DIR = "native_query_trace_dir"; + public static final String NATIVE_QUERY_TRACE_NODE_IDS = "native_query_trace_node_ids"; + public static final String NATIVE_QUERY_TRACE_MAX_BYTES = "native_query_trace_max_bytes"; + public static final String NATIVE_QUERY_TRACE_REG_EXP = "native_query_trace_task_reg_exp"; + public static final String DEFAULT_VIEW_SECURITY_MODE = "default_view_security_mode"; public static final String JOIN_PREFILTER_BUILD_SIDE = "join_prefilter_build_side"; public static final String OPTIMIZER_USE_HISTOGRAMS = "optimizer_use_histograms"; @@ -1773,6 +1778,26 @@ public SystemSessionProperties( "The max allowed spill bytes", 100L << 30, false), + booleanProperty(NATIVE_QUERY_TRACE_ENABLED, + "Enables query tracing.", + false, + false), + stringProperty(NATIVE_QUERY_TRACE_DIR, + "Base dir of a query to store tracing data.", + "", + false), + stringProperty(NATIVE_QUERY_TRACE_NODE_IDS, + "A comma-separated list of plan node ids whose input data will be traced. Empty string if only want to trace the query metadata.", + "", + false), + longProperty(NATIVE_QUERY_TRACE_MAX_BYTES, + "The max trace bytes limit. Tracing is disabled if zero.", + 0L, + false), + stringProperty(NATIVE_QUERY_TRACE_REG_EXP, + "The regexp of traced task id. 
We only enable trace on a task if its id matches.", + "", + false), booleanProperty( RANDOMIZE_OUTER_JOIN_NULL_KEY, "(Deprecated) Randomize null join key for outer join", From 96c58c916f344df810e2fc783a1c8ef4d2bcbe5f Mon Sep 17 00:00:00 2001 From: Zac Blanco Date: Mon, 7 Oct 2024 10:54:53 -0700 Subject: [PATCH 61/86] [CLI] Add flag to disable certificate verification Cherry-pick of trinodb/trino@1c5b9215a2d1b04b5f84e2b3e86 with minor modifications for Presto Co-authored-by: Lewuathe --- .../facebook/presto/cli/ClientOptions.java | 4 + .../java/com/facebook/presto/cli/Console.java | 1 + .../com/facebook/presto/cli/QueryRunner.java | 9 +- .../facebook/presto/cli/AbstractCliTest.java | 15 ++- .../presto/cli/TestInsecureQueryRunner.java | 102 ++++++++++++++++++ .../src/test/resources/insecure-ssl-test.jks | Bin 0 -> 2253 bytes .../facebook/presto/client/OkHttpUtil.java | 36 +++++++ .../src/main/sphinx/clients/presto-cli.rst | 46 ++++++-- 8 files changed, 199 insertions(+), 14 deletions(-) create mode 100644 presto-cli/src/test/java/com/facebook/presto/cli/TestInsecureQueryRunner.java create mode 100644 presto-cli/src/test/resources/insecure-ssl-test.jks diff --git a/presto-cli/src/main/java/com/facebook/presto/cli/ClientOptions.java b/presto-cli/src/main/java/com/facebook/presto/cli/ClientOptions.java index b9dc8a8ceff6..6a05483e1e51 100644 --- a/presto-cli/src/main/java/com/facebook/presto/cli/ClientOptions.java +++ b/presto-cli/src/main/java/com/facebook/presto/cli/ClientOptions.java @@ -86,9 +86,13 @@ public class ClientOptions @Option(name = "--truststore-type", title = "truststore type", description = "Truststore type") public String trustStoreType = KeyStore.getDefaultType(); + @Option(name = "--access-token", title = "access token", description = "Access token") public String accessToken; + @Option(name = "--insecure", title = "trust all certificates", description = "Skip validation of HTTP server certificates (should only be used for debugging)") + public 
boolean insecure; + @Option(name = "--user", title = "user", description = "Username") public String user = System.getProperty("user.name"); diff --git a/presto-cli/src/main/java/com/facebook/presto/cli/Console.java b/presto-cli/src/main/java/com/facebook/presto/cli/Console.java index 0ec67846ae20..0deff11d0a1f 100644 --- a/presto-cli/src/main/java/com/facebook/presto/cli/Console.java +++ b/presto-cli/src/main/java/com/facebook/presto/cli/Console.java @@ -134,6 +134,7 @@ public boolean run() Optional.ofNullable(clientOptions.truststorePassword), Optional.ofNullable(clientOptions.trustStoreType), Optional.ofNullable(clientOptions.accessToken), + clientOptions.insecure, Optional.ofNullable(clientOptions.user), clientOptions.password ? Optional.of(getPassword()) : Optional.empty(), Optional.ofNullable(clientOptions.krb5Principal), diff --git a/presto-cli/src/main/java/com/facebook/presto/cli/QueryRunner.java b/presto-cli/src/main/java/com/facebook/presto/cli/QueryRunner.java index 4823ce5ccb2c..1ecffacc29bc 100644 --- a/presto-cli/src/main/java/com/facebook/presto/cli/QueryRunner.java +++ b/presto-cli/src/main/java/com/facebook/presto/cli/QueryRunner.java @@ -14,6 +14,7 @@ package com.facebook.presto.cli; import com.facebook.presto.client.ClientSession; +import com.facebook.presto.client.OkHttpUtil; import com.facebook.presto.client.StatementClient; import com.google.common.net.HostAndPort; import okhttp3.OkHttpClient; @@ -63,6 +64,7 @@ public QueryRunner( Optional truststorePassword, Optional trustStoreType, Optional accessToken, + boolean insecureSsl, Optional user, Optional password, Optional kerberosPrincipal, @@ -77,7 +79,12 @@ public QueryRunner( this.debug = debug; this.runtime = runtime; - this.sslSetup = builder -> setupSsl(builder, keystorePath, keystorePassword, keyStoreType, truststorePath, truststorePassword, trustStoreType); + if (insecureSsl) { + this.sslSetup = OkHttpUtil::setupInsecureSsl; + } + else { + this.sslSetup = builder -> setupSsl(builder, 
keystorePath, keystorePassword, keyStoreType, truststorePath, truststorePassword, trustStoreType); + } OkHttpClient.Builder builder = new OkHttpClient.Builder(); diff --git a/presto-cli/src/test/java/com/facebook/presto/cli/AbstractCliTest.java b/presto-cli/src/test/java/com/facebook/presto/cli/AbstractCliTest.java index c34f2af6f6cc..eb2267e1bd79 100644 --- a/presto-cli/src/test/java/com/facebook/presto/cli/AbstractCliTest.java +++ b/presto-cli/src/test/java/com/facebook/presto/cli/AbstractCliTest.java @@ -114,14 +114,19 @@ protected MockResponse createMockResponse() protected void executeQueries(List queries) { - Console console = new Console(); QueryRunner queryRunner = createQueryRunner(createMockClientSession()); + executeQueries(queryRunner, queries); + } + + protected void executeQueries(QueryRunner queryRunner, List queries) + { + Console console = new Console(); for (String query : queries) { console.executeCommand(queryRunner, query, CSV, false); } } - protected static QueryRunner createQueryRunner(ClientSession clientSession) + protected static QueryRunner createQueryRunner(ClientSession clientSession, boolean insecureSsl) { return new QueryRunner( clientSession, @@ -136,6 +141,7 @@ protected static QueryRunner createQueryRunner(ClientSession clientSession) Optional.empty(), Optional.empty(), Optional.empty(), + insecureSsl, Optional.empty(), Optional.empty(), Optional.empty(), @@ -147,6 +153,11 @@ protected static QueryRunner createQueryRunner(ClientSession clientSession) true); } + protected static QueryRunner createQueryRunner(ClientSession clientSession) + { + return createQueryRunner(clientSession, false); + } + protected static void assertHeaders(String headerName, Headers headers, Set expectedSessionHeaderValues) { assertEquals(ImmutableSet.copyOf(headers.values(headerName)), expectedSessionHeaderValues); diff --git a/presto-cli/src/test/java/com/facebook/presto/cli/TestInsecureQueryRunner.java 
b/presto-cli/src/test/java/com/facebook/presto/cli/TestInsecureQueryRunner.java new file mode 100644 index 000000000000..b86ac24c937f --- /dev/null +++ b/presto-cli/src/test/java/com/facebook/presto/cli/TestInsecureQueryRunner.java @@ -0,0 +1,102 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.cli; + +import com.google.common.collect.ImmutableList; +import okhttp3.mockwebserver.MockWebServer; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import javax.net.ssl.KeyManagerFactory; +import javax.net.ssl.SSLContext; +import javax.net.ssl.TrustManagerFactory; + +import java.io.IOException; +import java.io.InputStream; +import java.security.KeyManagementException; +import java.security.KeyStore; +import java.security.KeyStoreException; +import java.security.NoSuchAlgorithmException; +import java.security.SecureRandom; +import java.security.UnrecoverableKeyException; +import java.security.cert.CertificateException; + +import static com.google.common.io.Resources.getResource; +import static java.util.concurrent.TimeUnit.SECONDS; +import static org.testng.Assert.assertEquals; + +@Test(singleThreaded = true) +public class TestInsecureQueryRunner + extends AbstractCliTest +{ + @Override + @BeforeMethod + public void setup() + throws IOException + { + server = new MockWebServer(); + SSLContext sslContext = buildTestSslContext(); + 
server.useHttps(sslContext.getSocketFactory(), false); + server.start(); + } + + @Override + @AfterMethod(alwaysRun = true) + public void teardown() + throws IOException + { + server.close(); + } + + @Test + public void testInsecureConnection() + { + server.enqueue(createMockResponse()); + server.enqueue(createMockResponse()); + executeQueries(createQueryRunner(createMockClientSession(), true), + ImmutableList.of("query with insecure mode;")); + try { + assertEquals(server.takeRequest(1, SECONDS).getPath(), "/v1/statement"); + } + catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException(e); + } + } + + private SSLContext buildTestSslContext() + throws IOException + { + try { + // Load self-signed certificate + char[] serverKeyStorePassword = "insecure-ssl-test".toCharArray(); + KeyStore serverKeyStore = KeyStore.getInstance(KeyStore.getDefaultType()); + try (InputStream in = getResource(getClass(), "/insecure-ssl-test.jks").openStream()) { + serverKeyStore.load(in, serverKeyStorePassword); + } + String kmfAlgorithm = KeyManagerFactory.getDefaultAlgorithm(); + KeyManagerFactory kmf = KeyManagerFactory.getInstance(kmfAlgorithm); + kmf.init(serverKeyStore, serverKeyStorePassword); + TrustManagerFactory trustManagerFactory = TrustManagerFactory.getInstance(kmfAlgorithm); + trustManagerFactory.init(serverKeyStore); + SSLContext sslContext = SSLContext.getInstance("SSL"); + sslContext.init(kmf.getKeyManagers(), trustManagerFactory.getTrustManagers(), new SecureRandom()); + return sslContext; + } + catch (KeyStoreException | NoSuchAlgorithmException | CertificateException | UnrecoverableKeyException | KeyManagementException e) { + throw new IOException("failed to initialize SSL context", e); + } + } +} diff --git a/presto-cli/src/test/resources/insecure-ssl-test.jks b/presto-cli/src/test/resources/insecure-ssl-test.jks new file mode 100644 index 0000000000000000000000000000000000000000..21d80eda222bf31fba97d00300024344f1c03bbf 
GIT binary patch literal 2253 zcmchYX*3iH8^>qH49(aZON8vh*ai(IMnnrCHIZFO#h4lUu40O736X6^WGrI~qf2rl zTUkPO#+vM8DauZ-bI<#}=bqE|`{DoL`8{8r^FPn|{|{FVR{#J2=t#i7#p_D)zH{5x z>yDDQH$lngj;KqQD42!sH@WblH3rU<-h7ilPF zRK4C;F&S)2&+jtsX=&ECmF98QR$yAjqjlA z(Rbw`F|>!;=>Vn?6rU{_KDuTQZYOS`i{IKYjUeh|wD^#O@Ip>sY?69RwN~dJK@c}KDZQE1K!AV?sSys(D%Wd9l@SF2b}P+43!0kl^`z^|H#iPN&^ za9PvCW{N|&I`$cJWwh*a6Kj~!3}CH5TTghW``6>}x$7O-uHnNPlD;>a!FidNBa1ZI zpz6=3tkO+}O9!m^Zo`K@m**kYgq(OcmT(A%0Fm_84U%#*4e7F)8D_;VK6fK z0Pk$JHY3-Xw1b-op$j{123=3n_C`kR($|MxBRpP*6^jcdEk5Ng6Fi^CTE9#iC~NPC zn5lP;CE%$_c8-xW4COX;rlRu=hr3xdsw8NEGs%n)o3?ikh}r6atB9Dz@$z>}@O-@VWiG!RbXwt+ zhJEx&xoTSC49BPMbc;|9l3Ec9y*{9rh(CB%!WeIx+tH0HR&v@g<=+vP?3FP75Svo` zb}VH_@f#?1-du%14{5y~YMHjLeUbB5avhRTRgH+7*snR^5+yn#P#@Lua2=s0d5*)r zTNjJTpuYEGvYF$Q@otaR>M}%s)?@xtu+~thxl#)1$JC1gmmn)$#iVtq z;Xf{@2j{{K_pOD+RuL><1LI z-_$4AU*BkBoy(2^-A=sD_mdWV_6yHmkV3qB?a=rF=mDID)f?T} zz%ff_7IGV0OxdI~;b7-UDB|FB{Ip3v|NW@!Wp4GnnortYH)xx0CDs+~%>t8WvP{pI zYfA8q9rh=mmU?v8Lo^$Truu5xdt_<2K2Vi26QL?mLZ5?19Gc}HSIOd~i^5a96<s9s~+vEz?2cNbyqFyZCOi@mq%4~+8J+IKshwagC!jZ}Pauj~J+X@w6sF|NBw zU$c%8a}*kp=G}dnX}4C^ahH1OO|Kha;cBB4^x?OS=|?3*E`Irm0EdTutGvDsQ?C?A zd9l(-_1r^ER~Olzr<&X^Lp2c^gfUAm7-;WlzrJ!BS7Yv;Hk-C}b!VrSfGG4}_9G72 zWfAY0SB(phr-`WxBE2jWw8*O1R;wVJhN_Zg{1s+3Fbx(Ro9c&@Z+nZ@eSCX7iIMu& zy!x=3(r?*LahJdLz9EAckhiA{uUhRn5hs$OUya92IG|n1F1Q6nK(11`Wx^~BL^^Ea zENa|T8_z5#?OyoKq>se+N)Rd<;^H~r9X_QW2ahFYQ0%Tg8Q_z!t4r6pmfw!ld%@Ab zp{(pktI#pAOh(p{&PV+C72P>F#cNR|s;AAv6Sf~K1D*EoIExuDySJ-yXc;gwnPT?2 z@|@WA>Ytzc?Cxe!u)CiMPgTe(^UP=sYqTiR#zdswIxPzko%}afG`jl2n57*tkEkiMYnXvs25aD1W{PIAhoS>(*c(I|LGgW_J4_iO$ z%cpj}f0F>}Hk_M>C8y>v4HgRoNbfWqSF(s?y8xt07)5dWmZ(-msU*?qP1$z`si5~+9x9`O8c8Qn+4j>mEh;?xx+)PbEn4nb=TRApU0Q=u=kd^-^6uIG~Z@2 zq9$ne9AK|_=4wOBna>V&AQm71m?MEYg%Umb5K%6$0GO}-+&(8%|uGnWYbs6VB9{>OV literal 0 HcmV?d00001 diff --git a/presto-client/src/main/java/com/facebook/presto/client/OkHttpUtil.java b/presto-client/src/main/java/com/facebook/presto/client/OkHttpUtil.java index 20026f984aa1..78175f158dbb 100644 
--- a/presto-client/src/main/java/com/facebook/presto/client/OkHttpUtil.java +++ b/presto-client/src/main/java/com/facebook/presto/client/OkHttpUtil.java @@ -41,6 +41,7 @@ import java.net.Proxy; import java.security.GeneralSecurityException; import java.security.KeyStore; +import java.security.SecureRandom; import java.security.cert.Certificate; import java.security.cert.CertificateExpiredException; import java.security.cert.CertificateNotYetValidException; @@ -138,6 +139,41 @@ private static InetSocketAddress toUnresolvedAddress(HostAndPort address) return InetSocketAddress.createUnresolved(address.getHost(), address.getPort()); } + public static void setupInsecureSsl(OkHttpClient.Builder clientBuilder) + { + try { + X509TrustManager trustAllCerts = new X509TrustManager() + { + @Override + public void checkClientTrusted(X509Certificate[] chain, String authType) + { + throw new UnsupportedOperationException("checkClientTrusted should not be called"); + } + + @Override + public void checkServerTrusted(X509Certificate[] chain, String authType) + { + // skip validation of server certificate + } + + @Override + public X509Certificate[] getAcceptedIssuers() + { + return new X509Certificate[0]; + } + }; + + SSLContext sslContext = SSLContext.getInstance("SSL"); + sslContext.init(null, new TrustManager[] {trustAllCerts}, new SecureRandom()); + + clientBuilder.sslSocketFactory(sslContext.getSocketFactory(), trustAllCerts); + clientBuilder.hostnameVerifier((hostname, session) -> true); + } + catch (GeneralSecurityException e) { + throw new ClientException("Error setting up SSL: " + e.getMessage(), e); + } + } + public static void setupSsl( OkHttpClient.Builder clientBuilder, Optional keyStorePath, diff --git a/presto-docs/src/main/sphinx/clients/presto-cli.rst b/presto-docs/src/main/sphinx/clients/presto-cli.rst index dce94ed5522e..54098c62fb25 100644 --- a/presto-docs/src/main/sphinx/clients/presto-cli.rst +++ b/presto-docs/src/main/sphinx/clients/presto-cli.rst @@ -72,16 
+72,19 @@ Run the CLI with the ``--help`` option to see the online help. ./presto --help +NAME + presto - Presto interactive console + SYNOPSIS presto [--access-token ] [--catalog ] [--client-info ] [--client-request-timeout ] [--client-tags ] [--debug] [--disable-compression] - [--execute ] [--extra-credential ...] - [(-f | --file )] [(-h | --help)] - [--http-proxy ] [--ignore-errors] - [--keystore-password ] - [--keystore-path ] + [--disable-redirects] [--execute ] + [--extra-credential ...] [(-f | --file )] + [(-h | --help)] [--http-proxy ] [--ignore-errors] + [--insecure] [--keystore-password ] + [--keystore-path ] [--keystore-type ] [--krb5-config-path ] [--krb5-credential-cache-path ] [--krb5-disable-remote-service-hostname-canonicalization] @@ -90,10 +93,12 @@ SYNOPSIS [--krb5-remote-service-name ] [--log-levels-file ] [--output-format ] [--password] [--resource-estimate ...] - [--schema ] [--server ] [--session ...] - [--socks-proxy ] [--source ] - [--truststore-password ] - [--truststore-path ] [--user ] [--version] + [--runtime-stats] [--schema ] [--server ] + [--session ...] 
[--socks-proxy ] + [--source ] [--truststore-password ] + [--truststore-path ] + [--truststore-type ] [--user ] + [--validate-nexturi-source] [--version] OPTIONS --access-token @@ -117,6 +122,9 @@ OPTIONS --disable-compression Disable compression of query results + --disable-redirects + Disable client following redirects from server + --execute Execute specified statements and exit @@ -137,12 +145,19 @@ OPTIONS Continue processing in batch mode when an error occurs (default is to exit immediately) + --insecure + Skip validation of HTTP server certificates (should only be used for + debugging) + --keystore-password Keystore password --keystore-path Keystore path + --keystore-type + Keystore type + --krb5-config-path Kerberos config file path (default: /etc/krb5.conf) @@ -166,7 +181,7 @@ OPTIONS Configure log levels for debugging using this file --output-format - Output format for batch mode [ALIGNED, VERTICAL, CSV, TSV, + Output format for batch mode [ALIGNED, VERTICAL, JSON, CSV, TSV, CSV_HEADER, TSV_HEADER, NULL] (default: CSV) --password @@ -177,7 +192,8 @@ OPTIONS key=value) --runtime-stats - Enable runtime stats information. Flag must be used in conjunction with the --debug flag + Enable runtime stats information. 
Flag must be used in conjunction + with the --debug flag --schema Default schema @@ -201,8 +217,16 @@ OPTIONS --truststore-path Truststore path + --truststore-type + Truststore type + --user Username + --validate-nexturi-source + Validate nextUri server host and port does not change during query + execution + --version Display version information and exit + From f9f884dc749b0cee899685a9b93193360ea797ee Mon Sep 17 00:00:00 2001 From: Deepak Majeti Date: Sat, 5 Oct 2024 16:43:52 -0400 Subject: [PATCH 62/86] [native] Add support for DEPENDENCY_DIR, INSTALL_PREFIX, PYTHON_VENV --- .gitignore | 2 ++ presto-native-execution/CMakeLists.txt | 12 ++++++++++++ presto-native-execution/Makefile | 17 +++++++++++++++++ presto-native-execution/README.md | 19 ++++++++++++++----- .../scripts/setup-macos.sh | 11 ++++++++++- 5 files changed, 55 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index b3f153236a8d..dc9c1042f96f 100644 --- a/.gitignore +++ b/.gitignore @@ -40,6 +40,8 @@ cmake-build-*/ *.swp *~ a.out +presto-native-execution/deps-download +presto-native-execution/deps-install # Compiled Object files *.slo diff --git a/presto-native-execution/CMakeLists.txt b/presto-native-execution/CMakeLists.txt index 78b1d371e536..d5001dde70a7 100644 --- a/presto-native-execution/CMakeLists.txt +++ b/presto-native-execution/CMakeLists.txt @@ -121,6 +121,11 @@ set(VELOX_BUILD_TEST_UTILS set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +if(DEFINED ENV{INSTALL_PREFIX}) + message(STATUS "Dependency install directory set to: $ENV{INSTALL_PREFIX}") + list(APPEND CMAKE_PREFIX_PATH "$ENV{INSTALL_PREFIX}") +endif() + set(Boost_USE_MULTITHREADED TRUE) find_package( Boost @@ -228,4 +233,11 @@ if("${TREAT_WARNINGS_AS_ERRORS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror") endif() +if(DEFINED ENV{INSTALL_PREFIX}) + # Allow installed package headers to be picked up before brew/system package + # headers. We set this after Velox since Velox handles INSTALL_PREFIX its own + # way. 
+ include_directories(BEFORE "$ENV{INSTALL_PREFIX}/include") +endif() + add_subdirectory(presto_cpp) diff --git a/presto-native-execution/Makefile b/presto-native-execution/Makefile index ecc5f05f95bc..6a84fa7f68a6 100644 --- a/presto-native-execution/Makefile +++ b/presto-native-execution/Makefile @@ -20,6 +20,7 @@ NUM_THREADS ?= $(shell getconf _NPROCESSORS_CONF 2>/dev/null || echo 1) CPU_TARGET ?= "avx" CMAKE_PREFIX_PATH ?= "/usr/local" PRESTOCPP_ROOT_DIR="$(shell pwd)" +PYTHON_VENV ?= .venv EXTRA_CMAKE_FLAGS ?= "" @@ -102,17 +103,33 @@ TypeSignature: #: Build the Presto TypeSignature parser cd presto_cpp/main/types; $(MAKE) TypeSignature format-fix: #: Fix formatting issues in the presto-native-execution directory +ifneq ("$(wildcard ${PYTHON_VENV}/pyvenv.cfg)","") + source ${PYTHON_VENV}/bin/activate; scripts/check.py format master --fix +else scripts/check.py format master --fix +endif format-check: #: Check for formatting issues in the presto-native-execution directory clang-format --version +ifneq ("$(wildcard ${PYTHON_VENV}/pyvenv.cfg)","") + source ${PYTHON_VENV}/bin/activate; scripts/check.py format master +else scripts/check.py format master +endif header-fix: #: Fix license header issues in the presto-native-execution directory +ifneq ("$(wildcard ${PYTHON_VENV}/pyvenv.cfg)","") + source ${PYTHON_VENV}/bin/activate; scripts/check.py header master --fix +else scripts/check.py header master --fix +endif header-check: #: Check for license header issues in the presto-native-execution directory +ifneq ("$(wildcard ${PYTHON_VENV}/pyvenv.cfg)","") + source ${PYTHON_VENV}/bin/activate; scripts/check.py header master +else scripts/check.py header master +endif help: #: Show the help messages @cat $(firstword $(MAKEFILE_LIST)) | \ diff --git a/presto-native-execution/README.md b/presto-native-execution/README.md index f18f88925d79..274f8c132ab9 100644 --- a/presto-native-execution/README.md +++ b/presto-native-execution/README.md @@ -26,10 +26,19 @@ available 
inside `presto/presto-native-execution/scripts`. * CentOS Stream 9: `setup-centos.sh` * Ubuntu: `setup-ubuntu.sh` -Create a directory say `dependencies` and invoke one of these scripts from -this folder. All the dependencies are installed in the system default location eg: `/usr/local`. -To change the installation location specify a path using the `INSTALL_PREFIX` environment variable. -For example, change the location if the default location cannot be written to by the user running the setup script. +The above setup scripts use the `DEPENDENCY_DIR` environment variable to set the +location to download and build packages. This defaults to `deps-download` in the current +working directory. + +Use `INSTALL_PREFIX` to set the install directory of the packages. This defaults to +`deps-install` in the current working directory on macOS and to the default install +location (for example, `/usr/local`) on linux. +Using the default install location `/usr/local` on macOS is discouraged because this +location is used by certain Homebrew versions. + +Manually add the `INSTALL_PREFIX` value in the IDE or bash environment, so subsequent +Prestissimo builds can use the installed packages. Say +`export INSTALL_PREFIX=/Users/$USERNAME/presto/presto-native-execution/deps-install` to `~/.zshrc`. The following libraries are installed by the above setup scripts. The Velox library installs other @@ -70,7 +79,7 @@ Compilers (and versions) not mentioned are known to not work or have not been tr | MacOS | `clang14` | ### Build Prestissimo -#### Parquet and S3 Supprt +#### Parquet and S3 Support To enable Parquet and S3 support, set `PRESTO_ENABLE_PARQUET = "ON"`, `PRESTO_ENABLE_S3 = "ON"` in the environment. 
diff --git a/presto-native-execution/scripts/setup-macos.sh b/presto-native-execution/scripts/setup-macos.sh index 4e703d9a1738..7b8eccd65388 100755 --- a/presto-native-execution/scripts/setup-macos.sh +++ b/presto-native-execution/scripts/setup-macos.sh @@ -13,7 +13,14 @@ set -eufx -o pipefail -# Run the velox setup script first. +SCRIPTDIR=$(dirname "${BASH_SOURCE[0]}") +PYTHON_VENV=${PYTHON_VENV:-"${SCRIPTDIR}/../.venv"} +# Prestissimo fails to build DuckDB with error +# "math cannot parse the expression" when this +# script is invoked under the Presto git project. +# Set DEPENDENCY_DIR to a directory outside of Presto +# to build DuckDB. +BUILD_DUCKDB="${BUILD_DUCKDB:-false}" source "$(dirname "${BASH_SOURCE}")/../velox/scripts/setup-macos.sh" function install_proxygen { @@ -41,4 +48,6 @@ else install_velox_deps install_presto_deps echo "All dependencies for Prestissimo installed!" + echo "To reuse the installed dependencies for subsequent builds, consider adding this to your ~/.zshrc" + echo "export INSTALL_PREFIX=$INSTALL_PREFIX" fi From 7deadd1dc2ba0bce4f345c1394e67a5aa1fd7893 Mon Sep 17 00:00:00 2001 From: namya28 Date: Fri, 27 Sep 2024 18:43:24 +0530 Subject: [PATCH 63/86] Upgrade the logback-core version to 1.2.13 --- pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/pom.xml b/pom.xml index 5b0c7d82b6aa..8487bfedfa0b 100644 --- a/pom.xml +++ b/pom.xml @@ -59,6 +59,7 @@ 1.53 7.5 3.8.0 + 1.2.13 1.13.1 1.6.8 9.2 From 48f0a0c1d380b1155dfd7c99b134a350627c7260 Mon Sep 17 00:00:00 2001 From: Christian Zentgraf Date: Wed, 9 Oct 2024 15:30:08 -0400 Subject: [PATCH 64/86] [native] Update velox --- presto-native-execution/velox | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-native-execution/velox b/presto-native-execution/velox index 4f1140247e2f..acd57170b6d9 160000 --- a/presto-native-execution/velox +++ b/presto-native-execution/velox @@ -1 +1 @@ -Subproject commit 4f1140247e2f5059abd8f71b2ae54e3d1dcf1be2 +Subproject commit 
acd57170b6d98206def1ef02b74c467e3ba18061 From 9fa4aee94c0942c962acd7df46996bc925006dd6 Mon Sep 17 00:00:00 2001 From: Elbin Pallimalil Date: Wed, 9 Oct 2024 20:19:04 +0530 Subject: [PATCH 65/86] Add cast functions for char datatype to numeric types --- .../facebook/presto/type/CharOperators.java | 82 +++++++++++++++++++ .../scalar/AbstractTestFunctions.java | 5 ++ .../scalar/TestDateTimeFunctionsBase.java | 6 -- .../presto/type/TestCharOperators.java | 32 ++++++++ 4 files changed, 119 insertions(+), 6 deletions(-) diff --git a/presto-main/src/main/java/com/facebook/presto/type/CharOperators.java b/presto-main/src/main/java/com/facebook/presto/type/CharOperators.java index b2767e021002..2b38c48450bd 100644 --- a/presto-main/src/main/java/com/facebook/presto/type/CharOperators.java +++ b/presto-main/src/main/java/com/facebook/presto/type/CharOperators.java @@ -15,6 +15,7 @@ import com.facebook.presto.common.block.Block; import com.facebook.presto.common.type.StandardTypes; +import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.function.BlockIndex; import com.facebook.presto.spi.function.BlockPosition; import com.facebook.presto.spi.function.IsNull; @@ -26,6 +27,7 @@ import io.airlift.slice.XxHash64; import static com.facebook.presto.common.function.OperatorType.BETWEEN; +import static com.facebook.presto.common.function.OperatorType.CAST; import static com.facebook.presto.common.function.OperatorType.EQUAL; import static com.facebook.presto.common.function.OperatorType.GREATER_THAN; import static com.facebook.presto.common.function.OperatorType.GREATER_THAN_OR_EQUAL; @@ -37,6 +39,8 @@ import static com.facebook.presto.common.function.OperatorType.NOT_EQUAL; import static com.facebook.presto.common.function.OperatorType.XX_HASH_64; import static com.facebook.presto.common.type.Chars.compareChars; +import static com.facebook.presto.spi.StandardErrorCode.INVALID_CAST_ARGUMENT; +import static java.lang.String.format; public final class 
CharOperators { @@ -167,4 +171,82 @@ public static boolean indeterminate(@SqlType("char(x)") Slice value, @IsNull boo { return isNull; } + + @LiteralParameters("x") + @ScalarOperator(CAST) + @SqlType(StandardTypes.DOUBLE) + public static double castToDouble(@SqlType("char(x)") Slice slice) + { + try { + return Double.parseDouble(slice.toStringUtf8()); + } + catch (Exception e) { + throw new PrestoException(INVALID_CAST_ARGUMENT, format("Cannot cast '%s' to DOUBLE", slice.toStringUtf8())); + } + } + + @LiteralParameters("x") + @ScalarOperator(CAST) + @SqlType(StandardTypes.REAL) + public static long castToFloat(@SqlType("char(x)") Slice slice) + { + try { + return Float.floatToIntBits(Float.parseFloat(slice.toStringUtf8())); + } + catch (Exception e) { + throw new PrestoException(INVALID_CAST_ARGUMENT, format("Cannot cast '%s' to REAL", slice.toStringUtf8())); + } + } + + @LiteralParameters("x") + @ScalarOperator(CAST) + @SqlType(StandardTypes.BIGINT) + public static long castToBigint(@SqlType("char(x)") Slice slice) + { + try { + return Long.parseLong(slice.toStringUtf8().trim()); + } + catch (Exception e) { + throw new PrestoException(INVALID_CAST_ARGUMENT, format("Cannot cast '%s' to BIGINT", slice.toStringUtf8())); + } + } + + @LiteralParameters("x") + @ScalarOperator(CAST) + @SqlType(StandardTypes.INTEGER) + public static long castToInteger(@SqlType("char(x)") Slice slice) + { + try { + return Integer.parseInt(slice.toStringUtf8().trim()); + } + catch (Exception e) { + throw new PrestoException(INVALID_CAST_ARGUMENT, format("Cannot cast '%s' to INT", slice.toStringUtf8())); + } + } + + @LiteralParameters("x") + @ScalarOperator(CAST) + @SqlType(StandardTypes.SMALLINT) + public static long castToSmallint(@SqlType("char(x)") Slice slice) + { + try { + return Short.parseShort(slice.toStringUtf8().trim()); + } + catch (Exception e) { + throw new PrestoException(INVALID_CAST_ARGUMENT, format("Cannot cast '%s' to SMALLINT", slice.toStringUtf8())); + } + } + + 
@LiteralParameters("x") + @ScalarOperator(CAST) + @SqlType(StandardTypes.TINYINT) + public static long castToTinyint(@SqlType("char(x)") Slice slice) + { + try { + return Byte.parseByte(slice.toStringUtf8().trim()); + } + catch (Exception e) { + throw new PrestoException(INVALID_CAST_ARGUMENT, format("Cannot cast '%s' to TINYINT", slice.toStringUtf8())); + } + } } diff --git a/presto-main/src/test/java/com/facebook/presto/operator/scalar/AbstractTestFunctions.java b/presto-main/src/test/java/com/facebook/presto/operator/scalar/AbstractTestFunctions.java index 1a0d6a8f7710..d31ae0b09eb0 100644 --- a/presto-main/src/test/java/com/facebook/presto/operator/scalar/AbstractTestFunctions.java +++ b/presto-main/src/test/java/com/facebook/presto/operator/scalar/AbstractTestFunctions.java @@ -112,6 +112,11 @@ protected void assertFunction(String projection, Type expectedType, Object expec functionAssertions.assertFunction(projection, expectedType, expected); } + protected void assertFunctionString(String projection, Type expectedType, String expected) + { + functionAssertions.assertFunctionString(projection, expectedType, expected); + } + protected void assertFunctionWithError(String projection, Type expectedType, Double expected) { if (expected == null) { diff --git a/presto-main/src/test/java/com/facebook/presto/operator/scalar/TestDateTimeFunctionsBase.java b/presto-main/src/test/java/com/facebook/presto/operator/scalar/TestDateTimeFunctionsBase.java index 7a782eb6a801..53186af9f835 100644 --- a/presto-main/src/test/java/com/facebook/presto/operator/scalar/TestDateTimeFunctionsBase.java +++ b/presto-main/src/test/java/com/facebook/presto/operator/scalar/TestDateTimeFunctionsBase.java @@ -24,7 +24,6 @@ import com.facebook.presto.common.type.TimeType; import com.facebook.presto.common.type.TimeZoneKey; import com.facebook.presto.common.type.TimestampType; -import com.facebook.presto.common.type.Type; import com.facebook.presto.spi.StandardErrorCode; import 
com.facebook.presto.spi.security.ConnectorIdentity; import com.facebook.presto.testing.TestingConnectorSession; @@ -1175,11 +1174,6 @@ public void testIntervalDayToSecondToMilliseconds() assertFunction("to_milliseconds(parse_duration('1d'))", BigintType.BIGINT, DAYS.toMillis(1)); } - private void assertFunctionString(String projection, Type expectedType, String expected) - { - functionAssertions.assertFunctionString(projection, expectedType, expected); - } - private static SqlDate toDate(LocalDate localDate) { return new SqlDate(toIntExact(localDate.toEpochDay())); diff --git a/presto-main/src/test/java/com/facebook/presto/type/TestCharOperators.java b/presto-main/src/test/java/com/facebook/presto/type/TestCharOperators.java index ca51a3ac6749..c22ce2b3276c 100644 --- a/presto-main/src/test/java/com/facebook/presto/type/TestCharOperators.java +++ b/presto-main/src/test/java/com/facebook/presto/type/TestCharOperators.java @@ -17,7 +17,13 @@ import org.testng.annotations.Test; import static com.facebook.presto.common.function.OperatorType.INDETERMINATE; +import static com.facebook.presto.common.type.BigintType.BIGINT; import static com.facebook.presto.common.type.BooleanType.BOOLEAN; +import static com.facebook.presto.common.type.DoubleType.DOUBLE; +import static com.facebook.presto.common.type.IntegerType.INTEGER; +import static com.facebook.presto.common.type.RealType.REAL; +import static com.facebook.presto.common.type.SmallintType.SMALLINT; +import static com.facebook.presto.common.type.TinyintType.TINYINT; public class TestCharOperators extends AbstractTestFunctions @@ -192,4 +198,30 @@ public void testIndeterminate() assertOperator(INDETERMINATE, "CAST(null AS CHAR(3))", BOOLEAN, true); assertOperator(INDETERMINATE, "CHAR '123'", BOOLEAN, false); } + + @Test + public void testCharCast() + { + assertFunction("CAST(CAST('78.95' AS CHAR(5)) AS DOUBLE)", DOUBLE, 78.95); + assertFunction("CAST(CAST(' 45.58 ' AS CHAR(10)) AS DOUBLE)", DOUBLE, 45.58); + 
assertInvalidCast("CAST(CAST(' Z56 ' AS CHAR(20)) AS DOUBLE)"); + assertFunction("CAST(CAST('45.783' AS CHAR(6)) AS REAL)", REAL, 45.783f); + assertFunction("CAST(CAST(' 45.783 ' AS CHAR(10)) AS REAL)", REAL, 45.783f); + assertInvalidCast("CAST(CAST(' Z56 ' AS CHAR(20)) AS REAL)"); + assertFunctionString("CAST(CAST('6.40282346638528860e+70' AS CHAR(60)) AS REAL)", REAL, "Infinity"); + assertFunction("CAST(CAST('45' AS CHAR(2)) AS BIGINT)", BIGINT, 45L); + assertFunction("CAST(CAST(' 45 ' AS CHAR(10)) AS BIGINT)", BIGINT, 45L); + assertInvalidCast("CAST(CAST(' Z56 ' AS CHAR(20)) AS BIGINT)"); + assertFunction("CAST(CAST('45' AS CHAR(2)) AS INTEGER)", INTEGER, 45); + assertFunction("CAST(CAST('2147483647' AS CHAR(10)) AS INTEGER)", INTEGER, 2147483647); + assertFunction("CAST(CAST(' 45 ' AS CHAR(10)) AS INTEGER)", INTEGER, 45); + assertInvalidCast("CAST(CAST(' Z56 ' AS CHAR(20)) AS INTEGER)"); + assertInvalidCast("CAST(CAST('2147483648' AS CHAR(10)) AS INTEGER)"); // 1 over the max range of integer + assertFunction("CAST(CAST('45' AS CHAR(2)) AS SMALLINT)", SMALLINT, (short) 45); + assertFunction("CAST(CAST(' 45 ' AS CHAR(10)) AS SMALLINT)", SMALLINT, (short) 45); + assertInvalidCast("CAST(CAST(' Z56 ' AS CHAR(20)) AS SMALLINT)"); + assertFunction("CAST(CAST('123' AS CHAR(3)) AS TINYINT)", TINYINT, (byte) 123); + assertFunction("CAST(CAST(' 123 ' AS CHAR(10)) AS TINYINT)", TINYINT, (byte) 123); + assertInvalidCast("CAST(CAST(' Z56 ' AS CHAR(20)) AS TINYINT)"); + } } From cbd1a11d634dacfc75fb0d6c8cb057be7797986d Mon Sep 17 00:00:00 2001 From: wangd Date: Thu, 3 Oct 2024 23:35:03 +0800 Subject: [PATCH 66/86] Fix round for float value when input out of long range --- .../presto/operator/scalar/MathFunctions.java | 2 +- .../presto/operator/scalar/TestMathFunctions.java | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/presto-main/src/main/java/com/facebook/presto/operator/scalar/MathFunctions.java 
b/presto-main/src/main/java/com/facebook/presto/operator/scalar/MathFunctions.java index 69f7eb3d07b0..231e8db1b0aa 100644 --- a/presto-main/src/main/java/com/facebook/presto/operator/scalar/MathFunctions.java +++ b/presto-main/src/main/java/com/facebook/presto/operator/scalar/MathFunctions.java @@ -1136,7 +1136,7 @@ public static long roundFloat(@SqlType(StandardTypes.REAL) long num, @SqlType(St catch (ArithmeticException e) { // Use BigDecimal if the value is out of the range of long. BigDecimal bigDecimal = new BigDecimal(numInFloat); - return floatToRawIntBits(bigDecimal.setScale((int) decimals, HALF_UP).longValue()); + return floatToRawIntBits(bigDecimal.setScale((int) decimals, HALF_UP).floatValue()); } } diff --git a/presto-main/src/test/java/com/facebook/presto/operator/scalar/TestMathFunctions.java b/presto-main/src/test/java/com/facebook/presto/operator/scalar/TestMathFunctions.java index a296d6a6a1d8..a5ab98de85fa 100644 --- a/presto-main/src/test/java/com/facebook/presto/operator/scalar/TestMathFunctions.java +++ b/presto-main/src/test/java/com/facebook/presto/operator/scalar/TestMathFunctions.java @@ -709,6 +709,19 @@ public void testSecureRandom() assertInvalidFunction("secure_random(DECIMAL '5.0', DECIMAL '-5.0')", "upper bound must be greater than lower bound"); } + @Test + public void testRoundForUnderlyingValueOutOfRange() + { + // Round data of `REAL` type with underlying value out of long range should work well. 
+ // See issue https://github.com/prestodb/presto/issues/23763 + assertFunction("round(REAL '1.0E19', 1)", REAL, 1.0E19f); + assertFunction("round(REAL '1.0E19', 10)", REAL, 1.0E19f); + assertFunction("round(REAL '1.0E19', 100)", REAL, 1.0E19f); + assertFunction("round(REAL '9999999999999999999.9', 1)", REAL, 9999999999999999999.9f); + assertFunction("round(REAL '9999999999999999999.99', 10)", REAL, 9999999999999999999.99f); + assertFunction("round(REAL '9999999999999999999.999', 100)", REAL, 9999999999999999999.999f); + } + @Test public void testRound() { From 01354d643db48d369fd451762299e1bcdb135379 Mon Sep 17 00:00:00 2001 From: oyeliseiev-ua Date: Thu, 5 Sep 2024 17:24:43 +0300 Subject: [PATCH 67/86] Update SingleStore docs --- presto-docs/src/main/sphinx/connector/singlestore.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/presto-docs/src/main/sphinx/connector/singlestore.rst b/presto-docs/src/main/sphinx/connector/singlestore.rst index 9be7a64826a5..be6b2abc2768 100644 --- a/presto-docs/src/main/sphinx/connector/singlestore.rst +++ b/presto-docs/src/main/sphinx/connector/singlestore.rst @@ -68,9 +68,12 @@ that catalog name instead of ``singlestore`` in the above examples. SingleStore Connector Limitations --------------------------------- +For :doc:`/sql/create-table` statement, the default table type is columnstore. +The table type can be configured by setting the ``default_table_type`` engine variable, see the +`documentation `_. + The following SQL statements are not supported: -* :doc:`/sql/create-table` with ``rowstore`` table type (supports only ``columnstore`` table type, set by default `docs `_.) 
* :doc:`/sql/alter-schema` * :doc:`/sql/analyze` * :doc:`/sql/create-role` From 44e6604ebe74520f855694e7226131807be422f3 Mon Sep 17 00:00:00 2001 From: oyeliseiev-ua <134942613+oyeliseiev-ua@users.noreply.github.com> Date: Mon, 16 Sep 2024 12:53:54 +0300 Subject: [PATCH 68/86] Update presto-docs/src/main/sphinx/connector/singlestore.rst Co-authored-by: Steve Burnett --- presto-docs/src/main/sphinx/connector/singlestore.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-docs/src/main/sphinx/connector/singlestore.rst b/presto-docs/src/main/sphinx/connector/singlestore.rst index be6b2abc2768..4eaaa8bee283 100644 --- a/presto-docs/src/main/sphinx/connector/singlestore.rst +++ b/presto-docs/src/main/sphinx/connector/singlestore.rst @@ -70,7 +70,7 @@ SingleStore Connector Limitations For :doc:`/sql/create-table` statement, the default table type is columnstore. The table type can be configured by setting the ``default_table_type`` engine variable, see the -`documentation `_. +`Creating a Columnstore Table `_. The following SQL statements are not supported: From ac954c259a53ecd5f4af924ae8b441670b6ff8eb Mon Sep 17 00:00:00 2001 From: oyeliseiev-ua <134942613+oyeliseiev-ua@users.noreply.github.com> Date: Mon, 16 Sep 2024 17:17:40 +0300 Subject: [PATCH 69/86] Update presto-docs/src/main/sphinx/connector/singlestore.rst Co-authored-by: Steve Burnett --- presto-docs/src/main/sphinx/connector/singlestore.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-docs/src/main/sphinx/connector/singlestore.rst b/presto-docs/src/main/sphinx/connector/singlestore.rst index 4eaaa8bee283..5e89561dc949 100644 --- a/presto-docs/src/main/sphinx/connector/singlestore.rst +++ b/presto-docs/src/main/sphinx/connector/singlestore.rst @@ -68,7 +68,7 @@ that catalog name instead of ``singlestore`` in the above examples. 
SingleStore Connector Limitations --------------------------------- -For :doc:`/sql/create-table` statement, the default table type is columnstore. +For :doc:`/sql/create-table` statement, the default table type is ``columnstore``. The table type can be configured by setting the ``default_table_type`` engine variable, see the `Creating a Columnstore Table `_. From 8ce4ccd69f660a94ce779805001077b5edf0a509 Mon Sep 17 00:00:00 2001 From: oyeliseiev-ua <134942613+oyeliseiev-ua@users.noreply.github.com> Date: Mon, 16 Sep 2024 17:17:49 +0300 Subject: [PATCH 70/86] Update presto-docs/src/main/sphinx/connector/singlestore.rst Co-authored-by: Steve Burnett --- presto-docs/src/main/sphinx/connector/singlestore.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-docs/src/main/sphinx/connector/singlestore.rst b/presto-docs/src/main/sphinx/connector/singlestore.rst index 5e89561dc949..e3371f53ee93 100644 --- a/presto-docs/src/main/sphinx/connector/singlestore.rst +++ b/presto-docs/src/main/sphinx/connector/singlestore.rst @@ -69,7 +69,7 @@ SingleStore Connector Limitations --------------------------------- For :doc:`/sql/create-table` statement, the default table type is ``columnstore``. -The table type can be configured by setting the ``default_table_type`` engine variable, see the +The table type can be configured by setting the ``default_table_type`` engine variable, see `Creating a Columnstore Table `_. 
The following SQL statements are not supported: From 4b0ab664bfc090838db38212ad84b8a6603b6344 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Wed, 9 Oct 2024 11:50:10 -0700 Subject: [PATCH 71/86] Add property for eager building of plan As part of new native plan checker SPI, the new property eager-plan-validation-enabled will enable the eager building and validation of a logical plan so that any errors or incompatibilities in the plan will cause the query to fail quickly, before queueing and cluster resources are assigned to keep queries with invalid plans from holding slots in the queue. RFC: https://github.com/prestodb/rfcs/blob/main/RFC-0008-plan-checker.md --- .../src/main/sphinx/admin/properties.rst | 11 +++++ .../presto/SystemSessionProperties.java | 11 +++++ .../EagerPlanValidationExecutionMBean.java | 41 +++++++++++++++++++ .../execution/ForEagerPlanValidation.java | 31 ++++++++++++++ .../presto/execution/SqlQueryExecution.java | 38 ++++++++++++++++- .../presto/server/CoordinatorModule.java | 20 ++++++++- .../presto/sql/analyzer/FeaturesConfig.java | 29 +++++++++++++ .../sql/analyzer/TestFeaturesConfig.java | 10 ++++- 8 files changed, 186 insertions(+), 5 deletions(-) create mode 100644 presto-main/src/main/java/com/facebook/presto/execution/EagerPlanValidationExecutionMBean.java create mode 100644 presto-main/src/main/java/com/facebook/presto/execution/ForEagerPlanValidation.java diff --git a/presto-docs/src/main/sphinx/admin/properties.rst b/presto-docs/src/main/sphinx/admin/properties.rst index 6a9af7fc6682..0b4a7ccc8393 100644 --- a/presto-docs/src/main/sphinx/admin/properties.rst +++ b/presto-docs/src/main/sphinx/admin/properties.rst @@ -72,6 +72,17 @@ Number of local parallel table writer threads per worker for partitioned writes. set, the number set by ``task_writer_count`` will be used. It is required to be a power of two for a Java query engine. 
+``eager-plan-validation-enabled`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* **Type:** ``boolean`` +* **Default value:** ``false`` + +This property enables the eager building and validation of a logical plan. +When enabled, the logical plan will begin to be built and validated before +queueing and allocation of cluster resources so that any errors or +incompatibilities in the query plan will fail quickly and inform the user. + .. _tuning-memory: Memory Management Properties diff --git a/presto-main/src/main/java/com/facebook/presto/SystemSessionProperties.java b/presto-main/src/main/java/com/facebook/presto/SystemSessionProperties.java index 1b923594a652..d039bd559574 100644 --- a/presto-main/src/main/java/com/facebook/presto/SystemSessionProperties.java +++ b/presto-main/src/main/java/com/facebook/presto/SystemSessionProperties.java @@ -331,6 +331,7 @@ public final class SystemSessionProperties public static final String REWRITE_EXPRESSION_WITH_CONSTANT_EXPRESSION = "rewrite_expression_with_constant_expression"; public static final String PRINT_ESTIMATED_STATS_FROM_CACHE = "print_estimated_stats_from_cache"; public static final String REMOVE_CROSS_JOIN_WITH_CONSTANT_SINGLE_ROW_INPUT = "remove_cross_join_with_constant_single_row_input"; + public static final String EAGER_PLAN_VALIDATION_ENABLED = "eager_plan_validation_enabled"; // TODO: Native execution related session properties that are temporarily put here. They will be relocated in the future. 
public static final String NATIVE_SIMPLIFIED_EXPRESSION_EVALUATION_ENABLED = "native_simplified_expression_evaluation_enabled"; @@ -2043,6 +2044,11 @@ public SystemSessionProperties( "If one input of the cross join is a single row with constant value, remove this cross join and replace with a project node", featuresConfig.isRemoveCrossJoinWithSingleConstantRow(), false), + booleanProperty( + EAGER_PLAN_VALIDATION_ENABLED, + "Enable eager building and validation of logical plan before queueing", + featuresConfig.isEagerPlanValidationEnabled(), + false), new PropertyMetadata<>( DEFAULT_VIEW_SECURITY_MODE, format("Set default view security mode. Options are: %s", @@ -3369,6 +3375,11 @@ public static boolean isRewriteExpressionWithConstantEnabled(Session session) return session.getSystemProperty(REWRITE_EXPRESSION_WITH_CONSTANT_EXPRESSION, Boolean.class); } + public static boolean isEagerPlanValidationEnabled(Session session) + { + return session.getSystemProperty(EAGER_PLAN_VALIDATION_ENABLED, Boolean.class); + } + public static CreateView.Security getDefaultViewSecurityMode(Session session) { return session.getSystemProperty(DEFAULT_VIEW_SECURITY_MODE, CreateView.Security.class); diff --git a/presto-main/src/main/java/com/facebook/presto/execution/EagerPlanValidationExecutionMBean.java b/presto-main/src/main/java/com/facebook/presto/execution/EagerPlanValidationExecutionMBean.java new file mode 100644 index 000000000000..ad77325e4263 --- /dev/null +++ b/presto-main/src/main/java/com/facebook/presto/execution/EagerPlanValidationExecutionMBean.java @@ -0,0 +1,41 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.execution; + +import com.facebook.airlift.concurrent.ThreadPoolExecutorMBean; +import org.weakref.jmx.Managed; +import org.weakref.jmx.Nested; + +import javax.inject.Inject; + +import java.util.concurrent.ExecutorService; +import java.util.concurrent.ThreadPoolExecutor; + +public class EagerPlanValidationExecutionMBean +{ + private final ThreadPoolExecutorMBean executorMBean; + + @Inject + public EagerPlanValidationExecutionMBean(@ForEagerPlanValidation ExecutorService executor) + { + this.executorMBean = new ThreadPoolExecutorMBean((ThreadPoolExecutor) executor); + } + + @Managed + @Nested + public ThreadPoolExecutorMBean getExecutor() + { + return executorMBean; + } +} diff --git a/presto-main/src/main/java/com/facebook/presto/execution/ForEagerPlanValidation.java b/presto-main/src/main/java/com/facebook/presto/execution/ForEagerPlanValidation.java new file mode 100644 index 000000000000..5193341a73cc --- /dev/null +++ b/presto-main/src/main/java/com/facebook/presto/execution/ForEagerPlanValidation.java @@ -0,0 +1,31 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.execution; + +import javax.inject.Qualifier; + +import java.lang.annotation.Retention; +import java.lang.annotation.Target; + +import static java.lang.annotation.ElementType.FIELD; +import static java.lang.annotation.ElementType.METHOD; +import static java.lang.annotation.ElementType.PARAMETER; +import static java.lang.annotation.RetentionPolicy.RUNTIME; + +@Retention(RUNTIME) +@Target({FIELD, PARAMETER, METHOD}) +@Qualifier +public @interface ForEagerPlanValidation +{ +} diff --git a/presto-main/src/main/java/com/facebook/presto/execution/SqlQueryExecution.java b/presto-main/src/main/java/com/facebook/presto/execution/SqlQueryExecution.java index 403b3de1ce75..55a52ed96617 100644 --- a/presto-main/src/main/java/com/facebook/presto/execution/SqlQueryExecution.java +++ b/presto-main/src/main/java/com/facebook/presto/execution/SqlQueryExecution.java @@ -75,13 +75,16 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutorService; import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Consumer; import static com.facebook.presto.SystemSessionProperties.getExecutionPolicy; import static com.facebook.presto.SystemSessionProperties.getQueryAnalyzerTimeout; +import static com.facebook.presto.SystemSessionProperties.isEagerPlanValidationEnabled; import static com.facebook.presto.SystemSessionProperties.isLogInvokedFunctionNamesEnabled; import static com.facebook.presto.SystemSessionProperties.isSpoolingOutputBufferEnabled; import static com.facebook.presto.common.RuntimeMetricName.FRAGMENT_PLAN_TIME_NANOS; @@ -141,6 +144,8 @@ public class SqlQueryExecution private final PlanCanonicalInfoProvider 
planCanonicalInfoProvider; private final QueryAnalysis queryAnalysis; private final AnalyzerContext analyzerContext; + private final CompletableFuture planFuture; + private final AtomicBoolean planFutureLocked = new AtomicBoolean(); private SqlQueryExecution( QueryAnalyzer queryAnalyzer, @@ -159,6 +164,7 @@ private SqlQueryExecution( ExecutorService queryExecutor, ScheduledExecutorService timeoutThreadExecutor, SectionExecutionFactory sectionExecutionFactory, + ExecutorService eagerPlanValidationExecutor, InternalNodeManager internalNodeManager, ExecutionPolicy executionPolicy, SplitSchedulerStats schedulerStats, @@ -243,6 +249,10 @@ private SqlQueryExecution( } } } + + // Optionally build and validate plan immediately, before execution begins + planFuture = isEagerPlanValidationEnabled(getSession()) ? + CompletableFuture.supplyAsync(this::runCreateLogicalPlanAsync, eagerPlanValidationExecutor) : null; } } @@ -460,8 +470,13 @@ public void start() Thread.currentThread(), timeoutThreadExecutor, getQueryAnalyzerTimeout(getSession()))) { - // create logical plan for the query - plan = createLogicalPlanAndOptimize(); + // If planFuture has not started, cancel and build plan in current thread + if (planFuture != null && !planFutureLocked.compareAndSet(false, true)) { + plan = planFuture.get(); + } + else { + plan = createLogicalPlanAndOptimize(); + } } metadata.beginQuery(getSession(), plan.getConnectors()); @@ -590,6 +605,21 @@ private PlanRoot createLogicalPlanAndOptimize() } } + private PlanRoot runCreateLogicalPlanAsync() + { + try { + // Check if creating plan async has been cancelled + if (planFutureLocked.compareAndSet(false, true)) { + return createLogicalPlanAndOptimize(); + } + return null; + } + catch (Throwable e) { + fail(e); + throw e; + } + } + private void planDistribution(PlanRoot plan) { CloseableSplitSourceProvider splitSourceProvider = new CloseableSplitSourceProvider(splitManager::getSplits); @@ -862,6 +892,7 @@ public static class 
SqlQueryExecutionFactory private final ScheduledExecutorService timeoutThreadExecutor; private final ExecutorService queryExecutor; private final SectionExecutionFactory sectionExecutionFactory; + private final ExecutorService eagerPlanValidationExecutor; private final InternalNodeManager internalNodeManager; private final Map executionPolicies; private final StatsCalculator statsCalculator; @@ -883,6 +914,7 @@ public static class SqlQueryExecutionFactory @ForQueryExecution ExecutorService queryExecutor, @ForTimeoutThread ScheduledExecutorService timeoutThreadExecutor, SectionExecutionFactory sectionExecutionFactory, + @ForEagerPlanValidation ExecutorService eagerPlanValidationExecutor, InternalNodeManager internalNodeManager, Map executionPolicies, SplitSchedulerStats schedulerStats, @@ -904,6 +936,7 @@ public static class SqlQueryExecutionFactory this.queryExecutor = requireNonNull(queryExecutor, "queryExecutor is null"); this.timeoutThreadExecutor = requireNonNull(timeoutThreadExecutor, "timeoutThreadExecutor is null"); this.sectionExecutionFactory = requireNonNull(sectionExecutionFactory, "sectionExecutionFactory is null"); + this.eagerPlanValidationExecutor = requireNonNull(eagerPlanValidationExecutor, "eagerPlanValidationExecutor is null"); this.internalNodeManager = requireNonNull(internalNodeManager, "internalNodeManager is null"); this.executionPolicies = requireNonNull(executionPolicies, "schedulerPolicies is null"); this.planOptimizers = planOptimizers.getPlanningTimeOptimizers(); @@ -946,6 +979,7 @@ public QueryExecution createQueryExecution( queryExecutor, timeoutThreadExecutor, sectionExecutionFactory, + eagerPlanValidationExecutor, internalNodeManager, executionPolicy, schedulerStats, diff --git a/presto-main/src/main/java/com/facebook/presto/server/CoordinatorModule.java b/presto-main/src/main/java/com/facebook/presto/server/CoordinatorModule.java index 28f14f60315e..203584435344 100644 --- 
a/presto-main/src/main/java/com/facebook/presto/server/CoordinatorModule.java +++ b/presto-main/src/main/java/com/facebook/presto/server/CoordinatorModule.java @@ -34,8 +34,10 @@ import com.facebook.presto.event.QueryMonitorConfig; import com.facebook.presto.event.QueryProgressMonitor; import com.facebook.presto.execution.ClusterSizeMonitor; +import com.facebook.presto.execution.EagerPlanValidationExecutionMBean; import com.facebook.presto.execution.ExecutionFactoriesManager; import com.facebook.presto.execution.ExplainAnalyzeContext; +import com.facebook.presto.execution.ForEagerPlanValidation; import com.facebook.presto.execution.ForQueryExecution; import com.facebook.presto.execution.ForTimeoutThread; import com.facebook.presto.execution.NodeResourceStatusConfig; @@ -83,6 +85,7 @@ import com.facebook.presto.server.remotetask.RemoteTaskStats; import com.facebook.presto.spi.memory.ClusterMemoryPoolManager; import com.facebook.presto.spi.security.SelectedRole; +import com.facebook.presto.sql.analyzer.FeaturesConfig; import com.facebook.presto.sql.analyzer.QueryExplainer; import com.facebook.presto.sql.planner.PlanFragmenter; import com.facebook.presto.sql.planner.PlanOptimizers; @@ -104,8 +107,11 @@ import java.util.List; import java.util.concurrent.ExecutorService; +import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledThreadPoolExecutor; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; import static com.facebook.airlift.concurrent.Threads.daemonThreadsNamed; import static com.facebook.airlift.concurrent.Threads.threadsNamed; @@ -264,6 +270,8 @@ protected void setup(Binder binder) .toInstance(newCachedThreadPool(threadsNamed("query-execution-%s"))); binder.bind(QueryExecutionMBean.class).in(Scopes.SINGLETON); newExporter(binder).export(QueryExecutionMBean.class).as(generatedNameOf(QueryExecution.class)); + 
binder.bind(EagerPlanValidationExecutionMBean.class).in(Scopes.SINGLETON); + newExporter(binder).export(EagerPlanValidationExecutionMBean.class).withGeneratedName(); binder.bind(SplitSchedulerStats.class).in(Scopes.SINGLETON); newExporter(binder).export(SplitSchedulerStats.class).withGeneratedName(); @@ -376,6 +384,14 @@ public static ScheduledExecutorService createTimeoutThreadExecutor() return executor; } + @Provides + @Singleton + @ForEagerPlanValidation + public static ExecutorService createEagerPlanValidationExecutor(FeaturesConfig featuresConfig) + { + return new ThreadPoolExecutor(0, featuresConfig.getEagerPlanValidationThreadPoolSize(), 1L, TimeUnit.MINUTES, new LinkedBlockingQueue(), threadsNamed("plan-validation-%s")); + } + private void bindLowMemoryKiller(String name, Class clazz) { install(installModuleIf( @@ -395,7 +411,8 @@ public ExecutorCleanup( @ForQueryExecution ExecutorService queryExecutionExecutor, @ForScheduler ScheduledExecutorService schedulerExecutor, @ForTransactionManager ExecutorService transactionFinishingExecutor, - @ForTransactionManager ScheduledExecutorService transactionIdleExecutor) + @ForTransactionManager ScheduledExecutorService transactionIdleExecutor, + @ForEagerPlanValidation ExecutorService eagerPlanValidationExecutor) { executors = ImmutableList.builder() .add(statementResponseExecutor) @@ -404,6 +421,7 @@ public ExecutorCleanup( .add(schedulerExecutor) .add(transactionFinishingExecutor) .add(transactionIdleExecutor) + .add(eagerPlanValidationExecutor) .build(); } diff --git a/presto-main/src/main/java/com/facebook/presto/sql/analyzer/FeaturesConfig.java b/presto-main/src/main/java/com/facebook/presto/sql/analyzer/FeaturesConfig.java index 1bb1ea8894bd..88d1b3248cd7 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/analyzer/FeaturesConfig.java +++ b/presto-main/src/main/java/com/facebook/presto/sql/analyzer/FeaturesConfig.java @@ -296,6 +296,9 @@ public class FeaturesConfig private boolean 
isInlineProjectionsOnValuesEnabled; + private boolean eagerPlanValidationEnabled; + private int eagerPlanValidationThreadPoolSize = 20; + public enum PartitioningPrecisionStrategy { // Let Presto decide when to repartition @@ -2969,4 +2972,30 @@ public FeaturesConfig setInlineProjectionsOnValues(boolean isInlineProjectionsOn this.isInlineProjectionsOnValuesEnabled = isInlineProjectionsOnValuesEnabled; return this; } + + @Config("eager-plan-validation-enabled") + @ConfigDescription("Enable eager building and validation of logical plan before queueing") + public FeaturesConfig setEagerPlanValidationEnabled(boolean eagerPlanValidationEnabled) + { + this.eagerPlanValidationEnabled = eagerPlanValidationEnabled; + return this; + } + + public boolean isEagerPlanValidationEnabled() + { + return this.eagerPlanValidationEnabled; + } + + @Config("eager-plan-validation-thread-pool-size") + @ConfigDescription("Size of thread pool to use when eager plan validation is enabled") + public FeaturesConfig setEagerPlanValidationThreadPoolSize(int eagerPlanValidationThreadPoolSize) + { + this.eagerPlanValidationThreadPoolSize = eagerPlanValidationThreadPoolSize; + return this; + } + + public int getEagerPlanValidationThreadPoolSize() + { + return this.eagerPlanValidationThreadPoolSize; + } } diff --git a/presto-main/src/test/java/com/facebook/presto/sql/analyzer/TestFeaturesConfig.java b/presto-main/src/test/java/com/facebook/presto/sql/analyzer/TestFeaturesConfig.java index c78e4835c354..ebe971197a36 100644 --- a/presto-main/src/test/java/com/facebook/presto/sql/analyzer/TestFeaturesConfig.java +++ b/presto-main/src/test/java/com/facebook/presto/sql/analyzer/TestFeaturesConfig.java @@ -255,7 +255,9 @@ public void testDefaults() .setPrintEstimatedStatsFromCache(false) .setRemoveCrossJoinWithSingleConstantRow(true) .setUseHistograms(false) - .setInlineProjectionsOnValues(false)); + .setInlineProjectionsOnValues(false) + .setEagerPlanValidationEnabled(false) + 
.setEagerPlanValidationThreadPoolSize(20)); } @Test @@ -460,6 +462,8 @@ public void testExplicitPropertyMappings() .put("optimizer.remove-cross-join-with-single-constant-row", "false") .put("optimizer.use-histograms", "true") .put("optimizer.inline-projections-on-values", "true") + .put("eager-plan-validation-enabled", "true") + .put("eager-plan-validation-thread-pool-size", "2") .build(); FeaturesConfig expected = new FeaturesConfig() @@ -661,7 +665,9 @@ public void testExplicitPropertyMappings() .setPrintEstimatedStatsFromCache(true) .setRemoveCrossJoinWithSingleConstantRow(false) .setUseHistograms(true) - .setInlineProjectionsOnValues(true); + .setInlineProjectionsOnValues(true) + .setEagerPlanValidationEnabled(true) + .setEagerPlanValidationThreadPoolSize(2); assertFullMapping(properties, expected); } From b7814dfecc8ba055aac0a32a48297217eae0bc35 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Wed, 9 Oct 2024 11:54:49 -0700 Subject: [PATCH 72/86] Add SPI to support plugin for custom plan checkers This adds a new SPI that allows plugins to provide custom plan checkers for validating intermediate, final, and fragment stages of a logical plan. The motivation for this is to allow for a native plan checker that will eagerly validate a plan on the native sidecar to quickly fail the query if there are incompatibilities. Add unit test to verify that a queued query can be validated and fail while queued. This is done by using the new custom plan checker SPI to add a plugin that will trigger a failure when validating the plan.
See also: #23596 RFC: https://github.com/prestodb/rfcs/blob/main/RFC-0008-plan-checker.md --- .../scheduler/SqlQueryScheduler.java | 2 +- .../facebook/presto/server/PluginManager.java | 12 +++- .../facebook/presto/server/PrestoServer.java | 2 + .../presto/server/ServerMainModule.java | 8 +++ .../server/testing/TestingPrestoServer.java | 12 ++++ .../sql/planner/BasePlanFragmenter.java | 4 +- .../presto/sql/planner/PlanFragmenter.java | 7 ++- .../JsonCodecSimplePlanFragmentSerde.java | 47 +++++++++++++++ .../sql/planner/sanity/PlanChecker.java | 50 ++++++++++++++-- .../sanity/PlanCheckerProviderManager.java | 59 +++++++++++++++++++ .../presto/testing/LocalQueryRunner.java | 20 +++++-- .../facebook/presto/testing/QueryRunner.java | 3 + .../presto/cost/TestCostCalculator.java | 8 ++- .../nativeworker/ContainerQueryRunner.java | 7 +++ .../presto/spark/PrestoSparkModule.java | 8 +++ .../PrestoSparkQueryExecutionFactory.java | 9 ++- .../PrestoSparkAdaptiveQueryExecution.java | 10 ++-- .../presto/spark/PrestoSparkQueryRunner.java | 9 +++ .../planner/TestIterativePlanFragmenter.java | 10 +++- .../java/com/facebook/presto/spi/Plugin.java | 6 ++ .../facebook/presto/spi/plan/PlanChecker.java | 27 +++++++++ .../presto/spi/plan/PlanCheckerProvider.java | 36 +++++++++++ .../spi/plan/PlanCheckerProviderFactory.java | 21 +++++++ .../spi/plan/SimplePlanFragmentSerde.java | 21 +++++++ .../tests/AbstractTestQueryFramework.java | 4 +- .../presto/tests/DistributedQueryRunner.java | 8 +++ .../presto/tests/StandaloneQueryRunner.java | 7 +++ .../facebook/presto/execution/TestQueues.java | 45 ++++++++++++++ .../TestingPlanCheckerProviderPlugin.java | 59 +++++++++++++++++++ .../execution/TriggerFailurePlanChecker.java | 45 ++++++++++++++ ...e_groups_config_eager_plan_validation.json | 32 ++++++++++ .../thrift/integration/ThriftQueryRunner.java | 7 +++ 32 files changed, 578 insertions(+), 27 deletions(-) create mode 100644 
presto-main/src/main/java/com/facebook/presto/sql/planner/plan/JsonCodecSimplePlanFragmentSerde.java create mode 100644 presto-main/src/main/java/com/facebook/presto/sql/planner/sanity/PlanCheckerProviderManager.java create mode 100644 presto-spi/src/main/java/com/facebook/presto/spi/plan/PlanChecker.java create mode 100644 presto-spi/src/main/java/com/facebook/presto/spi/plan/PlanCheckerProvider.java create mode 100644 presto-spi/src/main/java/com/facebook/presto/spi/plan/PlanCheckerProviderFactory.java create mode 100644 presto-spi/src/main/java/com/facebook/presto/spi/plan/SimplePlanFragmentSerde.java create mode 100644 presto-tests/src/test/java/com/facebook/presto/execution/TestingPlanCheckerProviderPlugin.java create mode 100644 presto-tests/src/test/java/com/facebook/presto/execution/TriggerFailurePlanChecker.java create mode 100644 presto-tests/src/test/resources/resource_groups_config_eager_plan_validation.json diff --git a/presto-main/src/main/java/com/facebook/presto/execution/scheduler/SqlQueryScheduler.java b/presto-main/src/main/java/com/facebook/presto/execution/scheduler/SqlQueryScheduler.java index 9a6d37fdb950..198f23f1ba22 100644 --- a/presto-main/src/main/java/com/facebook/presto/execution/scheduler/SqlQueryScheduler.java +++ b/presto-main/src/main/java/com/facebook/presto/execution/scheduler/SqlQueryScheduler.java @@ -594,7 +594,7 @@ private StreamingPlanSection tryCostBasedOptimize(StreamingPlanSection section) .forEach(currentSubPlan -> { Optional newPlanFragment = performRuntimeOptimizations(currentSubPlan); if (newPlanFragment.isPresent()) { - planChecker.validatePlanFragment(newPlanFragment.get().getRoot(), session, metadata, warningCollector); + planChecker.validatePlanFragment(newPlanFragment.get(), session, metadata, warningCollector); oldToNewFragment.put(currentSubPlan.getFragment(), newPlanFragment.get()); } }); diff --git a/presto-main/src/main/java/com/facebook/presto/server/PluginManager.java 
b/presto-main/src/main/java/com/facebook/presto/server/PluginManager.java index 22ecb4b1c473..7983e017d024 100644 --- a/presto-main/src/main/java/com/facebook/presto/server/PluginManager.java +++ b/presto-main/src/main/java/com/facebook/presto/server/PluginManager.java @@ -36,6 +36,7 @@ import com.facebook.presto.spi.eventlistener.EventListenerFactory; import com.facebook.presto.spi.function.FunctionNamespaceManagerFactory; import com.facebook.presto.spi.nodestatus.NodeStatusNotificationProviderFactory; +import com.facebook.presto.spi.plan.PlanCheckerProviderFactory; import com.facebook.presto.spi.prerequisites.QueryPrerequisitesFactory; import com.facebook.presto.spi.resourceGroups.ResourceGroupConfigurationManagerFactory; import com.facebook.presto.spi.security.PasswordAuthenticatorFactory; @@ -48,6 +49,7 @@ import com.facebook.presto.spi.ttl.NodeTtlFetcherFactory; import com.facebook.presto.sql.analyzer.AnalyzerProviderManager; import com.facebook.presto.sql.analyzer.QueryPreparerProviderManager; +import com.facebook.presto.sql.planner.sanity.PlanCheckerProviderManager; import com.facebook.presto.storage.TempStorageManager; import com.facebook.presto.tracing.TracerProviderManager; import com.facebook.presto.ttl.clusterttlprovidermanagers.ClusterTtlProviderManager; @@ -131,6 +133,7 @@ public class PluginManager private final AnalyzerProviderManager analyzerProviderManager; private final QueryPreparerProviderManager queryPreparerProviderManager; private final NodeStatusNotificationManager nodeStatusNotificationManager; + private final PlanCheckerProviderManager planCheckerProviderManager; @Inject public PluginManager( @@ -152,7 +155,8 @@ public PluginManager( ClusterTtlProviderManager clusterTtlProviderManager, HistoryBasedPlanStatisticsManager historyBasedPlanStatisticsManager, TracerProviderManager tracerProviderManager, - NodeStatusNotificationManager nodeStatusNotificationManager) + NodeStatusNotificationManager nodeStatusNotificationManager, + 
PlanCheckerProviderManager planCheckerProviderManager) { requireNonNull(nodeInfo, "nodeInfo is null"); requireNonNull(config, "config is null"); @@ -184,6 +188,7 @@ public PluginManager( this.analyzerProviderManager = requireNonNull(analyzerProviderManager, "analyzerProviderManager is null"); this.queryPreparerProviderManager = requireNonNull(queryPreparerProviderManager, "queryPreparerProviderManager is null"); this.nodeStatusNotificationManager = requireNonNull(nodeStatusNotificationManager, "nodeStatusNotificationManager is null"); + this.planCheckerProviderManager = requireNonNull(planCheckerProviderManager, "planCheckerProviderManager is null"); } public void loadPlugins() @@ -348,6 +353,11 @@ public void installPlugin(Plugin plugin) log.info("Registering node status notification provider %s", nodeStatusNotificationProviderFactory.getName()); nodeStatusNotificationManager.addNodeStatusNotificationProviderFactory(nodeStatusNotificationProviderFactory); } + + for (PlanCheckerProviderFactory planCheckerProviderFactory : plugin.getPlanCheckerProviderFactories()) { + log.info("Registering plan checker provider factory %s", planCheckerProviderFactory.getName()); + planCheckerProviderManager.addPlanCheckerProviderFactory(planCheckerProviderFactory); + } } public void installCoordinatorPlugin(CoordinatorPlugin plugin) diff --git a/presto-main/src/main/java/com/facebook/presto/server/PrestoServer.java b/presto-main/src/main/java/com/facebook/presto/server/PrestoServer.java index 8b3aaa3009cc..0f3ba3b14df0 100644 --- a/presto-main/src/main/java/com/facebook/presto/server/PrestoServer.java +++ b/presto-main/src/main/java/com/facebook/presto/server/PrestoServer.java @@ -47,6 +47,7 @@ import com.facebook.presto.server.security.ServerSecurityModule; import com.facebook.presto.sql.analyzer.FeaturesConfig; import com.facebook.presto.sql.parser.SqlParserOptions; +import com.facebook.presto.sql.planner.sanity.PlanCheckerProviderManager; import 
com.facebook.presto.storage.TempStorageManager; import com.facebook.presto.storage.TempStorageModule; import com.facebook.presto.tracing.TracerProviderManager; @@ -177,6 +178,7 @@ public void run() injector.getInstance(TracerProviderManager.class).loadTracerProvider(); injector.getInstance(NodeStatusNotificationManager.class).loadNodeStatusNotificationProvider(); injector.getInstance(GracefulShutdownHandler.class).loadNodeStatusNotification(); + injector.getInstance(PlanCheckerProviderManager.class).loadPlanCheckerProviders(); startAssociatedProcesses(injector); injector.getInstance(Announcer.class).start(); diff --git a/presto-main/src/main/java/com/facebook/presto/server/ServerMainModule.java b/presto-main/src/main/java/com/facebook/presto/server/ServerMainModule.java index 85658fe457f7..85fd54dfb767 100644 --- a/presto-main/src/main/java/com/facebook/presto/server/ServerMainModule.java +++ b/presto-main/src/main/java/com/facebook/presto/server/ServerMainModule.java @@ -146,6 +146,8 @@ import com.facebook.presto.spi.PageSorter; import com.facebook.presto.spi.analyzer.ViewDefinition; import com.facebook.presto.spi.function.SqlInvokedFunction; +import com.facebook.presto.spi.plan.SimplePlanFragment; +import com.facebook.presto.spi.plan.SimplePlanFragmentSerde; import com.facebook.presto.spi.relation.DeterminismEvaluator; import com.facebook.presto.spi.relation.DomainTranslator; import com.facebook.presto.spi.relation.PredicateCompiler; @@ -200,7 +202,9 @@ import com.facebook.presto.sql.planner.NodePartitioningManager; import com.facebook.presto.sql.planner.PartitioningProviderManager; import com.facebook.presto.sql.planner.PlanFragment; +import com.facebook.presto.sql.planner.plan.JsonCodecSimplePlanFragmentSerde; import com.facebook.presto.sql.planner.sanity.PlanChecker; +import com.facebook.presto.sql.planner.sanity.PlanCheckerProviderManager; import com.facebook.presto.sql.relational.RowExpressionDeterminismEvaluator; import 
com.facebook.presto.sql.relational.RowExpressionDomainTranslator; import com.facebook.presto.sql.tree.Expression; @@ -625,6 +629,8 @@ public ListeningExecutorService createResourceManagerExecutor(ResourceManagerCon // plan jsonBinder(binder).addKeySerializerBinding(VariableReferenceExpression.class).to(VariableReferenceExpressionSerializer.class); jsonBinder(binder).addKeyDeserializerBinding(VariableReferenceExpression.class).to(VariableReferenceExpressionDeserializer.class); + jsonCodecBinder(binder).bindJsonCodec(SimplePlanFragment.class); + binder.bind(SimplePlanFragmentSerde.class).to(JsonCodecSimplePlanFragmentSerde.class).in(Scopes.SINGLETON); // history statistics configBinder(binder).bindConfig(HistoryBasedOptimizationConfig.class); @@ -785,6 +791,8 @@ public ListeningExecutorService createResourceManagerExecutor(ResourceManagerCon //Optional Status Detector newOptionalBinder(binder, NodeStatusService.class); binder.bind(NodeStatusNotificationManager.class).in(Scopes.SINGLETON); + + binder.bind(PlanCheckerProviderManager.class).in(Scopes.SINGLETON); } @Provides diff --git a/presto-main/src/main/java/com/facebook/presto/server/testing/TestingPrestoServer.java b/presto-main/src/main/java/com/facebook/presto/server/testing/TestingPrestoServer.java index 6157d89c19e0..86f72c2df636 100644 --- a/presto-main/src/main/java/com/facebook/presto/server/testing/TestingPrestoServer.java +++ b/presto-main/src/main/java/com/facebook/presto/server/testing/TestingPrestoServer.java @@ -74,6 +74,7 @@ import com.facebook.presto.sql.planner.ConnectorPlanOptimizerManager; import com.facebook.presto.sql.planner.NodePartitioningManager; import com.facebook.presto.sql.planner.Plan; +import com.facebook.presto.sql.planner.sanity.PlanCheckerProviderManager; import com.facebook.presto.storage.TempStorageManager; import com.facebook.presto.testing.ProcedureTester; import com.facebook.presto.testing.TestingAccessControlManager; @@ -173,6 +174,7 @@ public class TestingPrestoServer 
private final boolean nodeSchedulerIncludeCoordinator; private final ServerInfoResource serverInfoResource; private final ResourceManagerClusterStateProvider clusterStateProvider; + private final PlanCheckerProviderManager planCheckerProviderManager; public static class TestShutdownAction implements ShutdownAction @@ -379,6 +381,7 @@ public TestingPrestoServer( statsCalculator = injector.getInstance(StatsCalculator.class); eventListenerManager = ((TestingEventListenerManager) injector.getInstance(EventListenerManager.class)); clusterStateProvider = null; + planCheckerProviderManager = injector.getInstance(PlanCheckerProviderManager.class); } else if (resourceManager) { dispatchManager = null; @@ -390,6 +393,7 @@ else if (resourceManager) { statsCalculator = null; eventListenerManager = ((TestingEventListenerManager) injector.getInstance(EventListenerManager.class)); clusterStateProvider = injector.getInstance(ResourceManagerClusterStateProvider.class); + planCheckerProviderManager = null; } else if (coordinatorSidecar) { dispatchManager = null; @@ -401,6 +405,7 @@ else if (coordinatorSidecar) { statsCalculator = null; eventListenerManager = null; clusterStateProvider = null; + planCheckerProviderManager = null; } else if (catalogServer) { dispatchManager = null; @@ -412,6 +417,7 @@ else if (catalogServer) { statsCalculator = null; eventListenerManager = null; clusterStateProvider = null; + planCheckerProviderManager = null; } else { dispatchManager = null; @@ -423,6 +429,7 @@ else if (catalogServer) { statsCalculator = null; eventListenerManager = null; clusterStateProvider = null; + planCheckerProviderManager = null; } localMemoryManager = injector.getInstance(LocalMemoryManager.class); nodeManager = injector.getInstance(InternalNodeManager.class); @@ -662,6 +669,11 @@ public ClusterMemoryManager getClusterMemoryManager() return (ClusterMemoryManager) clusterMemoryManager; } + public PlanCheckerProviderManager getPlanCheckerProviderManager() + { + return 
planCheckerProviderManager; + } + public GracefulShutdownHandler getGracefulShutdownHandler() { return gracefulShutdownHandler; diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/BasePlanFragmenter.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/BasePlanFragmenter.java index e8bb552691d9..eb3d4e7ff872 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/planner/BasePlanFragmenter.java +++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/BasePlanFragmenter.java @@ -140,8 +140,6 @@ private SubPlan buildFragment(PlanNode root, FragmentProperties properties, Plan properties.getPartitionedSources()); Set fragmentVariableTypes = extractOutputVariables(root); - planChecker.validatePlanFragment(root, session, metadata, warningCollector); - Set tableWriterNodeIds = PlanFragmenterUtils.getTableWriterNodeIds(root); boolean outputTableWriterFragment = tableWriterNodeIds.stream().anyMatch(outputTableWriterNodeIds::contains); if (outputTableWriterFragment) { @@ -164,6 +162,8 @@ private SubPlan buildFragment(PlanNode root, FragmentProperties properties, Plan Optional.of(statsAndCosts.getForSubplan(root)), Optional.of(jsonFragmentPlan(root, fragmentVariableTypes, statsAndCosts.getForSubplan(root), metadata.getFunctionAndTypeManager(), session))); + planChecker.validatePlanFragment(fragment, session, metadata, warningCollector); + return new SubPlan(fragment, properties.getChildren()); } diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/PlanFragmenter.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/PlanFragmenter.java index 3ef3eccaf626..f6ad388989dc 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/planner/PlanFragmenter.java +++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/PlanFragmenter.java @@ -29,6 +29,7 @@ import com.facebook.presto.sql.planner.BasePlanFragmenter.FragmentProperties; import com.facebook.presto.sql.planner.plan.SimplePlanRewriter; import 
com.facebook.presto.sql.planner.sanity.PlanChecker; +import com.facebook.presto.sql.planner.sanity.PlanCheckerProviderManager; import com.google.common.collect.ImmutableList; import javax.inject.Inject; @@ -54,13 +55,13 @@ public class PlanFragmenter private final PlanChecker singleNodePlanChecker; @Inject - public PlanFragmenter(Metadata metadata, NodePartitioningManager nodePartitioningManager, QueryManagerConfig queryManagerConfig, FeaturesConfig featuresConfig) + public PlanFragmenter(Metadata metadata, NodePartitioningManager nodePartitioningManager, QueryManagerConfig queryManagerConfig, FeaturesConfig featuresConfig, PlanCheckerProviderManager planCheckerProviderManager) { this.metadata = requireNonNull(metadata, "metadata is null"); this.nodePartitioningManager = requireNonNull(nodePartitioningManager, "nodePartitioningManager is null"); this.config = requireNonNull(queryManagerConfig, "queryManagerConfig is null"); - this.distributedPlanChecker = new PlanChecker(requireNonNull(featuresConfig, "featuresConfig is null"), false); - this.singleNodePlanChecker = new PlanChecker(requireNonNull(featuresConfig, "featuresConfig is null"), true); + this.distributedPlanChecker = new PlanChecker(requireNonNull(featuresConfig, "featuresConfig is null"), false, planCheckerProviderManager); + this.singleNodePlanChecker = new PlanChecker(requireNonNull(featuresConfig, "featuresConfig is null"), true, planCheckerProviderManager); } public SubPlan createSubPlans(Session session, Plan plan, boolean forceSingleNode, PlanNodeIdAllocator idAllocator, WarningCollector warningCollector) diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/plan/JsonCodecSimplePlanFragmentSerde.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/plan/JsonCodecSimplePlanFragmentSerde.java new file mode 100644 index 000000000000..dfd68ca63a57 --- /dev/null +++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/plan/JsonCodecSimplePlanFragmentSerde.java @@ -0,0 
+1,47 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.sql.planner.plan; + +import com.facebook.airlift.json.JsonCodec; +import com.facebook.presto.spi.plan.SimplePlanFragment; +import com.facebook.presto.spi.plan.SimplePlanFragmentSerde; +import com.google.inject.Inject; + +import java.nio.charset.StandardCharsets; + +import static java.util.Objects.requireNonNull; + +public class JsonCodecSimplePlanFragmentSerde + implements SimplePlanFragmentSerde +{ + private final JsonCodec codec; + + @Inject + public JsonCodecSimplePlanFragmentSerde(JsonCodec codec) + { + this.codec = requireNonNull(codec, "SimplePlanFragment JSON codec is null"); + } + + @Override + public String serialize(SimplePlanFragment planFragment) + { + return new String(codec.toBytes(planFragment), StandardCharsets.UTF_8); + } + + @Override + public SimplePlanFragment deserialize(String value) + { + return codec.fromBytes(value.getBytes(StandardCharsets.UTF_8)); + } +} diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/sanity/PlanChecker.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/sanity/PlanChecker.java index 0263b88a93fc..20495092f914 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/planner/sanity/PlanChecker.java +++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/sanity/PlanChecker.java @@ -16,28 +16,35 @@ import com.facebook.presto.Session; import com.facebook.presto.metadata.Metadata; 
import com.facebook.presto.spi.WarningCollector; +import com.facebook.presto.spi.plan.PlanCheckerProvider; import com.facebook.presto.spi.plan.PlanNode; +import com.facebook.presto.spi.plan.SimplePlanFragment; import com.facebook.presto.sql.analyzer.FeaturesConfig; +import com.facebook.presto.sql.planner.PlanFragment; import com.google.common.collect.ImmutableListMultimap; import com.google.common.collect.Multimap; import javax.inject.Inject; +import static java.util.Objects.requireNonNull; + /** * Perform checks on the plan that may generate warnings or errors. */ public final class PlanChecker { private final Multimap checkers; + private final PlanCheckerProviderManager planCheckerProviderManager; @Inject - public PlanChecker(FeaturesConfig featuresConfig) + public PlanChecker(FeaturesConfig featuresConfig, PlanCheckerProviderManager planCheckerProviderManager) { - this(featuresConfig, false); + this(featuresConfig, false, planCheckerProviderManager); } - public PlanChecker(FeaturesConfig featuresConfig, boolean forceSingleNode) + public PlanChecker(FeaturesConfig featuresConfig, boolean forceSingleNode, PlanCheckerProviderManager planCheckerProviderManager) { + this.planCheckerProviderManager = requireNonNull(planCheckerProviderManager, "planCheckerProviderManager is null"); ImmutableListMultimap.Builder builder = ImmutableListMultimap.builder(); builder.putAll( Stage.INTERMEDIATE, @@ -78,25 +85,58 @@ public PlanChecker(FeaturesConfig featuresConfig, boolean forceSingleNode) public void validateFinalPlan(PlanNode planNode, Session session, Metadata metadata, WarningCollector warningCollector) { checkers.get(Stage.FINAL).forEach(checker -> checker.validate(planNode, session, metadata, warningCollector)); + for (PlanCheckerProvider provider : planCheckerProviderManager.getPlanCheckerProviders()) { + for (com.facebook.presto.spi.plan.PlanChecker checker : provider.getFinalPlanCheckers()) { + checker.validate(planNode, warningCollector); + } + } } public void 
validateIntermediatePlan(PlanNode planNode, Session session, Metadata metadata, WarningCollector warningCollector) { checkers.get(Stage.INTERMEDIATE).forEach(checker -> checker.validate(planNode, session, metadata, warningCollector)); + for (PlanCheckerProvider provider : planCheckerProviderManager.getPlanCheckerProviders()) { + for (com.facebook.presto.spi.plan.PlanChecker checker : provider.getIntermediatePlanCheckers()) { + checker.validate(planNode, warningCollector); + } + } } - public void validatePlanFragment(PlanNode planNode, Session session, Metadata metadata, WarningCollector warningCollector) + public void validatePlanFragment(PlanFragment planFragment, Session session, Metadata metadata, WarningCollector warningCollector) { - checkers.get(Stage.FRAGMENT).forEach(checker -> checker.validate(planNode, session, metadata, warningCollector)); + checkers.get(Stage.FRAGMENT).forEach(checker -> checker.validateFragment(planFragment, session, metadata, warningCollector)); + for (PlanCheckerProvider provider : planCheckerProviderManager.getPlanCheckerProviders()) { + for (com.facebook.presto.spi.plan.PlanChecker checker : provider.getFragmentPlanCheckers()) { + checker.validateFragment(toSimplePlanFragment(planFragment), warningCollector); + } + } } public interface Checker { void validate(PlanNode planNode, Session session, Metadata metadata, WarningCollector warningCollector); + + default void validateFragment(PlanFragment planFragment, Session session, Metadata metadata, WarningCollector warningCollector) + { + validate(planFragment.getRoot(), session, metadata, warningCollector); + } } private enum Stage { INTERMEDIATE, FINAL, FRAGMENT } + + private static SimplePlanFragment toSimplePlanFragment(PlanFragment planFragment) + { + return new SimplePlanFragment( + planFragment.getId(), + planFragment.getRoot(), + planFragment.getVariables(), + planFragment.getPartitioning(), + planFragment.getTableScanSchedulingOrder(), + planFragment.getPartitioningScheme(), + 
planFragment.getStageExecutionDescriptor(), + planFragment.isOutputTableWriterFragment()); + } } diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/sanity/PlanCheckerProviderManager.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/sanity/PlanCheckerProviderManager.java new file mode 100644 index 000000000000..7bbc7acaec60 --- /dev/null +++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/sanity/PlanCheckerProviderManager.java @@ -0,0 +1,59 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.sql.planner.sanity; + +import com.facebook.presto.spi.plan.PlanCheckerProvider; +import com.facebook.presto.spi.plan.PlanCheckerProviderFactory; +import com.facebook.presto.spi.plan.SimplePlanFragmentSerde; +import com.google.inject.Inject; + +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.stream.Collectors; + +import static java.lang.String.format; +import static java.util.Objects.requireNonNull; + +public class PlanCheckerProviderManager +{ + private final SimplePlanFragmentSerde simplePlanFragmentSerde; + private final Map providerFactories = new ConcurrentHashMap<>(); + private final CopyOnWriteArrayList providers = new CopyOnWriteArrayList<>(); + + @Inject + public PlanCheckerProviderManager(SimplePlanFragmentSerde simplePlanFragmentSerde) + { + this.simplePlanFragmentSerde = requireNonNull(simplePlanFragmentSerde, "planNodeSerde is null"); + } + + public void addPlanCheckerProviderFactory(PlanCheckerProviderFactory planCheckerProviderFactory) + { + requireNonNull(planCheckerProviderFactory, "planCheckerProviderFactory is null"); + if (providerFactories.putIfAbsent(planCheckerProviderFactory.getName(), planCheckerProviderFactory) != null) { + throw new IllegalArgumentException(format("PlanCheckerProviderFactory '%s' is already registered", planCheckerProviderFactory.getName())); + } + } + + public void loadPlanCheckerProviders() + { + providers.addAllAbsent(providerFactories.values().stream().map(pc -> pc.create(simplePlanFragmentSerde)).collect(Collectors.toList())); + } + + public List getPlanCheckerProviders() + { + return providers; + } +} diff --git a/presto-main/src/main/java/com/facebook/presto/testing/LocalQueryRunner.java b/presto-main/src/main/java/com/facebook/presto/testing/LocalQueryRunner.java index 4bc53439ed6d..96c4c61beff7 100644 --- 
a/presto-main/src/main/java/com/facebook/presto/testing/LocalQueryRunner.java +++ b/presto-main/src/main/java/com/facebook/presto/testing/LocalQueryRunner.java @@ -140,6 +140,7 @@ import com.facebook.presto.spi.plan.PlanNode; import com.facebook.presto.spi.plan.PlanNodeId; import com.facebook.presto.spi.plan.PlanNodeIdAllocator; +import com.facebook.presto.spi.plan.SimplePlanFragment; import com.facebook.presto.spi.plan.StageExecutionDescriptor; import com.facebook.presto.spi.plan.TableScanNode; import com.facebook.presto.spiller.FileSingleStreamSpillerFactory; @@ -185,8 +186,10 @@ import com.facebook.presto.sql.planner.RemoteSourceFactory; import com.facebook.presto.sql.planner.SubPlan; import com.facebook.presto.sql.planner.optimizations.PlanOptimizer; +import com.facebook.presto.sql.planner.plan.JsonCodecSimplePlanFragmentSerde; import com.facebook.presto.sql.planner.planPrinter.PlanPrinter; import com.facebook.presto.sql.planner.sanity.PlanChecker; +import com.facebook.presto.sql.planner.sanity.PlanCheckerProviderManager; import com.facebook.presto.sql.relational.RowExpressionDeterminismEvaluator; import com.facebook.presto.sql.relational.RowExpressionDomainTranslator; import com.facebook.presto.sql.tree.AlterFunction; @@ -297,6 +300,7 @@ public class LocalQueryRunner private final SqlParser sqlParser; private final PlanFragmenter planFragmenter; private final InMemoryNodeManager nodeManager; + private final PlanCheckerProviderManager planCheckerProviderManager; private final PageSorter pageSorter; private final PageIndexerFactory pageIndexerFactory; private final MetadataManager metadata; @@ -432,9 +436,10 @@ private LocalQueryRunner(Session defaultSession, FeaturesConfig featuresConfig, new AnalyzePropertyManager(), transactionManager); this.splitManager = new SplitManager(metadata, new QueryManagerConfig(), nodeSchedulerConfig); - this.distributedPlanChecker = new PlanChecker(featuresConfig, false); - this.singleNodePlanChecker = new 
PlanChecker(featuresConfig, true); - this.planFragmenter = new PlanFragmenter(this.metadata, this.nodePartitioningManager, new QueryManagerConfig(), featuresConfig); + this.planCheckerProviderManager = new PlanCheckerProviderManager(new JsonCodecSimplePlanFragmentSerde(jsonCodec(SimplePlanFragment.class))); + this.distributedPlanChecker = new PlanChecker(featuresConfig, false, planCheckerProviderManager); + this.singleNodePlanChecker = new PlanChecker(featuresConfig, true, planCheckerProviderManager); + this.planFragmenter = new PlanFragmenter(this.metadata, this.nodePartitioningManager, new QueryManagerConfig(), featuresConfig, planCheckerProviderManager); this.joinCompiler = new JoinCompiler(metadata); this.pageIndexerFactory = new GroupByHashPageIndexerFactory(joinCompiler); this.statsNormalizer = new StatsNormalizer(); @@ -515,7 +520,8 @@ private LocalQueryRunner(Session defaultSession, FeaturesConfig featuresConfig, new ThrowingClusterTtlProviderManager(), historyBasedPlanStatisticsManager, new TracerProviderManager(new TracingConfig()), - new NodeStatusNotificationManager()); + new NodeStatusNotificationManager(), + planCheckerProviderManager); connectorManager.addConnectorFactory(globalSystemConnectorFactory); connectorManager.createConnection(GlobalSystemConnector.NAME, GlobalSystemConnector.NAME, ImmutableMap.of()); @@ -643,6 +649,12 @@ public ConnectorPlanOptimizerManager getPlanOptimizerManager() return planOptimizerManager; } + @Override + public PlanCheckerProviderManager getPlanCheckerProviderManager() + { + return planCheckerProviderManager; + } + public PageSourceManager getPageSourceManager() { return pageSourceManager; diff --git a/presto-main/src/main/java/com/facebook/presto/testing/QueryRunner.java b/presto-main/src/main/java/com/facebook/presto/testing/QueryRunner.java index 2cae8bfa14d5..b19fa40da024 100644 --- a/presto-main/src/main/java/com/facebook/presto/testing/QueryRunner.java +++ 
b/presto-main/src/main/java/com/facebook/presto/testing/QueryRunner.java @@ -27,6 +27,7 @@ import com.facebook.presto.sql.planner.ConnectorPlanOptimizerManager; import com.facebook.presto.sql.planner.NodePartitioningManager; import com.facebook.presto.sql.planner.Plan; +import com.facebook.presto.sql.planner.sanity.PlanCheckerProviderManager; import com.facebook.presto.transaction.TransactionManager; import org.intellij.lang.annotations.Language; @@ -54,6 +55,8 @@ public interface QueryRunner ConnectorPlanOptimizerManager getPlanOptimizerManager(); + PlanCheckerProviderManager getPlanCheckerProviderManager(); + StatsCalculator getStatsCalculator(); Optional getEventListener(); diff --git a/presto-main/src/test/java/com/facebook/presto/cost/TestCostCalculator.java b/presto-main/src/test/java/com/facebook/presto/cost/TestCostCalculator.java index d19b15f2a185..e788dbe4e8a9 100644 --- a/presto-main/src/test/java/com/facebook/presto/cost/TestCostCalculator.java +++ b/presto-main/src/test/java/com/facebook/presto/cost/TestCostCalculator.java @@ -42,6 +42,7 @@ import com.facebook.presto.spi.plan.PlanNodeId; import com.facebook.presto.spi.plan.PlanNodeIdAllocator; import com.facebook.presto.spi.plan.ProjectNode; +import com.facebook.presto.spi.plan.SimplePlanFragment; import com.facebook.presto.spi.plan.TableScanNode; import com.facebook.presto.spi.plan.UnionNode; import com.facebook.presto.spi.relation.RowExpression; @@ -57,7 +58,9 @@ import com.facebook.presto.sql.planner.TypeProvider; import com.facebook.presto.sql.planner.plan.ExchangeNode; import com.facebook.presto.sql.planner.plan.JoinNode; +import com.facebook.presto.sql.planner.plan.JsonCodecSimplePlanFragmentSerde; import com.facebook.presto.sql.planner.plan.SequenceNode; +import com.facebook.presto.sql.planner.sanity.PlanCheckerProviderManager; import com.facebook.presto.sql.tree.Cast; import com.facebook.presto.sql.tree.SymbolReference; import com.facebook.presto.tpch.TpchColumnHandle; @@ -84,6 +87,7 @@ import 
java.util.Optional; import java.util.function.Function; +import static com.facebook.airlift.json.JsonCodec.jsonCodec; import static com.facebook.presto.common.type.BigintType.BIGINT; import static com.facebook.presto.common.type.VarcharType.VARCHAR; import static com.facebook.presto.metadata.MetadataManager.createTestMetadataManager; @@ -123,6 +127,7 @@ public class TestCostCalculator private NodeScheduler nodeScheduler; private NodePartitioningManager nodePartitioningManager; private TestingRowExpressionTranslator translator; + private PlanCheckerProviderManager planCheckerProviderManager; @BeforeClass public void setUp() @@ -151,7 +156,8 @@ public void setUp() new SimpleTtlNodeSelectorConfig()); PartitioningProviderManager partitioningProviderManager = new PartitioningProviderManager(); nodePartitioningManager = new NodePartitioningManager(nodeScheduler, partitioningProviderManager, new NodeSelectionStats()); - planFragmenter = new PlanFragmenter(metadata, nodePartitioningManager, new QueryManagerConfig(), new FeaturesConfig()); + planCheckerProviderManager = new PlanCheckerProviderManager(new JsonCodecSimplePlanFragmentSerde(jsonCodec(SimplePlanFragment.class))); + planFragmenter = new PlanFragmenter(metadata, nodePartitioningManager, new QueryManagerConfig(), new FeaturesConfig(), planCheckerProviderManager); translator = new TestingRowExpressionTranslator(); } diff --git a/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/ContainerQueryRunner.java b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/ContainerQueryRunner.java index 09cd72184cac..b569f25c7458 100644 --- a/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/ContainerQueryRunner.java +++ b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/ContainerQueryRunner.java @@ -25,6 +25,7 @@ import com.facebook.presto.split.SplitManager; import com.facebook.presto.sql.planner.ConnectorPlanOptimizerManager; import 
com.facebook.presto.sql.planner.NodePartitioningManager; +import com.facebook.presto.sql.planner.sanity.PlanCheckerProviderManager; import com.facebook.presto.testing.MaterializedResult; import com.facebook.presto.testing.QueryRunner; import com.facebook.presto.testing.TestingAccessControlManager; @@ -185,6 +186,12 @@ public ConnectorPlanOptimizerManager getPlanOptimizerManager() throw new UnsupportedOperationException(); } + @Override + public PlanCheckerProviderManager getPlanCheckerProviderManager() + { + throw new UnsupportedOperationException(); + } + @Override public StatsCalculator getStatsCalculator() { diff --git a/presto-spark-base/src/main/java/com/facebook/presto/spark/PrestoSparkModule.java b/presto-spark-base/src/main/java/com/facebook/presto/spark/PrestoSparkModule.java index 8eb985d2a059..9e258ccf9c01 100644 --- a/presto-spark-base/src/main/java/com/facebook/presto/spark/PrestoSparkModule.java +++ b/presto-spark-base/src/main/java/com/facebook/presto/spark/PrestoSparkModule.java @@ -136,6 +136,8 @@ import com.facebook.presto.spi.PageSorter; import com.facebook.presto.spi.analyzer.ViewDefinition; import com.facebook.presto.spi.memory.ClusterMemoryPoolManager; +import com.facebook.presto.spi.plan.SimplePlanFragment; +import com.facebook.presto.spi.plan.SimplePlanFragmentSerde; import com.facebook.presto.spi.relation.DeterminismEvaluator; import com.facebook.presto.spi.relation.DomainTranslator; import com.facebook.presto.spi.relation.PredicateCompiler; @@ -186,7 +188,9 @@ import com.facebook.presto.sql.planner.PlanFragment; import com.facebook.presto.sql.planner.PlanFragmenter; import com.facebook.presto.sql.planner.PlanOptimizers; +import com.facebook.presto.sql.planner.plan.JsonCodecSimplePlanFragmentSerde; import com.facebook.presto.sql.planner.sanity.PlanChecker; +import com.facebook.presto.sql.planner.sanity.PlanCheckerProviderManager; import com.facebook.presto.sql.relational.RowExpressionDeterminismEvaluator; import 
com.facebook.presto.sql.relational.RowExpressionDomainTranslator; import com.facebook.presto.tracing.TracerProviderManager; @@ -292,6 +296,8 @@ protected void setup(Binder binder) jsonCodecBinder(binder).bindJsonCodec(PrestoSparkLocalShuffleWriteInfo.class); jsonCodecBinder(binder).bindJsonCodec(BatchTaskUpdateRequest.class); jsonCodecBinder(binder).bindJsonCodec(BroadcastFileInfo.class); + jsonCodecBinder(binder).bindJsonCodec(SimplePlanFragment.class); + binder.bind(SimplePlanFragmentSerde.class).to(JsonCodecSimplePlanFragmentSerde.class).in(Scopes.SINGLETON); // smile codecs smileCodecBinder(binder).bindSmileCodec(TaskSource.class); @@ -542,6 +548,8 @@ protected void setup(Binder binder) // extra credentials and authenticator for Presto-on-Spark newSetBinder(binder, PrestoSparkCredentialsProvider.class); newSetBinder(binder, PrestoSparkAuthenticatorProvider.class); + + binder.bind(PlanCheckerProviderManager.class).in(Scopes.SINGLETON); } @Provides diff --git a/presto-spark-base/src/main/java/com/facebook/presto/spark/PrestoSparkQueryExecutionFactory.java b/presto-spark-base/src/main/java/com/facebook/presto/spark/PrestoSparkQueryExecutionFactory.java index 5f84fb6daea8..850068ac5d72 100644 --- a/presto-spark-base/src/main/java/com/facebook/presto/spark/PrestoSparkQueryExecutionFactory.java +++ b/presto-spark-base/src/main/java/com/facebook/presto/spark/PrestoSparkQueryExecutionFactory.java @@ -99,6 +99,7 @@ import com.facebook.presto.sql.planner.PartitioningProviderManager; import com.facebook.presto.sql.planner.Plan; import com.facebook.presto.sql.planner.SubPlan; +import com.facebook.presto.sql.planner.sanity.PlanCheckerProviderManager; import com.facebook.presto.sql.tree.Statement; import com.facebook.presto.storage.TempStorageManager; import com.facebook.presto.transaction.TransactionManager; @@ -206,6 +207,7 @@ public class PrestoSparkQueryExecutionFactory private final HistoryBasedPlanStatisticsTracker historyBasedPlanStatisticsTracker; private final 
AdaptivePlanOptimizers adaptivePlanOptimizers; private final FragmentStatsProvider fragmentStatsProvider; + private final PlanCheckerProviderManager planCheckerProviderManager; @Inject public PrestoSparkQueryExecutionFactory( @@ -245,7 +247,8 @@ public PrestoSparkQueryExecutionFactory( Optional errorClassifier, HistoryBasedPlanStatisticsManager historyBasedPlanStatisticsManager, AdaptivePlanOptimizers adaptivePlanOptimizers, - FragmentStatsProvider fragmentStatsProvider) + FragmentStatsProvider fragmentStatsProvider, + PlanCheckerProviderManager planCheckerProviderManager) { this.queryIdGenerator = requireNonNull(queryIdGenerator, "queryIdGenerator is null"); this.sessionSupplier = requireNonNull(sessionSupplier, "sessionSupplier is null"); @@ -284,6 +287,7 @@ public PrestoSparkQueryExecutionFactory( this.historyBasedPlanStatisticsTracker = requireNonNull(historyBasedPlanStatisticsManager, "historyBasedPlanStatisticsManager is null").getHistoryBasedPlanStatisticsTracker(); this.adaptivePlanOptimizers = requireNonNull(adaptivePlanOptimizers, "adaptivePlanOptimizers is null"); this.fragmentStatsProvider = requireNonNull(fragmentStatsProvider, "fragmentStatsProvider is null"); + this.planCheckerProviderManager = requireNonNull(planCheckerProviderManager, "planCheckerProviderManager is null"); } public static QueryInfo createQueryInfo( @@ -764,7 +768,8 @@ else if (preparedQuery.isExplainTypeValidate()) { variableAllocator, planNodeIdAllocator, fragmentStatsProvider, - bootstrapMetricsCollector); + bootstrapMetricsCollector, + planCheckerProviderManager); } } } diff --git a/presto-spark-base/src/main/java/com/facebook/presto/spark/execution/PrestoSparkAdaptiveQueryExecution.java b/presto-spark-base/src/main/java/com/facebook/presto/spark/execution/PrestoSparkAdaptiveQueryExecution.java index 87edae52c036..d649580af500 100644 --- a/presto-spark-base/src/main/java/com/facebook/presto/spark/execution/PrestoSparkAdaptiveQueryExecution.java +++ 
b/presto-spark-base/src/main/java/com/facebook/presto/spark/execution/PrestoSparkAdaptiveQueryExecution.java @@ -63,6 +63,7 @@ import com.facebook.presto.sql.planner.plan.ExchangeNode; import com.facebook.presto.sql.planner.plan.RemoteSourceNode; import com.facebook.presto.sql.planner.sanity.PlanChecker; +import com.facebook.presto.sql.planner.sanity.PlanCheckerProviderManager; import com.facebook.presto.transaction.TransactionManager; import com.google.common.collect.ImmutableList; import com.google.common.util.concurrent.UncheckedExecutionException; @@ -175,7 +176,8 @@ public PrestoSparkAdaptiveQueryExecution( VariableAllocator variableAllocator, PlanNodeIdAllocator idAllocator, FragmentStatsProvider fragmentStatsProvider, - Optional>> bootstrapMetricsCollector) + Optional>> bootstrapMetricsCollector, + PlanCheckerProviderManager planCheckerProviderManager) { super( sparkContext, @@ -219,10 +221,10 @@ public PrestoSparkAdaptiveQueryExecution( this.adaptivePlanOptimizers = requireNonNull(adaptivePlanOptimizers, "adaptivePlanOptimizers is null").getAdaptiveOptimizers(); this.variableAllocator = requireNonNull(variableAllocator, "variableAllocator is null"); this.idAllocator = requireNonNull(idAllocator, "idAllocator is null"); - this.iterativePlanFragmenter = createIterativePlanFragmenter(); + this.iterativePlanFragmenter = createIterativePlanFragmenter(requireNonNull(planCheckerProviderManager, "planCheckerProviderManager is null")); } - private IterativePlanFragmenter createIterativePlanFragmenter() + private IterativePlanFragmenter createIterativePlanFragmenter(PlanCheckerProviderManager planCheckerProviderManager) { boolean forceSingleNode = false; Function isFragmentFinished = this.executedFragments::contains; @@ -232,7 +234,7 @@ private IterativePlanFragmenter createIterativePlanFragmenter() this.planAndMore.getPlan(), isFragmentFinished, this.metadata, - new PlanChecker(this.featuresConfig, forceSingleNode), + new PlanChecker(this.featuresConfig, 
forceSingleNode, planCheckerProviderManager), this.idAllocator, new PrestoSparkNodePartitioningManager(this.partitioningProviderManager), this.queryManagerConfig, diff --git a/presto-spark-base/src/test/java/com/facebook/presto/spark/PrestoSparkQueryRunner.java b/presto-spark-base/src/test/java/com/facebook/presto/spark/PrestoSparkQueryRunner.java index 015f1936c4bc..f85c08ef58eb 100644 --- a/presto-spark-base/src/test/java/com/facebook/presto/spark/PrestoSparkQueryRunner.java +++ b/presto-spark-base/src/test/java/com/facebook/presto/spark/PrestoSparkQueryRunner.java @@ -67,6 +67,7 @@ import com.facebook.presto.sql.parser.SqlParserOptions; import com.facebook.presto.sql.planner.ConnectorPlanOptimizerManager; import com.facebook.presto.sql.planner.NodePartitioningManager; +import com.facebook.presto.sql.planner.sanity.PlanCheckerProviderManager; import com.facebook.presto.testing.MaterializedResult; import com.facebook.presto.testing.MaterializedRow; import com.facebook.presto.testing.QueryRunner; @@ -156,6 +157,7 @@ public class PrestoSparkQueryRunner private final StatsCalculator statsCalculator; private final PluginManager pluginManager; private final ConnectorManager connectorManager; + private final PlanCheckerProviderManager planCheckerProviderManager; private final Set waitTimeMetrics; private final HistoryBasedPlanStatisticsManager historyBasedPlanStatisticsManager; @@ -334,6 +336,7 @@ public PrestoSparkQueryRunner( statsCalculator = injector.getInstance(StatsCalculator.class); pluginManager = injector.getInstance(PluginManager.class); connectorManager = injector.getInstance(ConnectorManager.class); + planCheckerProviderManager = injector.getInstance(PlanCheckerProviderManager.class); waitTimeMetrics = injector.getInstance(new Key>() {}); historyBasedPlanStatisticsManager = injector.getInstance(HistoryBasedPlanStatisticsManager.class); @@ -477,6 +480,12 @@ public ConnectorPlanOptimizerManager getPlanOptimizerManager() return connectorPlanOptimizerManager; } 
+ @Override + public PlanCheckerProviderManager getPlanCheckerProviderManager() + { + return planCheckerProviderManager; + } + @Override public StatsCalculator getStatsCalculator() { diff --git a/presto-spark-base/src/test/java/com/facebook/presto/spark/planner/TestIterativePlanFragmenter.java b/presto-spark-base/src/test/java/com/facebook/presto/spark/planner/TestIterativePlanFragmenter.java index 8f797aa42c7e..7ef6be2897f0 100644 --- a/presto-spark-base/src/test/java/com/facebook/presto/spark/planner/TestIterativePlanFragmenter.java +++ b/presto-spark-base/src/test/java/com/facebook/presto/spark/planner/TestIterativePlanFragmenter.java @@ -58,6 +58,7 @@ import com.facebook.presto.spi.plan.PlanNodeId; import com.facebook.presto.spi.plan.PlanNodeIdAllocator; import com.facebook.presto.spi.plan.ProjectNode; +import com.facebook.presto.spi.plan.SimplePlanFragment; import com.facebook.presto.spi.plan.StageExecutionDescriptor; import com.facebook.presto.spi.plan.TableScanNode; import com.facebook.presto.spi.relation.RowExpression; @@ -73,7 +74,9 @@ import com.facebook.presto.sql.planner.TypeProvider; import com.facebook.presto.sql.planner.plan.ExchangeNode; import com.facebook.presto.sql.planner.plan.JoinNode; +import com.facebook.presto.sql.planner.plan.JsonCodecSimplePlanFragmentSerde; import com.facebook.presto.sql.planner.sanity.PlanChecker; +import com.facebook.presto.sql.planner.sanity.PlanCheckerProviderManager; import com.facebook.presto.tpch.TpchColumnHandle; import com.facebook.presto.tpch.TpchTableHandle; import com.facebook.presto.tpch.TpchTableLayoutHandle; @@ -97,6 +100,7 @@ import java.util.Set; import java.util.function.Function; +import static com.facebook.airlift.json.JsonCodec.jsonCodec; import static com.facebook.presto.SystemSessionProperties.FORCE_SINGLE_NODE_OUTPUT; import static com.facebook.presto.common.type.BigintType.BIGINT; import static com.facebook.presto.metadata.MetadataManager.createTestMetadataManager; @@ -130,6 +134,7 @@ public class 
TestIterativePlanFragmenter private FinalizerService finalizerService; private NodeScheduler nodeScheduler; private NodePartitioningManager nodePartitioningManager; + private PlanCheckerProviderManager planCheckerProviderManager; @BeforeClass public void setUp() @@ -157,7 +162,8 @@ public void setUp() new SimpleTtlNodeSelectorConfig()); PartitioningProviderManager partitioningProviderManager = new PartitioningProviderManager(); nodePartitioningManager = new NodePartitioningManager(nodeScheduler, partitioningProviderManager, new NodeSelectionStats()); - planFragmenter = new PlanFragmenter(metadata, nodePartitioningManager, new QueryManagerConfig(), new FeaturesConfig()); + planCheckerProviderManager = new PlanCheckerProviderManager(new JsonCodecSimplePlanFragmentSerde(jsonCodec(SimplePlanFragment.class))); + planFragmenter = new PlanFragmenter(metadata, nodePartitioningManager, new QueryManagerConfig(), new FeaturesConfig(), planCheckerProviderManager); } @AfterClass(alwaysRun = true) @@ -224,7 +230,7 @@ private Void runTestIterativePlanFragmenter(PlanNode node, Plan plan, SubPlan fu plan, testingFragmentTracker::isFragmentFinished, metadata, - new PlanChecker(new FeaturesConfig()), + new PlanChecker(new FeaturesConfig(), planCheckerProviderManager), new PlanNodeIdAllocator(), nodePartitioningManager, new QueryManagerConfig(), diff --git a/presto-spi/src/main/java/com/facebook/presto/spi/Plugin.java b/presto-spi/src/main/java/com/facebook/presto/spi/Plugin.java index 81e8f55b0a66..aa976b80cac5 100644 --- a/presto-spi/src/main/java/com/facebook/presto/spi/Plugin.java +++ b/presto-spi/src/main/java/com/facebook/presto/spi/Plugin.java @@ -22,6 +22,7 @@ import com.facebook.presto.spi.eventlistener.EventListenerFactory; import com.facebook.presto.spi.function.FunctionNamespaceManagerFactory; import com.facebook.presto.spi.nodestatus.NodeStatusNotificationProviderFactory; +import com.facebook.presto.spi.plan.PlanCheckerProviderFactory; import 
com.facebook.presto.spi.prerequisites.QueryPrerequisitesFactory; import com.facebook.presto.spi.resourceGroups.ResourceGroupConfigurationManagerFactory; import com.facebook.presto.spi.security.PasswordAuthenticatorFactory; @@ -142,4 +143,9 @@ default Iterable getNodeStatusNotificatio { return emptyList(); } + + default Iterable getPlanCheckerProviderFactories() + { + return emptyList(); + } } diff --git a/presto-spi/src/main/java/com/facebook/presto/spi/plan/PlanChecker.java b/presto-spi/src/main/java/com/facebook/presto/spi/plan/PlanChecker.java new file mode 100644 index 000000000000..16789510c62e --- /dev/null +++ b/presto-spi/src/main/java/com/facebook/presto/spi/plan/PlanChecker.java @@ -0,0 +1,27 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.facebook.presto.spi.plan; + +import com.facebook.presto.spi.WarningCollector; + +public interface PlanChecker +{ + void validate(PlanNode planNode, WarningCollector warningCollector); + + default void validateFragment(SimplePlanFragment planFragment, WarningCollector warningCollector) + { + validate(planFragment.getRoot(), warningCollector); + } +} diff --git a/presto-spi/src/main/java/com/facebook/presto/spi/plan/PlanCheckerProvider.java b/presto-spi/src/main/java/com/facebook/presto/spi/plan/PlanCheckerProvider.java new file mode 100644 index 000000000000..2391c542776e --- /dev/null +++ b/presto-spi/src/main/java/com/facebook/presto/spi/plan/PlanCheckerProvider.java @@ -0,0 +1,36 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.facebook.presto.spi.plan; + +import java.util.Collections; +import java.util.List; + +public interface PlanCheckerProvider +{ + default List getIntermediatePlanCheckers() + { + return Collections.emptyList(); + } + + default List getFinalPlanCheckers() + { + return Collections.emptyList(); + } + + default List getFragmentPlanCheckers() + { + return Collections.emptyList(); + } +} diff --git a/presto-spi/src/main/java/com/facebook/presto/spi/plan/PlanCheckerProviderFactory.java b/presto-spi/src/main/java/com/facebook/presto/spi/plan/PlanCheckerProviderFactory.java new file mode 100644 index 000000000000..11d4cb726b2b --- /dev/null +++ b/presto-spi/src/main/java/com/facebook/presto/spi/plan/PlanCheckerProviderFactory.java @@ -0,0 +1,21 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.spi.plan; + +public interface PlanCheckerProviderFactory +{ + String getName(); + + PlanCheckerProvider create(SimplePlanFragmentSerde simplePlanFragmentSerde); +} diff --git a/presto-spi/src/main/java/com/facebook/presto/spi/plan/SimplePlanFragmentSerde.java b/presto-spi/src/main/java/com/facebook/presto/spi/plan/SimplePlanFragmentSerde.java new file mode 100644 index 000000000000..09e9f4443de4 --- /dev/null +++ b/presto-spi/src/main/java/com/facebook/presto/spi/plan/SimplePlanFragmentSerde.java @@ -0,0 +1,21 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.spi.plan; + +public interface SimplePlanFragmentSerde +{ + String serialize(SimplePlanFragment planFragment); + + SimplePlanFragment deserialize(String value); +} diff --git a/presto-tests/src/main/java/com/facebook/presto/tests/AbstractTestQueryFramework.java b/presto-tests/src/main/java/com/facebook/presto/tests/AbstractTestQueryFramework.java index 1757ba65e8d5..184330bd4854 100644 --- a/presto-tests/src/main/java/com/facebook/presto/tests/AbstractTestQueryFramework.java +++ b/presto-tests/src/main/java/com/facebook/presto/tests/AbstractTestQueryFramework.java @@ -572,14 +572,14 @@ private QueryExplainer getQueryExplainer() .getPlanningTimeOptimizers(); return new QueryExplainer( optimizers, - new PlanFragmenter(metadata, queryRunner.getNodePartitioningManager(), new QueryManagerConfig(), featuresConfig), + new PlanFragmenter(metadata, queryRunner.getNodePartitioningManager(), new QueryManagerConfig(), featuresConfig, queryRunner.getPlanCheckerProviderManager()), metadata, queryRunner.getAccessControl(), sqlParser, queryRunner.getStatsCalculator(), costCalculator, ImmutableMap.of(), - new PlanChecker(featuresConfig, false)); + new PlanChecker(featuresConfig, false, queryRunner.getPlanCheckerProviderManager())); } protected static void skipTestUnless(boolean requirement) diff --git a/presto-tests/src/main/java/com/facebook/presto/tests/DistributedQueryRunner.java b/presto-tests/src/main/java/com/facebook/presto/tests/DistributedQueryRunner.java index 8b5f88303ee0..5329d380ce9e 100644 --- a/presto-tests/src/main/java/com/facebook/presto/tests/DistributedQueryRunner.java +++ b/presto-tests/src/main/java/com/facebook/presto/tests/DistributedQueryRunner.java @@ -46,6 +46,7 @@ import com.facebook.presto.sql.planner.ConnectorPlanOptimizerManager; import com.facebook.presto.sql.planner.NodePartitioningManager; import com.facebook.presto.sql.planner.Plan; +import com.facebook.presto.sql.planner.sanity.PlanCheckerProviderManager; import 
com.facebook.presto.testing.MaterializedResult; import com.facebook.presto.testing.QueryRunner; import com.facebook.presto.testing.TestingAccessControlManager; @@ -581,6 +582,13 @@ public TestingAccessControlManager getAccessControl() return coordinators.get(0).getAccessControl(); } + @Override + public PlanCheckerProviderManager getPlanCheckerProviderManager() + { + checkState(coordinators.size() == 1, "Expected a single coordinator"); + return coordinators.get(0).getPlanCheckerProviderManager(); + } + public TestingPrestoServer getCoordinator() { checkState(coordinators.size() == 1, "Expected a single coordinator"); diff --git a/presto-tests/src/main/java/com/facebook/presto/tests/StandaloneQueryRunner.java b/presto-tests/src/main/java/com/facebook/presto/tests/StandaloneQueryRunner.java index 05e87aa335de..980fb4f991bc 100644 --- a/presto-tests/src/main/java/com/facebook/presto/tests/StandaloneQueryRunner.java +++ b/presto-tests/src/main/java/com/facebook/presto/tests/StandaloneQueryRunner.java @@ -30,6 +30,7 @@ import com.facebook.presto.sql.parser.SqlParserOptions; import com.facebook.presto.sql.planner.ConnectorPlanOptimizerManager; import com.facebook.presto.sql.planner.NodePartitioningManager; +import com.facebook.presto.sql.planner.sanity.PlanCheckerProviderManager; import com.facebook.presto.testing.MaterializedResult; import com.facebook.presto.testing.QueryRunner; import com.facebook.presto.testing.TestingAccessControlManager; @@ -163,6 +164,12 @@ public ConnectorPlanOptimizerManager getPlanOptimizerManager() return server.getPlanOptimizerManager(); } + @Override + public PlanCheckerProviderManager getPlanCheckerProviderManager() + { + return server.getPlanCheckerProviderManager(); + } + @Override public StatsCalculator getStatsCalculator() { diff --git a/presto-tests/src/test/java/com/facebook/presto/execution/TestQueues.java b/presto-tests/src/test/java/com/facebook/presto/execution/TestQueues.java index 478b7566cde5..c03ee3b3104e 100644 --- 
a/presto-tests/src/test/java/com/facebook/presto/execution/TestQueues.java +++ b/presto-tests/src/test/java/com/facebook/presto/execution/TestQueues.java @@ -44,11 +44,13 @@ import java.io.IOException; import java.util.Optional; import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; import static com.facebook.airlift.http.client.Request.Builder.prepareGet; import static com.facebook.airlift.http.client.Request.Builder.preparePut; import static com.facebook.airlift.http.client.StringResponseHandler.createStringResponseHandler; import static com.facebook.airlift.testing.Closeables.closeQuietly; +import static com.facebook.presto.SystemSessionProperties.EAGER_PLAN_VALIDATION_ENABLED; import static com.facebook.presto.SystemSessionProperties.HASH_PARTITION_COUNT; import static com.facebook.presto.execution.QueryState.FAILED; import static com.facebook.presto.execution.QueryState.FINISHED; @@ -374,6 +376,49 @@ public void testQueuedQueryInteraction() assertEquals(queryInfo.getErrorCode(), ADMINISTRATIVELY_PREEMPTED.toErrorCode()); } + @Test(timeOut = 240_000) + public void testEagerPlanValidation() + throws Exception + { + AtomicBoolean triggerValidationFailure = new AtomicBoolean(); + + queryRunner.installPlugin(new ResourceGroupManagerPlugin()); + queryRunner.installPlugin(new TestingPlanCheckerProviderPlugin(triggerValidationFailure)); + queryRunner.getPlanCheckerProviderManager().loadPlanCheckerProviders(); + queryRunner.getCoordinator().getResourceGroupManager().get().forceSetConfigurationManager("file", ImmutableMap.of("resource-groups.config-file", getResourceFilePath("resource_groups_config_eager_plan_validation.json"))); + + Session.SessionBuilder builder = testSessionBuilder() + .setCatalog("tpch") + .setSchema("sf100000") + .setSource("eager") + .setSystemProperty(EAGER_PLAN_VALIDATION_ENABLED, "true"); + + Session firstSession = builder.setQueryId(QueryId.valueOf("20240930_203743_00001_11111")).build(); + QueryId firstQuery = 
createQuery(queryRunner, firstSession, LONG_LASTING_QUERY); + waitForQueryState(queryRunner, firstQuery, RUNNING); + + Session secondSession = builder.setQueryId(QueryId.valueOf("20240930_203743_00002_22222")).build(); + QueryId secondQuery = createQuery(queryRunner, secondSession, LONG_LASTING_QUERY); + waitForQueryState(queryRunner, secondQuery, QUEUED); + + Session thirdSession = builder.setQueryId(QueryId.valueOf("20240930_203743_00003_33333")).build(); + QueryId thirdQuery = createQuery(queryRunner, thirdSession, LONG_LASTING_QUERY); + + // Force failure during plan validation after queuing has begun + triggerValidationFailure.set(true); + waitForQueryState(queryRunner, thirdQuery, FAILED); + + DispatchManager dispatchManager = queryRunner.getCoordinator().getDispatchManager(); + BasicQueryInfo queryInfo = dispatchManager.getQueryInfo(thirdQuery); + assertEquals(queryInfo.getErrorCode(), TriggerFailurePlanChecker.FAILURE_ERROR_CODE.toErrorCode()); + assertNotNull(queryInfo.getFailureInfo()); + assertNotNull(queryInfo.getFailureInfo().getMessage()); + assertEquals(queryInfo.getFailureInfo().getMessage(), TriggerFailurePlanChecker.FAILURE_MESSAGE); + + cancelQuery(queryRunner, secondQuery); + cancelQuery(queryRunner, firstQuery); + } + private void assertResourceGroup(DistributedQueryRunner queryRunner, Session session, String query, ResourceGroupId expectedResourceGroup) throws InterruptedException { diff --git a/presto-tests/src/test/java/com/facebook/presto/execution/TestingPlanCheckerProviderPlugin.java b/presto-tests/src/test/java/com/facebook/presto/execution/TestingPlanCheckerProviderPlugin.java new file mode 100644 index 000000000000..050bebcd44a9 --- /dev/null +++ b/presto-tests/src/test/java/com/facebook/presto/execution/TestingPlanCheckerProviderPlugin.java @@ -0,0 +1,59 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.execution; + +import com.facebook.presto.spi.Plugin; +import com.facebook.presto.spi.plan.PlanChecker; +import com.facebook.presto.spi.plan.PlanCheckerProvider; +import com.facebook.presto.spi.plan.PlanCheckerProviderFactory; +import com.facebook.presto.spi.plan.SimplePlanFragmentSerde; +import com.google.common.collect.ImmutableList; + +import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; + +public class TestingPlanCheckerProviderPlugin + implements Plugin, PlanCheckerProviderFactory, PlanCheckerProvider +{ + private final AtomicBoolean triggerValidationFailure; + + public TestingPlanCheckerProviderPlugin(AtomicBoolean triggerValidationFailure) + { + this.triggerValidationFailure = triggerValidationFailure; + } + + @Override + public Iterable getPlanCheckerProviderFactories() + { + return ImmutableList.of(this); + } + + @Override + public String getName() + { + return "TestPlanCheckers"; + } + + @Override + public PlanCheckerProvider create(SimplePlanFragmentSerde simplePlanFragmentSerde) + { + return this; + } + + @Override + public List getIntermediatePlanCheckers() + { + return ImmutableList.of(new TriggerFailurePlanChecker(triggerValidationFailure)); + } +} diff --git a/presto-tests/src/test/java/com/facebook/presto/execution/TriggerFailurePlanChecker.java b/presto-tests/src/test/java/com/facebook/presto/execution/TriggerFailurePlanChecker.java new file mode 100644 index 000000000000..c2e64a16b56b --- /dev/null +++ 
b/presto-tests/src/test/java/com/facebook/presto/execution/TriggerFailurePlanChecker.java @@ -0,0 +1,45 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.execution; + +import com.facebook.presto.spi.PrestoException; +import com.facebook.presto.spi.StandardErrorCode; +import com.facebook.presto.spi.WarningCollector; +import com.facebook.presto.spi.plan.PlanChecker; +import com.facebook.presto.spi.plan.PlanNode; + +import java.util.concurrent.atomic.AtomicBoolean; + +import static com.facebook.presto.spi.StandardErrorCode.QUERY_REJECTED; + +public class TriggerFailurePlanChecker + implements PlanChecker +{ + public static final StandardErrorCode FAILURE_ERROR_CODE = QUERY_REJECTED; + public static final String FAILURE_MESSAGE = "Plan validation failure triggered"; + private final AtomicBoolean triggerValidationFailure; + + public TriggerFailurePlanChecker(AtomicBoolean triggerValidationFailure) + { + this.triggerValidationFailure = triggerValidationFailure; + } + + @Override + public void validate(PlanNode planNode, WarningCollector warningCollector) + { + if (triggerValidationFailure.get()) { + throw new PrestoException(FAILURE_ERROR_CODE, FAILURE_MESSAGE); + } + } +} diff --git a/presto-tests/src/test/resources/resource_groups_config_eager_plan_validation.json b/presto-tests/src/test/resources/resource_groups_config_eager_plan_validation.json new file mode 100644 index 000000000000..de4655f643ca --- /dev/null +++ 
b/presto-tests/src/test/resources/resource_groups_config_eager_plan_validation.json @@ -0,0 +1,32 @@ +{ + "rootGroups": [ + { + "name": "global", + "softMemoryLimit": "1MB", + "hardConcurrencyLimit": 100, + "maxQueued": 1000, + "subGroups": [ + { + "name": "user-${USER}", + "softMemoryLimit": "1MB", + "hardConcurrencyLimit": 3, + "maxQueued": 3, + "subGroups": [ + { + "name": "eager-${USER}", + "softMemoryLimit": "1MB", + "hardConcurrencyLimit": 1, + "maxQueued": 2 + } + ] + } + ] + } + ], + "selectors": [ + { + "source": "(?i).*eager.*", + "group": "global.user-${USER}.eager-${USER}" + } + ] +} diff --git a/presto-thrift-connector/src/test/java/com/facebook/presto/connector/thrift/integration/ThriftQueryRunner.java b/presto-thrift-connector/src/test/java/com/facebook/presto/connector/thrift/integration/ThriftQueryRunner.java index 7f47f65ff49e..c9a4f82b7cd8 100644 --- a/presto-thrift-connector/src/test/java/com/facebook/presto/connector/thrift/integration/ThriftQueryRunner.java +++ b/presto-thrift-connector/src/test/java/com/facebook/presto/connector/thrift/integration/ThriftQueryRunner.java @@ -36,6 +36,7 @@ import com.facebook.presto.split.SplitManager; import com.facebook.presto.sql.planner.ConnectorPlanOptimizerManager; import com.facebook.presto.sql.planner.NodePartitioningManager; +import com.facebook.presto.sql.planner.sanity.PlanCheckerProviderManager; import com.facebook.presto.testing.MaterializedResult; import com.facebook.presto.testing.QueryRunner; import com.facebook.presto.testing.TestingAccessControlManager; @@ -229,6 +230,12 @@ public ConnectorPlanOptimizerManager getPlanOptimizerManager() return source.getPlanOptimizerManager(); } + @Override + public PlanCheckerProviderManager getPlanCheckerProviderManager() + { + return source.getPlanCheckerProviderManager(); + } + @Override public StatsCalculator getStatsCalculator() { From 9c4e018d2fb35feb882729c01cac6c0bbd651361 Mon Sep 17 00:00:00 2001 From: Jialiang Tan Date: Thu, 10 Oct 2024 18:12:49 
-0700 Subject: [PATCH 73/86] [prestissimo] Fix memory push back latency unit --- .../presto_cpp/main/PeriodicMemoryChecker.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/presto-native-execution/presto_cpp/main/PeriodicMemoryChecker.cpp b/presto-native-execution/presto_cpp/main/PeriodicMemoryChecker.cpp index 7ae178ce5f3b..1f9591bf700c 100644 --- a/presto-native-execution/presto_cpp/main/PeriodicMemoryChecker.cpp +++ b/presto-native-execution/presto_cpp/main/PeriodicMemoryChecker.cpp @@ -169,10 +169,10 @@ void PeriodicMemoryChecker::pushbackMemory() { const uint64_t bytesToShrink = currentMemBytes - targetMemBytes; VELOX_CHECK_GT(bytesToShrink, 0); - uint64_t latencyMs{0}; + uint64_t latencyUs{0}; uint64_t freedBytes{0}; { - velox::MicrosecondTimer timer(&latencyMs); + velox::MicrosecondTimer timer(&latencyUs); auto* cache = velox::cache::AsyncDataCache::getInstance(); auto systemConfig = SystemConfig::instance(); freedBytes = cache != nullptr ? cache->shrink(bytesToShrink) : 0; @@ -208,7 +208,7 @@ void PeriodicMemoryChecker::pushbackMemory() { } } RECORD_HISTOGRAM_METRIC_VALUE( - kCounterMemoryPushbackLatencyMs, latencyMs * 1000); + kCounterMemoryPushbackLatencyMs, latencyUs / 1000); LOG(INFO) << "Shrunk " << velox::succinctBytes(freedBytes); } } // namespace facebook::presto From fb3a00f51de7446e103aeaa4e7ec515a5c7269c5 Mon Sep 17 00:00:00 2001 From: Xiaoxuan Meng Date: Thu, 10 Oct 2024 22:58:29 -0700 Subject: [PATCH 74/86] [native]Prestissimo exception error code conversion fix --- .../presto_cpp/main/common/Exception.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/presto-native-execution/presto_cpp/main/common/Exception.h b/presto-native-execution/presto_cpp/main/common/Exception.h index 7d646fbd238f..c29a4265d587 100644 --- a/presto-native-execution/presto_cpp/main/common/Exception.h +++ b/presto-native-execution/presto_cpp/main/common/Exception.h @@ -47,13 +47,17 @@ class VeloxToPrestoExceptionTranslator 
{ kTranslateMap = { {velox::error_source::kErrorSourceRuntime, {{velox::error_code::kMemCapExceeded, - {0x00020000, - "GENERIC_INSUFFICIENT_RESOURCES", + {0x00020007, + "EXCEEDED_LOCAL_MEMORY_LIMIT", protocol::ErrorType::INSUFFICIENT_RESOURCES}}, {velox::error_code::kMemAborted, {0x00020000, "GENERIC_INSUFFICIENT_RESOURCES", protocol::ErrorType::INSUFFICIENT_RESOURCES}}, + {velox::error_code::kSpillLimitExceeded, + {0x00020006, + "EXCEEDED_SPILL_LIMIT", + protocol::ErrorType::INSUFFICIENT_RESOURCES}}, {velox::error_code::kInvalidState, {0x00010000, "GENERIC_INTERNAL_ERROR", From 6bf9e3ca513324aea271afc8c87c50b255ade7a5 Mon Sep 17 00:00:00 2001 From: Steve Burnett Date: Tue, 8 Oct 2024 11:17:09 -0400 Subject: [PATCH 75/86] Replace presto-router/README.txt with README.md --- presto-router/README.md | 32 ++++++++++++++++++++++++++++++++ presto-router/README.txt | 32 +++----------------------------- 2 files changed, 35 insertions(+), 29 deletions(-) create mode 100644 presto-router/README.md diff --git a/presto-router/README.md b/presto-router/README.md new file mode 100644 index 000000000000..08193c7fdd2c --- /dev/null +++ b/presto-router/README.md @@ -0,0 +1,32 @@ +# Presto Router + +Presto Router is a service that sits in front of Presto clusters. It routes requests to Presto clusters, collects statistics from Presto clusters, and shows aggregated results in a UI. + +## Running Presto Router in your IDE + +After building Presto, load the project into your IDE and run the router. In IntelliJ IDEA, use the following options to create a run configuration: + +* Main Class: `com.facebook.presto.router.PrestoRouter` +* VM Options: `-Drouter.config-file=etc/router-config.json -Dnode.environment=devel` +* Working directory: `$MODULE_WORKING_DIR$` or `$MODULE_DIR$`(Depends on your version of IntelliJ) +* Use classpath of module: `presto-router` + +The working directory should be the `presto-router` subdirectory. 
+ +If necessary, edit the `etc/router-config.json` file and add the Presto clusters' endpoints in the `groups.members` field. + +## Building the Web UI + +Similar to the Presto Web UI, the router Web UI is also composed of React components and is written in JSX and ES6. To update this folder after making changes, run: + + yarn --cwd presto-router/src/main/resources/router_ui/src install + +If no JavaScript dependencies have changed (that is, no changes to `package.json`), it is faster to run: + + yarn --cwd presto-router/src/main/resources/router_ui/src run package + +To simplify iteration, you can run in `watch` mode, which automatically re-compiles when changes to source files are detected: + + yarn --cwd presto-router/src/main/resources/router_ui/src run watch + +To iterate quickly, re-build the project in IntelliJ after packaging is complete. Project resources are then hot-reloaded and changes are reflected on browser refresh. diff --git a/presto-router/README.txt b/presto-router/README.txt index 24df6dfdbe77..3cd86b1163dc 100755 --- a/presto-router/README.txt +++ b/presto-router/README.txt @@ -1,32 +1,6 @@ # Presto Router -Presto router is a service sitting in front of Presto clusters. It routes requests to Presto clusters, collects statistics from Presto clusters, and shows aggregated results in a UI. +Presto router is a service sitting in front of Presto clusters. -## Running Presto router in your IDE - -After building Presto, you can load the project into your IDE and run the router. In IntelliJ IDEA, use the following options to create a run configuration: - -* Main Class: `com.facebook.presto.router.PrestoRouter` -* VM Options: `-Drouter.config-file=etc/router-config.json -Dnode.environment=devel` -* Working directory: `$MODULE_WORKING_DIR$` or `$MODULE_DIR$`(Depends your version of IntelliJ) -* Use classpath of module: `presto-router` - -The working directory should be the `presto-router` subdirectory. 
- -If necessary, edit the `etc/router-config.json` file with the Presto clusters' endpoints in the `groups.members` field. - -## Building the Web UI - -Similar to the Presto Web UI, the router Web UI is also composed of React components and is written in JSX and ES6. To update this folder after making changes, simply run: - - yarn --cwd presto-router/src/main/resources/router_ui/src install - -If no JavaScript dependencies have changed (i.e., no changes to `package.json`), it is faster to run: - - yarn --cwd presto-router/src/main/resources/router_ui/src run package - -To simplify iteration, you can also run in `watch` mode, which automatically re-compiles when changes to source files are detected: - - yarn --cwd presto-router/src/main/resources/router_ui/src run watch - -To iterate quickly, simply re-build the project in IntelliJ after packaging is complete. Project resources will be hot-reloaded and changes are reflected on browser refresh. +For more information, see +https://github.com/prestodb/presto/blob/master/presto-router/README.md \ No newline at end of file From 4799dd2f739ebfd074d01571bf413fcfa71749c1 Mon Sep 17 00:00:00 2001 From: Feilong Liu Date: Fri, 11 Oct 2024 10:02:07 -0700 Subject: [PATCH 76/86] Enable verbose runtime stats for connector optimizer --- .../com/facebook/presto/sql/Optimizer.java | 24 +-------- .../presto/sql/OptimizerRuntimeTrackUtil.java | 53 +++++++++++++++++++ .../ApplyConnectorOptimization.java | 9 ++++ 3 files changed, 64 insertions(+), 22 deletions(-) create mode 100644 presto-main/src/main/java/com/facebook/presto/sql/OptimizerRuntimeTrackUtil.java diff --git a/presto-main/src/main/java/com/facebook/presto/sql/Optimizer.java b/presto-main/src/main/java/com/facebook/presto/sql/Optimizer.java index a36a85bcd658..4b5a7f5fdb6f 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/Optimizer.java +++ b/presto-main/src/main/java/com/facebook/presto/sql/Optimizer.java @@ -36,16 +36,13 @@ import 
com.facebook.presto.sql.planner.optimizations.PlanNodeSearcher; import com.facebook.presto.sql.planner.optimizations.PlanOptimizer; import com.facebook.presto.sql.planner.optimizations.PlanOptimizerResult; -import com.facebook.presto.sql.planner.optimizations.StatsRecordingPlanOptimizer; import com.facebook.presto.sql.planner.plan.JoinNode; import com.facebook.presto.sql.planner.plan.SemiJoinNode; import com.facebook.presto.sql.planner.sanity.PlanChecker; -import com.google.common.base.Splitter; import java.util.List; import java.util.Optional; -import static com.facebook.presto.SystemSessionProperties.getOptimizersToEnableVerboseRuntimeStats; import static com.facebook.presto.SystemSessionProperties.getQueryAnalyzerTimeout; import static com.facebook.presto.SystemSessionProperties.isPrintStatsForNonJoinQuery; import static com.facebook.presto.SystemSessionProperties.isVerboseOptimizerInfoEnabled; @@ -54,6 +51,8 @@ import static com.facebook.presto.spi.StandardErrorCode.QUERY_PLANNING_TIMEOUT; import static com.facebook.presto.sql.Optimizer.PlanStage.OPTIMIZED; import static com.facebook.presto.sql.Optimizer.PlanStage.OPTIMIZED_AND_VALIDATED; +import static com.facebook.presto.sql.OptimizerRuntimeTrackUtil.getOptimizerNameForLog; +import static com.facebook.presto.sql.OptimizerRuntimeTrackUtil.trackOptimizerRuntime; import static java.lang.String.format; import static java.util.Objects.requireNonNull; @@ -131,16 +130,6 @@ public Plan validateAndOptimizePlan(PlanNode root, PlanStage stage) return new Plan(root, types, computeStats(root, types)); } - private boolean trackOptimizerRuntime(Session session, PlanOptimizer optimizer) - { - String optimizerString = getOptimizersToEnableVerboseRuntimeStats(session); - if (optimizerString.isEmpty()) { - return false; - } - List optimizers = Splitter.on(",").trimResults().splitToList(optimizerString); - return optimizers.contains(getOptimizerNameForLog(optimizer)); - } - private StatsAndCosts computeStats(PlanNode root, 
TypeProvider types) { if (explain || isPrintStatsForNonJoinQuery(session) || @@ -180,13 +169,4 @@ private void collectOptimizerInformation(PlanOptimizer optimizer, PlanNode oldNo session.getOptimizerResultCollector().addOptimizerResult(optimizerName, oldNodeStr, newNodeStr); } } - - private String getOptimizerNameForLog(PlanOptimizer optimizer) - { - String optimizerName = optimizer.getClass().getSimpleName(); - if (optimizer instanceof StatsRecordingPlanOptimizer) { - optimizerName = format("%s:%s", optimizerName, ((StatsRecordingPlanOptimizer) optimizer).getDelegate().getClass().getSimpleName()); - } - return optimizerName; - } } diff --git a/presto-main/src/main/java/com/facebook/presto/sql/OptimizerRuntimeTrackUtil.java b/presto-main/src/main/java/com/facebook/presto/sql/OptimizerRuntimeTrackUtil.java new file mode 100644 index 000000000000..99c6c1c78df2 --- /dev/null +++ b/presto-main/src/main/java/com/facebook/presto/sql/OptimizerRuntimeTrackUtil.java @@ -0,0 +1,53 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.sql; + +import com.facebook.presto.Session; +import com.facebook.presto.spi.ConnectorPlanOptimizer; +import com.facebook.presto.sql.planner.optimizations.PlanOptimizer; +import com.facebook.presto.sql.planner.optimizations.StatsRecordingPlanOptimizer; +import com.google.common.base.Splitter; + +import java.util.List; + +import static com.facebook.presto.SystemSessionProperties.getOptimizersToEnableVerboseRuntimeStats; +import static com.google.common.base.Preconditions.checkArgument; +import static java.lang.String.format; + +public class OptimizerRuntimeTrackUtil +{ + private OptimizerRuntimeTrackUtil() + { + } + + public static String getOptimizerNameForLog(Object optimizer) + { + checkArgument(optimizer instanceof PlanOptimizer || optimizer instanceof ConnectorPlanOptimizer); + String optimizerName = optimizer.getClass().getSimpleName(); + if (optimizer instanceof StatsRecordingPlanOptimizer) { + optimizerName = format("%s:%s", optimizerName, ((StatsRecordingPlanOptimizer) optimizer).getDelegate().getClass().getSimpleName()); + } + return optimizerName; + } + + public static boolean trackOptimizerRuntime(Session session, Object optimizer) + { + String optimizerString = getOptimizersToEnableVerboseRuntimeStats(session); + if (optimizerString.isEmpty()) { + return false; + } + List optimizers = Splitter.on(",").trimResults().splitToList(optimizerString); + return optimizers.contains(getOptimizerNameForLog(optimizer)); + } +} diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/optimizations/ApplyConnectorOptimization.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/optimizations/ApplyConnectorOptimization.java index 1cddf0dcf29a..18c0ffb66877 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/planner/optimizations/ApplyConnectorOptimization.java +++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/optimizations/ApplyConnectorOptimization.java @@ -14,6 +14,7 @@ package 
com.facebook.presto.sql.planner.optimizations; import com.facebook.presto.Session; +import com.facebook.presto.SystemSessionProperties; import com.facebook.presto.expressions.LogicalRowExpressions; import com.facebook.presto.spi.ConnectorId; import com.facebook.presto.spi.ConnectorPlanOptimizer; @@ -55,6 +56,9 @@ import java.util.Queue; import java.util.Set; +import static com.facebook.presto.common.RuntimeUnit.NANO; +import static com.facebook.presto.sql.OptimizerRuntimeTrackUtil.getOptimizerNameForLog; +import static com.facebook.presto.sql.OptimizerRuntimeTrackUtil.trackOptimizerRuntime; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkState; import static java.util.Objects.requireNonNull; @@ -99,6 +103,7 @@ public PlanOptimizerResult optimize(PlanNode plan, Session session, TypeProvider requireNonNull(variableAllocator, "variableAllocator is null"); requireNonNull(idAllocator, "idAllocator is null"); + boolean enableVerboseRuntimeStats = SystemSessionProperties.isVerboseRuntimeStatsEnabled(session); Map> connectorOptimizers = connectorOptimizersSupplier.get(); if (connectorOptimizers.isEmpty()) { return PlanOptimizerResult.optimizerResult(plan, false); @@ -143,7 +148,11 @@ public PlanOptimizerResult optimize(PlanNode plan, Session session, TypeProvider // the returned node is still a max closure (only if there is no new connector added, which does happen but ignored here) for (ConnectorPlanOptimizer optimizer : optimizers) { + long start = System.nanoTime(); newNode = optimizer.optimize(newNode, session.toConnectorSession(connectorId), variableAllocator, idAllocator); + if (enableVerboseRuntimeStats || trackOptimizerRuntime(session, optimizer)) { + session.getRuntimeStats().addMetricValue(String.format("optimizer%sTimeNanos", getOptimizerNameForLog(optimizer)), NANO, System.nanoTime() - start); + } } if (node != newNode) { From 10227bbe57921f90a89bfbd5b22db036aa6018c7 Mon Sep 17 00:00:00 
2001 From: Bikramjeet Vig Date: Fri, 11 Oct 2024 15:58:50 -0700 Subject: [PATCH 77/86] [native] Add counter to track effectiveness of memory pushback Adds a new counter `presto_cpp.memory_pushback_reduction_bytes` that keeps track of the actual used memory reduction for every memory pushback attempt. --- .../presto_cpp/main/PeriodicMemoryChecker.cpp | 8 +++++++- .../presto_cpp/main/common/Counters.cpp | 9 +++++++++ .../presto_cpp/main/common/Counters.h | 5 +++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/presto-native-execution/presto_cpp/main/PeriodicMemoryChecker.cpp b/presto-native-execution/presto_cpp/main/PeriodicMemoryChecker.cpp index 1f9591bf700c..d0611712c0d1 100644 --- a/presto-native-execution/presto_cpp/main/PeriodicMemoryChecker.cpp +++ b/presto-native-execution/presto_cpp/main/PeriodicMemoryChecker.cpp @@ -209,6 +209,12 @@ void PeriodicMemoryChecker::pushbackMemory() { } RECORD_HISTOGRAM_METRIC_VALUE( kCounterMemoryPushbackLatencyMs, latencyUs / 1000); - LOG(INFO) << "Shrunk " << velox::succinctBytes(freedBytes); + const auto actualFreedBytes = std::max( + 0, static_cast(currentMemBytes) - systemUsedMemoryBytes()); + RECORD_HISTOGRAM_METRIC_VALUE( + kCounterMemoryPushbackReductionBytes, actualFreedBytes); + LOG(INFO) << "Memory pushback shrunk " << velox::succinctBytes(freedBytes) + << " Effective bytes shrunk: " + << velox::succinctBytes(actualFreedBytes); } } // namespace facebook::presto diff --git a/presto-native-execution/presto_cpp/main/common/Counters.cpp b/presto-native-execution/presto_cpp/main/common/Counters.cpp index 5fb8584dfe0e..a39559637012 100644 --- a/presto-native-execution/presto_cpp/main/common/Counters.cpp +++ b/presto-native-execution/presto_cpp/main/common/Counters.cpp @@ -102,6 +102,15 @@ void registerPrestoMetrics() { DEFINE_METRIC(kCounterMemoryPushbackCount, facebook::velox::StatType::COUNT); DEFINE_HISTOGRAM_METRIC( kCounterMemoryPushbackLatencyMs, 10'000, 0, 100'000, 50, 90, 99, 100); + 
DEFINE_HISTOGRAM_METRIC( + kCounterMemoryPushbackReductionBytes, + 100l * 1024 * 1024, // 100MB + 0, + 15l * 1024 * 1024 * 1024, // 15GB + 50, + 90, + 99, + 100); // NOTE: Metrics type exporting for file handle cache counters are in // PeriodicTaskManager because they have dynamic names. The following counters diff --git a/presto-native-execution/presto_cpp/main/common/Counters.h b/presto-native-execution/presto_cpp/main/common/Counters.h index 5a2a59f0d2a8..6369671c0544 100644 --- a/presto-native-execution/presto_cpp/main/common/Counters.h +++ b/presto-native-execution/presto_cpp/main/common/Counters.h @@ -166,4 +166,9 @@ constexpr folly::StringPiece kCounterMemoryPushbackCount{ /// reports P50, P90, P99, and P100. constexpr folly::StringPiece kCounterMemoryPushbackLatencyMs{ "presto_cpp.memory_pushback_latency_ms"}; +/// Distribution of reduction in memory usage achieved by each memory pushback +/// attempt. This is to gauge its effectiveness. In range of [0, 15GB] with 150 +/// buckets and reports P50, P90, P99, and P100. +constexpr folly::StringPiece kCounterMemoryPushbackReductionBytes{ + "presto_cpp.memory_pushback_reduction_bytes"}; } // namespace facebook::presto From 3236937a64879f0908c5b4c5e06d884805d7b161 Mon Sep 17 00:00:00 2001 From: Amit Dutta Date: Fri, 11 Oct 2024 16:52:21 -0700 Subject: [PATCH 78/86] [native] Advance velox. 
--- .../presto_cpp/main/types/PrestoToVeloxConnector.cpp | 9 ++++----- presto-native-execution/velox | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp index a9a6f21ed25f..d8d1c64b7098 100644 --- a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp +++ b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp @@ -1356,11 +1356,10 @@ IcebergPrestoToVeloxConnector::toVeloxSplit( deletes.emplace_back(icebergDeleteFile); } - std::unordered_map metadataColumns; - metadataColumns.reserve(1); - metadataColumns.insert( + std::unordered_map infoColumns = { {"$data_sequence_number", - std::to_string(icebergSplit->dataSequenceNumber)}); + std::to_string(icebergSplit->dataSequenceNumber)}, + {"$path", icebergSplit->path}}; return std::make_unique( catalogId, @@ -1373,7 +1372,7 @@ IcebergPrestoToVeloxConnector::toVeloxSplit( customSplitInfo, nullptr, deletes, - metadataColumns); + infoColumns); } std::unique_ptr diff --git a/presto-native-execution/velox b/presto-native-execution/velox index acd57170b6d9..adbebfdad56b 160000 --- a/presto-native-execution/velox +++ b/presto-native-execution/velox @@ -1 +1 @@ -Subproject commit acd57170b6d98206def1ef02b74c467e3ba18061 +Subproject commit adbebfdad56b1c07a274cf89ede25732201c2360 From 27eb6663a0b6a5e5587fce5b2f00b1aa19345bf8 Mon Sep 17 00:00:00 2001 From: Jialiang Tan Date: Sat, 12 Oct 2024 13:28:18 -0700 Subject: [PATCH 79/86] [native] Add query trace session properties to native session --- .../presto_cpp/main/SessionProperties.cpp | 60 ++++++++++++++++--- .../presto_cpp/main/SessionProperties.h | 23 ++++++- 2 files changed, 73 insertions(+), 10 deletions(-) diff --git a/presto-native-execution/presto_cpp/main/SessionProperties.cpp b/presto-native-execution/presto_cpp/main/SessionProperties.cpp index 338d17766e72..334820042630 
100644 --- a/presto-native-execution/presto_cpp/main/SessionProperties.cpp +++ b/presto-native-execution/presto_cpp/main/SessionProperties.cpp @@ -177,20 +177,20 @@ SessionProperties::SessionProperties() { std::to_string(c.rowNumberSpillEnabled())); addSessionProperty( - kNativeSpillerNumPartitionBits, + kSpillerNumPartitionBits, "none", TINYINT(), false, QueryConfig::kSpillNumPartitionBits, - std::to_string(c.spillNumPartitionBits())), + std::to_string(c.spillNumPartitionBits())); - addSessionProperty( - kTopNRowNumberSpillEnabled, - "Native Execution only. Enable topN row number spilling on native engine", - BOOLEAN(), - false, - QueryConfig::kTopNRowNumberSpillEnabled, - boolToString(c.topNRowNumberSpillEnabled())); + addSessionProperty( + kTopNRowNumberSpillEnabled, + "Native Execution only. Enable topN row number spilling on native engine", + BOOLEAN(), + false, + QueryConfig::kTopNRowNumberSpillEnabled, + boolToString(c.topNRowNumberSpillEnabled())); addSessionProperty( kValidateOutputFromOperators, @@ -254,6 +254,48 @@ SessionProperties::SessionProperties() { QueryConfig::kSelectiveNimbleReaderEnabled, boolToString(c.selectiveNimbleReaderEnabled())); + addSessionProperty( + kQueryTraceEnabled, + "Enables query tracing.", + BOOLEAN(), + false, + QueryConfig::kQueryTraceEnabled, + boolToString(c.queryTraceEnabled())); + + addSessionProperty( + kQueryTraceDir, + "Base dir of a query to store tracing data.", + VARCHAR(), + false, + QueryConfig::kQueryTraceDir, + c.queryTraceDir()); + + addSessionProperty( + kQueryTraceNodeIds, + "A comma-separated list of plan node ids whose input data will be traced." + " Empty string if only want to trace the query metadata.", + VARCHAR(), + false, + QueryConfig::kQueryTraceNodeIds, + c.queryTraceNodeIds()); + + addSessionProperty( + kQueryTraceMaxBytes, + "The max trace bytes limit. 
Tracing is disabled if zero.", + BIGINT(), + false, + QueryConfig::kQueryTraceMaxBytes, + std::to_string(c.queryTraceMaxBytes())); + + addSessionProperty( + kQueryTraceTaskRegExp, + "The regexp of traced task id. We only enable trace on a task if its id" + " matches.", + VARCHAR(), + false, + QueryConfig::kQueryTraceTaskRegExp, + c.queryTraceTaskRegExp()); + // If `legacy_timestamp` is true, the coordinator expects timestamp // conversions without a timezone to be converted to the user's // session_timezone. diff --git a/presto-native-execution/presto_cpp/main/SessionProperties.h b/presto-native-execution/presto_cpp/main/SessionProperties.h index 50f9869267e6..f3e90fe2ee8a 100644 --- a/presto-native-execution/presto_cpp/main/SessionProperties.h +++ b/presto-native-execution/presto_cpp/main/SessionProperties.h @@ -131,7 +131,7 @@ class SessionProperties { static constexpr const char* kJoinSpillPartitionBits = "native_join_spiller_partition_bits"; - static constexpr const char* kNativeSpillerNumPartitionBits = + static constexpr const char* kSpillerNumPartitionBits = "native_spiller_num_partition_bits"; /// Enable topN row number spilling on native engine. @@ -179,6 +179,27 @@ class SessionProperties { static constexpr const char* kDriverCpuTimeSliceLimitMs = "driver_cpu_time_slice_limit_ms"; 
We only enable trace on a task if its id + /// matches. + static constexpr const char* kQueryTraceTaskRegExp = + "native_query_trace_task_reg_exp"; + SessionProperties(); const std::unordered_map>& From bee3c6e20fa27e0068d5ae84842527654c3f06b2 Mon Sep 17 00:00:00 2001 From: Yihong Wang Date: Mon, 14 Oct 2024 15:06:36 -0700 Subject: [PATCH 80/86] fix the multi-level subgroup issue Update the `ResourceGroupView` component to properly display a subgroup which is in 3rd or more level. Signed-off-by: Yihong Wang --- presto-ui/src/components/ResourceGroupView.jsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-ui/src/components/ResourceGroupView.jsx b/presto-ui/src/components/ResourceGroupView.jsx index b875eb67263e..cf3750aa6ed8 100644 --- a/presto-ui/src/components/ResourceGroupView.jsx +++ b/presto-ui/src/components/ResourceGroupView.jsx @@ -190,7 +190,7 @@ export default function ResourceGroupView() { }); return; } - fetch('/v1/resourceGroupState/' + group.replace('.', '/')) + fetch(`/v1/resourceGroupState/${group.replaceAll('.', '/')}`) .then(response => response.json()) .then((resources) => { dataSet.current = { From 6efcf0f4567c2e1f249ee96051e81feb76dd7af8 Mon Sep 17 00:00:00 2001 From: ajay-kharat Date: Mon, 14 Oct 2024 09:52:28 +0530 Subject: [PATCH 81/86] Centralize dependency versions in parent pom.xml --- pom.xml | 6 ++++++ presto-cassandra/pom.xml | 11 ----------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/pom.xml b/pom.xml index 8487bfedfa0b..fd8bb1baddba 100644 --- a/pom.xml +++ b/pom.xml @@ -204,6 +204,12 @@ + + io.netty + netty-handler + 4.1.107.Final + + com.facebook.presto presto-testing-docker diff --git a/presto-cassandra/pom.xml b/presto-cassandra/pom.xml index e767145b3523..4efb732167f5 100644 --- a/presto-cassandra/pom.xml +++ b/presto-cassandra/pom.xml @@ -185,17 +185,6 @@ - - - - - io.netty - netty-handler - 4.1.107.Final - - - - From d280efccd3358f53eaec4dd83fd31cde412d8a8b Mon Sep 17 00:00:00 2001 
From: Deepak Majeti Date: Mon, 14 Oct 2024 12:09:30 -0400 Subject: [PATCH 82/86] [native] Install all adapter dependencies in dependency dockerfiles --- .../scripts/dockerfiles/centos-dependency.dockerfile | 2 +- .../scripts/dockerfiles/ubuntu-22.04-dependency.dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/presto-native-execution/scripts/dockerfiles/centos-dependency.dockerfile b/presto-native-execution/scripts/dockerfiles/centos-dependency.dockerfile index a65a1b57ad2e..ca93f48030e1 100644 --- a/presto-native-execution/scripts/dockerfiles/centos-dependency.dockerfile +++ b/presto-native-execution/scripts/dockerfiles/centos-dependency.dockerfile @@ -21,6 +21,6 @@ COPY scripts /scripts COPY velox/scripts /velox/scripts RUN mkdir build && \ (cd build && ../scripts/setup-centos.sh && \ - ../velox/scripts/setup-adapters.sh aws && \ + ../velox/scripts/setup-adapters.sh && \ ../scripts/setup-adapters.sh ) && \ rm -rf build diff --git a/presto-native-execution/scripts/dockerfiles/ubuntu-22.04-dependency.dockerfile b/presto-native-execution/scripts/dockerfiles/ubuntu-22.04-dependency.dockerfile index 609a39480fa8..e31cc72739ae 100644 --- a/presto-native-execution/scripts/dockerfiles/ubuntu-22.04-dependency.dockerfile +++ b/presto-native-execution/scripts/dockerfiles/ubuntu-22.04-dependency.dockerfile @@ -30,6 +30,6 @@ COPY velox/scripts /velox/scripts RUN mkdir build && \ (cd build && ../scripts/setup-ubuntu.sh && \ apt install -y rpm && \ - ../velox/scripts/setup-adapters.sh aws && \ + ../velox/scripts/setup-adapters.sh && \ ../scripts/setup-adapters.sh ) && \ rm -rf build From 5f3835ef76afe93f966b25f502ba503924e21068 Mon Sep 17 00:00:00 2001 From: Sergey Pershin Date: Tue, 15 Oct 2024 10:27:08 -0700 Subject: [PATCH 83/86] [native] Advance Velox version. 
--- presto-native-execution/velox | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-native-execution/velox b/presto-native-execution/velox index adbebfdad56b..f59d9a759e28 160000 --- a/presto-native-execution/velox +++ b/presto-native-execution/velox @@ -1 +1 @@ -Subproject commit adbebfdad56b1c07a274cf89ede25732201c2360 +Subproject commit f59d9a759e28964f750fe7a1662ddd39c4bf4741 From f1058ccdddaae07888e7cb1077a9264a0cbd5917 Mon Sep 17 00:00:00 2001 From: wangd Date: Wed, 16 Oct 2024 00:29:51 +0800 Subject: [PATCH 84/86] [Iceberg]Enable test cases for rename table on REST and NESSIE catalog --- .../presto/iceberg/TestIcebergDistributedQueries.java | 6 ------ .../hadoop/TestIcebergHadoopCatalogDistributedQueries.java | 6 ++++++ .../hive/TestIcebergHiveCatalogDistributedQueries.java | 6 ++++++ 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergDistributedQueries.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergDistributedQueries.java index bb4bc569261d..8bfb68d48872 100644 --- a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergDistributedQueries.java +++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergDistributedQueries.java @@ -66,12 +66,6 @@ protected boolean supportsNotNullColumns() return false; } - @Override - public void testRenameTable() - { - // Rename table are not supported by the connector - } - @Override public void testUpdate() { diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hadoop/TestIcebergHadoopCatalogDistributedQueries.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hadoop/TestIcebergHadoopCatalogDistributedQueries.java index 1ad902834fc4..53b6a17c4bd1 100644 --- a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hadoop/TestIcebergHadoopCatalogDistributedQueries.java +++ 
b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hadoop/TestIcebergHadoopCatalogDistributedQueries.java @@ -29,4 +29,10 @@ protected boolean supportsViews() { return false; } + + @Override + public void testRenameTable() + { + // Rename table are not supported by hadoop catalog + } } diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hive/TestIcebergHiveCatalogDistributedQueries.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hive/TestIcebergHiveCatalogDistributedQueries.java index 800ad6904318..3330458287b5 100644 --- a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hive/TestIcebergHiveCatalogDistributedQueries.java +++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hive/TestIcebergHiveCatalogDistributedQueries.java @@ -28,4 +28,10 @@ public TestIcebergHiveCatalogDistributedQueries() { super(HIVE, ImmutableMap.of("iceberg.hive-statistics-merge-strategy", Joiner.on(",").join(NUMBER_OF_DISTINCT_VALUES.name(), TOTAL_SIZE_IN_BYTES.name()))); } + + @Override + public void testRenameTable() + { + // Rename table are not supported by hive catalog + } } From 87277f87c8787cf9c209815f5975d3e87cb39444 Mon Sep 17 00:00:00 2001 From: Zuyu ZHANG Date: Mon, 14 Oct 2024 20:42:41 -0700 Subject: [PATCH 85/86] [native] Add the native isBlockedTiming stat to operator stats --- .../presto/operator/OperatorContext.java | 7 ++ .../presto/operator/OperatorStats.java | 72 +++++++++++++++++++ .../presto/server/TaskResourceUtils.java | 8 +++ .../presto/execution/TestQueryStats.java | 16 +++++ .../presto/operator/TestOperatorStats.java | 8 +++ .../presto_cpp/main/PrestoTask.cpp | 15 ++-- .../presto_protocol/presto_protocol.cpp | 56 +++++++++++++++ .../presto_protocol/presto_protocol.h | 4 ++ 8 files changed, 182 insertions(+), 4 deletions(-) diff --git a/presto-main/src/main/java/com/facebook/presto/operator/OperatorContext.java b/presto-main/src/main/java/com/facebook/presto/operator/OperatorContext.java index 
7b5d36c1f9a6..b9f9e7fb8a9f 100644 --- a/presto-main/src/main/java/com/facebook/presto/operator/OperatorContext.java +++ b/presto-main/src/main/java/com/facebook/presto/operator/OperatorContext.java @@ -63,6 +63,8 @@ public class OperatorContext private final DriverContext driverContext; private final Executor executor; + private final OperationTiming isBlockedTiming = new OperationTiming(); + private final CounterStat rawInputDataSize = new CounterStat(); private final CounterStat rawInputPositions = new CounterStat(); @@ -536,6 +538,11 @@ public OperatorStats getOperatorStats() 1, + isBlockedTiming.getCalls(), + succinctNanos(isBlockedTiming.getWallNanos()), + succinctNanos(isBlockedTiming.getCpuNanos()), + succinctBytes(isBlockedTiming.getAllocationBytes()), + addInputTiming.getCalls(), succinctNanos(addInputTiming.getWallNanos()), succinctNanos(addInputTiming.getCpuNanos()), diff --git a/presto-main/src/main/java/com/facebook/presto/operator/OperatorStats.java b/presto-main/src/main/java/com/facebook/presto/operator/OperatorStats.java index 86aca23a3fcf..e32d83df9d8a 100644 --- a/presto-main/src/main/java/com/facebook/presto/operator/OperatorStats.java +++ b/presto-main/src/main/java/com/facebook/presto/operator/OperatorStats.java @@ -50,6 +50,11 @@ public class OperatorStats private final long totalDrivers; + private final long isBlockedCalls; + private final Duration isBlockedWall; + private final Duration isBlockedCpu; + private final DataSize isBlockedAllocation; + private final long addInputCalls; private final Duration addInputWall; private final Duration addInputCpu; @@ -113,6 +118,11 @@ public OperatorStats( @JsonProperty("totalDrivers") long totalDrivers, + @JsonProperty("isBlockedCalls") long isBlockedCalls, + @JsonProperty("isBlockedWall") Duration isBlockedWall, + @JsonProperty("isBlockedCpu") Duration isBlockedCpu, + @JsonProperty("isBlockedAllocation") DataSize isBlockedAllocation, + @JsonProperty("addInputCalls") long addInputCalls, 
@JsonProperty("addInputWall") Duration addInputWall, @JsonProperty("addInputCpu") Duration addInputCpu, @@ -171,6 +181,11 @@ public OperatorStats( this.totalDrivers = totalDrivers; + this.isBlockedCalls = isBlockedCalls; + this.isBlockedWall = requireNonNull(isBlockedWall, "isBlockedWall is null"); + this.isBlockedCpu = requireNonNull(isBlockedCpu, "isBlockedCpu is null"); + this.isBlockedAllocation = requireNonNull(isBlockedAllocation, "isBlockedAllocation is null"); + this.addInputCalls = addInputCalls; this.addInputWall = requireNonNull(addInputWall, "addInputWall is null"); this.addInputCpu = requireNonNull(addInputCpu, "addInputCpu is null"); @@ -235,6 +250,11 @@ public OperatorStats( long totalDrivers, + long isBlockedCalls, + Duration isBlockedWall, + Duration isBlockedCpu, + DataSize isBlockedAllocation, + long addInputCalls, Duration addInputWall, Duration addInputCpu, @@ -293,6 +313,11 @@ public OperatorStats( this.totalDrivers = totalDrivers; + this.isBlockedCalls = isBlockedCalls; + this.isBlockedWall = requireNonNull(isBlockedWall, "isBlockedWall is null"); + this.isBlockedCpu = requireNonNull(isBlockedCpu, "isBlockedCpu is null"); + this.isBlockedAllocation = requireNonNull(isBlockedAllocation, "isBlockedAllocation is null"); + this.addInputCalls = addInputCalls; this.addInputWall = requireNonNull(addInputWall, "addInputWall is null"); this.addInputCpu = requireNonNull(addInputCpu, "addInputCpu is null"); @@ -663,6 +688,34 @@ public DynamicFilterStats getDynamicFilterStats() return dynamicFilterStats; } + @JsonProperty + @ThriftField(45) + public long getIsBlockedCalls() + { + return isBlockedCalls; + } + + @JsonProperty + @ThriftField(46) + public Duration getIsBlockedWall() + { + return isBlockedWall; + } + + @JsonProperty + @ThriftField(47) + public Duration getIsBlockedCpu() + { + return isBlockedCpu; + } + + @JsonProperty + @ThriftField(48) + public DataSize getIsBlockedAllocation() + { + return isBlockedAllocation; + } + public OperatorStats 
add(OperatorStats operatorStats) { return add(ImmutableList.of(operatorStats)); @@ -672,6 +725,11 @@ public OperatorStats add(Iterable operators) { long totalDrivers = this.totalDrivers; + long isBlockedCalls = this.isBlockedCalls; + long isBlockedWall = this.isBlockedWall.roundTo(NANOSECONDS); + long isBlockedCpu = this.isBlockedCpu.roundTo(NANOSECONDS); + long isBlockedAllocation = this.isBlockedAllocation.toBytes(); + long addInputCalls = this.addInputCalls; long addInputWall = this.addInputWall.roundTo(NANOSECONDS); long addInputCpu = this.addInputCpu.roundTo(NANOSECONDS); @@ -724,6 +782,11 @@ public OperatorStats add(Iterable operators) totalDrivers += operator.totalDrivers; + isBlockedCalls += operator.getIsBlockedCalls(); + isBlockedWall += operator.getIsBlockedWall().roundTo(NANOSECONDS); + isBlockedCpu += operator.getIsBlockedCpu().roundTo(NANOSECONDS); + isBlockedAllocation += operator.getIsBlockedAllocation().toBytes(); + addInputCalls += operator.getAddInputCalls(); addInputWall += operator.getAddInputWall().roundTo(NANOSECONDS); addInputCpu += operator.getAddInputCpu().roundTo(NANOSECONDS); @@ -789,6 +852,11 @@ public OperatorStats add(Iterable operators) totalDrivers, + isBlockedCalls, + succinctNanos(isBlockedWall), + succinctNanos(isBlockedCpu), + succinctBytes(isBlockedAllocation), + addInputCalls, succinctNanos(addInputWall), succinctNanos(addInputCpu), @@ -866,6 +934,10 @@ public OperatorStats summarize() planNodeId, operatorType, totalDrivers, + isBlockedCalls, + isBlockedWall, + isBlockedCpu, + isBlockedAllocation, addInputCalls, addInputWall, addInputCpu, diff --git a/presto-main/src/main/java/com/facebook/presto/server/TaskResourceUtils.java b/presto-main/src/main/java/com/facebook/presto/server/TaskResourceUtils.java index 78b71641bff6..96f4356385a8 100644 --- a/presto-main/src/main/java/com/facebook/presto/server/TaskResourceUtils.java +++ b/presto-main/src/main/java/com/facebook/presto/server/TaskResourceUtils.java @@ -217,6 +217,10 @@
private static OperatorStats convertToThriftOperatorStats(OperatorStats operator operatorStats.getPlanNodeId(), operatorStats.getOperatorType(), operatorStats.getTotalDrivers(), + operatorStats.getIsBlockedCalls(), + operatorStats.getIsBlockedWall(), + operatorStats.getIsBlockedCpu(), + operatorStats.getIsBlockedAllocation(), operatorStats.getAddInputCalls(), operatorStats.getAddInputWall(), operatorStats.getAddInputCpu(), @@ -452,6 +456,10 @@ private static OperatorStats convertFromThriftOperatorStats(OperatorStats thrift thriftOperatorStats.getPlanNodeId(), thriftOperatorStats.getOperatorType(), thriftOperatorStats.getTotalDrivers(), + thriftOperatorStats.getIsBlockedCalls(), + thriftOperatorStats.getIsBlockedWall(), + thriftOperatorStats.getIsBlockedCpu(), + thriftOperatorStats.getIsBlockedAllocation(), thriftOperatorStats.getAddInputCalls(), thriftOperatorStats.getAddInputWall(), thriftOperatorStats.getAddInputCpu(), diff --git a/presto-main/src/test/java/com/facebook/presto/execution/TestQueryStats.java b/presto-main/src/test/java/com/facebook/presto/execution/TestQueryStats.java index 806a6ccbb876..6d8f0130f510 100644 --- a/presto-main/src/test/java/com/facebook/presto/execution/TestQueryStats.java +++ b/presto-main/src/test/java/com/facebook/presto/execution/TestQueryStats.java @@ -68,6 +68,10 @@ public class TestQueryStats new PlanNodeId("13"), TableWriterOperator.class.getSimpleName(), 14L, + 14L, + new Duration(15, NANOSECONDS), + new Duration(16, NANOSECONDS), + new DataSize(121, BYTE), 15L, new Duration(16, NANOSECONDS), new Duration(17, NANOSECONDS), @@ -113,6 +117,10 @@ public class TestQueryStats new PlanNodeId("23"), FilterAndProjectOperator.class.getSimpleName(), 24L, + 24L, + new Duration(25, NANOSECONDS), + new Duration(26, NANOSECONDS), + new DataSize(1210, BYTE), 25L, new Duration(26, NANOSECONDS), new Duration(27, NANOSECONDS), @@ -158,6 +166,10 @@ public class TestQueryStats new PlanNodeId("33"), TableWriterOperator.class.getSimpleName(), 
34L, + 34L, + new Duration(35, NANOSECONDS), + new Duration(36, NANOSECONDS), + new DataSize(12100, BYTE), 35L, new Duration(36, NANOSECONDS), new Duration(37, NANOSECONDS), @@ -527,6 +539,10 @@ private static OperatorStats createOperatorStats(int stageId, int stageExecution new Duration(0, NANOSECONDS), new Duration(0, NANOSECONDS), new DataSize(0, BYTE), + 0L, + new Duration(0, NANOSECONDS), + new Duration(0, NANOSECONDS), + new DataSize(0, BYTE), rawInputDataSize, rawInputPositions, inputDataSize, diff --git a/presto-main/src/test/java/com/facebook/presto/operator/TestOperatorStats.java b/presto-main/src/test/java/com/facebook/presto/operator/TestOperatorStats.java index 4ddc2316680f..6291afd0b6dc 100644 --- a/presto-main/src/test/java/com/facebook/presto/operator/TestOperatorStats.java +++ b/presto-main/src/test/java/com/facebook/presto/operator/TestOperatorStats.java @@ -56,6 +56,10 @@ public class TestOperatorStats 1, + 1, + new Duration(2, NANOSECONDS), + new Duration(3, NANOSECONDS), + new DataSize(234, BYTE), 2, new Duration(3, NANOSECONDS), new Duration(4, NANOSECONDS), @@ -111,6 +115,10 @@ public class TestOperatorStats 1, + 1, + new Duration(2, NANOSECONDS), + new Duration(3, NANOSECONDS), + new DataSize(234, BYTE), 2, new Duration(3, NANOSECONDS), new Duration(4, NANOSECONDS), diff --git a/presto-native-execution/presto_cpp/main/PrestoTask.cpp b/presto-native-execution/presto_cpp/main/PrestoTask.cpp index 44fac67de078..50850cdeb763 100644 --- a/presto-native-execution/presto_cpp/main/PrestoTask.cpp +++ b/presto-native-execution/presto_cpp/main/PrestoTask.cpp @@ -697,6 +697,11 @@ void PrestoTask::updateExecutionInfoLocked( prestoOp.outputDataSize = protocol::DataSize(veloxOp.outputBytes, protocol::DataUnit::BYTE); + setTiming( + veloxOp.isBlockedTiming, + prestoOp.isBlockedCalls, + prestoOp.isBlockedWall, + prestoOp.isBlockedCpu); setTiming( veloxOp.addInputTiming, prestoOp.addInputCalls, @@ -776,10 +781,12 @@ void PrestoTask::updateExecutionInfoLocked( 
addSpillingOperatorMetrics(operatorStatsCollector); } - auto wallNanos = veloxOp.addInputTiming.wallNanos + - veloxOp.getOutputTiming.wallNanos + veloxOp.finishTiming.wallNanos; - auto cpuNanos = veloxOp.addInputTiming.cpuNanos + - veloxOp.getOutputTiming.cpuNanos + veloxOp.finishTiming.cpuNanos; + auto wallNanos = veloxOp.isBlockedTiming.wallNanos + + veloxOp.addInputTiming.wallNanos + veloxOp.getOutputTiming.wallNanos + + veloxOp.finishTiming.wallNanos; + auto cpuNanos = veloxOp.isBlockedTiming.cpuNanos + + veloxOp.addInputTiming.cpuNanos + veloxOp.getOutputTiming.cpuNanos + + veloxOp.finishTiming.cpuNanos; prestoPipeline.totalScheduledTimeInNanos += wallNanos; prestoPipeline.totalCpuTimeInNanos += cpuNanos; diff --git a/presto-native-execution/presto_cpp/presto_protocol/presto_protocol.cpp b/presto-native-execution/presto_cpp/presto_protocol/presto_protocol.cpp index 4fa8486e6f92..02f6dcd925e9 100644 --- a/presto-native-execution/presto_cpp/presto_protocol/presto_protocol.cpp +++ b/presto-native-execution/presto_cpp/presto_protocol/presto_protocol.cpp @@ -3986,6 +3986,34 @@ void to_json(json& j, const OperatorStats& p) { "OperatorStats", "int64_t", "totalDrivers"); + to_json_key( + j, + "isBlockedCalls", + p.isBlockedCalls, + "OperatorStats", + "int64_t", + "isBlockedCalls"); + to_json_key( + j, + "isBlockedWall", + p.isBlockedWall, + "OperatorStats", + "Duration", + "isBlockedWall"); + to_json_key( + j, + "isBlockedCpu", + p.isBlockedCpu, + "OperatorStats", + "Duration", + "isBlockedCpu"); + to_json_key( + j, + "isBlockedAllocation", + p.isBlockedAllocation, + "OperatorStats", + "DataSize", + "isBlockedAllocation"); to_json_key( j, "addInputCalls", @@ -4265,6 +4293,34 @@ void from_json(const json& j, OperatorStats& p) { "OperatorStats", "int64_t", "totalDrivers"); + from_json_key( + j, + "isBlockedCalls", + p.isBlockedCalls, + "OperatorStats", + "int64_t", + "isBlockedCalls"); + from_json_key( + j, + "isBlockedWall", + p.isBlockedWall, + "OperatorStats", + 
"Duration", + "isBlockedWall"); + from_json_key( + j, + "isBlockedCpu", + p.isBlockedCpu, + "OperatorStats", + "Duration", + "isBlockedCpu"); + from_json_key( + j, + "isBlockedAllocation", + p.isBlockedAllocation, + "OperatorStats", + "DataSize", + "isBlockedAllocation"); from_json_key( j, "addInputCalls", diff --git a/presto-native-execution/presto_cpp/presto_protocol/presto_protocol.h b/presto-native-execution/presto_cpp/presto_protocol/presto_protocol.h index 1bcc6a7a0ae1..04fe38e7c3b0 100644 --- a/presto-native-execution/presto_cpp/presto_protocol/presto_protocol.h +++ b/presto-native-execution/presto_cpp/presto_protocol/presto_protocol.h @@ -1257,6 +1257,10 @@ struct OperatorStats { PlanNodeId planNodeId = {}; String operatorType = {}; int64_t totalDrivers = {}; + int64_t isBlockedCalls = {}; + Duration isBlockedWall = {}; + Duration isBlockedCpu = {}; + DataSize isBlockedAllocation = {}; int64_t addInputCalls = {}; Duration addInputWall = {}; Duration addInputCpu = {}; From e4c7e05c96f181232ff227d9081af92f63ea0c38 Mon Sep 17 00:00:00 2001 From: Amit Dutta Date: Tue, 15 Oct 2024 20:58:26 -0700 Subject: [PATCH 86/86] [native] Advance velox. --- presto-native-execution/velox | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-native-execution/velox b/presto-native-execution/velox index f59d9a759e28..47a932857452 160000 --- a/presto-native-execution/velox +++ b/presto-native-execution/velox @@ -1 +1 @@ -Subproject commit f59d9a759e28964f750fe7a1662ddd39c4bf4741 +Subproject commit 47a9328574522451239aa12a8067cfa7b7054e96