diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 90147239..94d5a690 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -105,6 +105,7 @@ file(GLOB SOURCES
         "jogasaki/scheduler/details/*.cpp"
         "jogasaki/serializer/*.cpp"
         "jogasaki/utils/*.cpp"
+        "jogasaki/dist/*.cpp"
 )
 
 if(ENABLE_ALTIMETER)
diff --git a/src/jogasaki/dist/key_distribution.h b/src/jogasaki/dist/key_distribution.h
new file mode 100644
index 00000000..bc508a2f
--- /dev/null
+++ b/src/jogasaki/dist/key_distribution.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2018-2024 Project Tsurugi.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include "key_range.h"
+
+#include <cstddef>
+#include <optional>
+#include <vector>
+
+namespace jogasaki::dist {
+
+/**
+ * @brief provides the key distribution information on index.
+ * @details implementations supply estimates for a key range and pivots that split it.
+ */
+class key_distribution {
+  public:
+    /// @brief the type used to count pivots.
+    using size_type = std::size_t;
+    /// @brief the type of a range on index.
+    using range_type = key_range;
+    /// @brief the type of a pivot, which is a key on index.
+    using pivot_type = key_range::key_type;
+
+    constexpr key_distribution() = default;
+
+    key_distribution(key_distribution const&) = delete;
+    key_distribution& operator=(key_distribution const&) = delete;
+
+    key_distribution(key_distribution&&) = delete;
+    key_distribution& operator=(key_distribution&&) = delete;
+
+    virtual ~key_distribution() = default;
+
+    /**
+     * @brief computes the estimated count of the entries in the range on index.
+     * @param range the range on index
+     * @return the estimated count of the entries in the range
+     * @return empty if it is not available
+     */
+    [[nodiscard]] virtual std::optional<double> estimate_count(range_type const& range) = 0;
+
+    /**
+     * @brief computes the estimated key size in the range on index.
+     * @param range the range on index
+     * @return the estimated key size of an entry in the range
+     * @return empty if it is not available
+     */
+    [[nodiscard]] virtual std::optional<double> estimate_key_size(range_type const& range) = 0;
+
+    /**
+     * @brief computes the estimated value size in the range on index.
+     * @param range the range on index
+     * @return the estimated value size of an entry in the range
+     * @return empty if it is not available
+     */
+    [[nodiscard]] virtual std::optional<double> estimate_value_size(range_type const& range) = 0;
+
+    /**
+     * @brief computes a sequence of pivots that split the range on index.
+     * @details
+     * The resulting pivot sequence is sorted by the order of the keys on the index,
+     * and does not include the keys at both begin and end of the range.
+     * @param max_count maximum count of the pivots
+     * @param range the range on index
+     * @return a sequence of pivots whose size is at most max_count
+     * @return empty list if the range is not splittable
+     * @note the returned pivots may not be actual keys on the index,
+     *      and may not even be well-formed as keys of the index
+     * @post individual pivots are within the range
+     */
+    [[nodiscard]] virtual std::vector<pivot_type> compute_pivots(
+        size_type max_count, range_type const& range) = 0;
+};
+
+} // namespace jogasaki::dist
diff --git a/src/jogasaki/dist/key_range.cpp b/src/jogasaki/dist/key_range.cpp
new file mode 100644
index 00000000..f4dedf10
--- /dev/null
+++ b/src/jogasaki/dist/key_range.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2018-2024 Project Tsurugi.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "key_range.h"
+
+#include <string_view>
+
+namespace jogasaki::dist {
+
+key_range::key_type key_range::begin_key() const noexcept { return begin_key_; }
+
+key_range::endpoint_type key_range::begin_endpoint() const noexcept { return begin_endpoint_; }
+
+key_range::key_type key_range::end_key() const noexcept { return end_key_; }
+
+key_range::endpoint_type key_range::end_endpoint() const noexcept { return end_endpoint_; }
+} // namespace jogasaki::dist
diff --git a/src/jogasaki/dist/key_range.h b/src/jogasaki/dist/key_range.h
new file mode 100644
index 00000000..fef8cb4a
--- /dev/null
+++ b/src/jogasaki/dist/key_range.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2018-2024 Project Tsurugi.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <string_view>
+
+namespace jogasaki::dist {
+
+/**
+ * @brief represents a range of keys on index.
+ * @note keys are kept as non-owning views (std::string_view), so the key storage must
+ *      outlive the range and every key obtained from it.
+ */
+class key_range {
+  public:
+    /// @brief the key type.
+    using key_type = std::string_view;
+
+    /// @brief the endpoint type.
+    enum endpoint_type { unspecified, inclusive, exclusive, prefix_inclusive };
+
+    /**
+     * @brief creates a whole range on index.
+     */
+    key_range() noexcept = default;
+
+    /**
+     * @brief creates a new range on index.
+     * @param begin_key begin key of the range
+     * @param begin_endpoint endpoint type of the begin key,
+     *      or unspecified if the range starts from head of the index
+     * @param end_key end key of the range
+     * @param end_endpoint endpoint type of the end key,
+     *      or unspecified if the range goes to tail of the index
+     */
+    key_range(key_type begin_key, endpoint_type begin_endpoint, key_type end_key,
+        endpoint_type end_endpoint) noexcept
+        : begin_key_(begin_key), begin_endpoint_(begin_endpoint), end_key_(end_key),
+          end_endpoint_(end_endpoint) {}
+
+    /**
+     * @brief returns the begin key of the range.
+     * @return the begin key
+     * @return don't care if begin_endpoint() returns unspecified
+     */
+    [[nodiscard]] key_type begin_key() const noexcept;
+
+    /**
+     * @brief returns the endpoint type of the begin key.
+     * @return the endpoint type
+     * @return unspecified if the range starts from head of the index
+     */
+    [[nodiscard]] endpoint_type begin_endpoint() const noexcept;
+
+    /**
+     * @brief returns the end key of the range.
+     * @return the end key
+     * @return don't care if end_endpoint() returns unspecified
+     */
+    [[nodiscard]] key_type end_key() const noexcept;
+
+    /**
+     * @brief returns the endpoint type of the end key.
+     * @return the endpoint type
+     * @return unspecified if the range goes to tail of the index
+     */
+    [[nodiscard]] endpoint_type end_endpoint() const noexcept;
+
+  private:
+    key_type begin_key_{};
+    endpoint_type begin_endpoint_{unspecified};
+    key_type end_key_{};
+    endpoint_type end_endpoint_{unspecified};
+};
+
+} // namespace jogasaki::dist
diff --git a/src/jogasaki/dist/simple_key_distribution.cpp b/src/jogasaki/dist/simple_key_distribution.cpp
new file mode 100644
index 00000000..a1cd4644
--- /dev/null
+++ b/src/jogasaki/dist/simple_key_distribution.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2018-2024 Project Tsurugi.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "simple_key_distribution.h"
+
+#include <algorithm>
+#include <array>
+#include <random>
+
+namespace jogasaki::dist {
+
+// This implementation keeps no statistics, so every estimate is reported as unavailable.
+std::optional<double> simple_key_distribution::estimate_count(range_type const& /*range*/) {
+    return std::nullopt;
+}
+std::optional<double> simple_key_distribution::estimate_key_size(range_type const& /*range*/) {
+    return std::nullopt;
+}
+std::optional<double> simple_key_distribution::estimate_value_size(range_type const& /*range*/) {
+    return std::nullopt;
+}
+
+// Proposes the two-byte keys {0x81, 0x00..0xff} lying strictly inside the range, in key order.
+std::vector<simple_key_distribution::pivot_type> simple_key_distribution::compute_pivots(
+    size_type max_count, range_type const& range) {
+    // pivot_type is a non-owning view, so the candidate bytes must outlive this call
+    static auto const candidates = [] {
+        std::array<std::array<char, 2>, 0x100> keys{};
+        for (unsigned i = 0; i < keys.size(); ++i) { keys[i] = {'\x81', static_cast<char>(i)}; }
+        return keys;
+    }();
+    std::vector<pivot_type> pivots{};
+    for (auto const& key : candidates) {
+        pivot_type const pivot{key.data(), key.size()};
+        // an empty key leaves that side open; the begin key itself must not become a pivot
+        if ((range.begin_key().empty() || pivot > range.begin_key()) &&
+            (range.end_key().empty() || pivot < range.end_key())) {
+            pivots.emplace_back(pivot);
+        }
+    }
+    if (pivots.size() <= max_count) { return pivots; }
+    // std::sample keeps the relative order of the chosen elements, so the result stays sorted
+    std::vector<pivot_type> chosen(max_count);
+    std::sample(pivots.begin(), pivots.end(), chosen.begin(), max_count,
+        std::mt19937{std::random_device{}()});
+    return chosen;
+}
+} // namespace jogasaki::dist
diff --git a/src/jogasaki/dist/simple_key_distribution.h b/src/jogasaki/dist/simple_key_distribution.h
new file mode 100644
index 00000000..fa79d71b
--- /dev/null
+++ b/src/jogasaki/dist/simple_key_distribution.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2018-2024 Project Tsurugi.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include "key_distribution.h"
+#include <algorithm>
+#include <optional>
+#include <random>
+#include <vector>
+
+namespace jogasaki::dist {
+
+/**
+ * @brief key distribution without statistics that proposes fixed two-byte candidate pivots.
+ */
+class simple_key_distribution : public key_distribution {
+  public:
+    using size_type = key_distribution::size_type;
+    using range_type = key_distribution::range_type;
+    using pivot_type = key_distribution::pivot_type;
+
+    constexpr simple_key_distribution() = default;
+    ~simple_key_distribution() override = default;
+
+    simple_key_distribution(simple_key_distribution const&) = delete;
+    simple_key_distribution& operator=(simple_key_distribution const&) = delete;
+    simple_key_distribution(simple_key_distribution&&) = delete;
+    simple_key_distribution& operator=(simple_key_distribution&&) = delete;
+
+    // no statistics are kept: the estimate_* functions always return an empty optional
+    [[nodiscard]] std::optional<double> estimate_count(range_type const& range) override;
+    [[nodiscard]] std::optional<double> estimate_key_size(range_type const& range) override;
+    [[nodiscard]] std::optional<double> estimate_value_size(range_type const& range) override;
+
+    /// @brief picks at most max_count candidate keys {0x81, b} (b = 0x00..0xff) for the range.
+    [[nodiscard]] std::vector<pivot_type> compute_pivots(
+        size_type max_count, range_type const& range) override;
+};
+
+} // namespace jogasaki::dist