-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #118 from project-tsurugi/key_util
Add simple key partitioning utility
- Loading branch information
Showing
6 changed files
with
309 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
/* | ||
* Copyright 2018-2024 Project Tsurugi. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
#pragma once | ||
|
||
#include "key_range.h" | ||
|
||
#include <optional> | ||
#include <vector> | ||
|
||
namespace jogasaki::dist { | ||
|
||
/** | ||
* @brief provides the key distribution information on index. | ||
*/ | ||
class key_distribution { | ||
public: | ||
using size_type = std::size_t; | ||
using range_type = key_range; | ||
using pivot_type = key_range::key_type; | ||
|
||
constexpr key_distribution() = default; | ||
|
||
key_distribution(key_distribution const&) = delete; | ||
key_distribution& operator=(key_distribution const&) = delete; | ||
|
||
key_distribution(key_distribution&&) = delete; | ||
key_distribution& operator=(key_distribution&&) = delete; | ||
|
||
virtual ~key_distribution() = default; | ||
/** | ||
* @brief computes the estimated count of the entries in the range on index. | ||
* @param range the range on index | ||
* @return the estimated count of the entries in the range | ||
* @return empty if it is not available | ||
*/ | ||
[[nodiscard]] virtual std::optional<double> estimate_count(range_type const& range) = 0; | ||
/** | ||
* @brief computes the estimated key size in the range on index. | ||
* @param range the range on index | ||
* @return the estimated key size of an entry in the range | ||
* @return empty if it is not available | ||
*/ | ||
[[nodiscard]] virtual std::optional<double> estimate_key_size(range_type const& range) = 0; | ||
/** | ||
* @brief computes the estimated value size in the range on index. | ||
* @param range the range on index | ||
* @return the estimated value size of an entry in the range | ||
* @return empty if it is not available | ||
*/ | ||
[[nodiscard]] virtual std::optional<double> estimate_value_size(range_type const& range) = 0; | ||
/** | ||
* @brief compute a sequence of pivots that split the range on index. | ||
* @details | ||
* The resulting pivot sequence is sorted by the order of the keys on the index, | ||
* and does not include the keys at both begin and end of the range. | ||
* @param max_count maximum count of the pivots | ||
* @param range the range on index | ||
* @return a sequence of pivots, must be <= max_count | ||
* @return empty list if the range is not splittable | ||
* @note the returned pivots may not be the actual keys on the index, | ||
* and ill-formed from the actual keys | ||
* @post individual pivots are within the range | ||
*/ | ||
[[nodiscard]] virtual std::vector<pivot_type> compute_pivots( | ||
size_type max_count, range_type const& range) = 0; | ||
}; | ||
|
||
} // namespace jogasaki::dist |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
/* | ||
* Copyright 2018-2024 Project Tsurugi. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#include "key_range.h" | ||
|
||
#include <optional> | ||
|
||
namespace jogasaki::dist { | ||
|
||
key_range::key_type key_range::begin_key() const noexcept { return begin_key_; } | ||
|
||
key_range::endpoint_type key_range::begin_endpoint() const noexcept { return begin_endpoint_; } | ||
|
||
key_range::key_type key_range::end_key() const noexcept { return end_key_; } | ||
|
||
key_range::endpoint_type key_range::end_endpoint() const noexcept { return end_endpoint_; } | ||
} // namespace jogasaki::dist |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
/* | ||
* Copyright 2018-2024 Project Tsurugi. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
#pragma once | ||
|
||
#include <string_view> | ||
|
||
namespace jogasaki::dist { | ||
|
||
/**
 * @brief describes a range on index by a pair of keys and their endpoint types.
 * @note key_type is std::string_view, so this object only refers to the key bytes
 *      and never owns them.
 */
class key_range {
public:
    /// @brief the key type.
    using key_type = std::string_view;

    /// @brief the endpoint type.
    using endpoint_type = enum { unspecified, inclusive, exclusive, prefix_inclusive };

    /**
     * @brief creates a range that covers the whole index.
     * @details both endpoints are unspecified and both keys are empty.
     */
    key_range() noexcept :
        begin_key_{},
        begin_endpoint_{unspecified},
        end_key_{},
        end_endpoint_{unspecified}
    {}

    /**
     * @brief creates a new range on index.
     * @param begin_key the key where the range begins
     * @param begin_endpoint how begin_key bounds the range,
     *      or unspecified if the range starts from head of the index
     * @param end_key the key where the range ends
     * @param end_endpoint how end_key bounds the range,
     *      or unspecified if the range goes to tail of the index
     */
    key_range(
        key_type begin_key,
        endpoint_type begin_endpoint,
        key_type end_key,
        endpoint_type end_endpoint) noexcept :
        begin_key_{begin_key},
        begin_endpoint_{begin_endpoint},
        end_key_{end_key},
        end_endpoint_{end_endpoint}
    {}

    /**
     * @brief returns the key where the range begins.
     * @return the begin key
     * @return don't care if begin_endpoint() returns unspecified
     */
    [[nodiscard]] key_type begin_key() const noexcept;

    /**
     * @brief returns how the begin key bounds the range.
     * @return the endpoint type
     * @return unspecified if the range starts from head of the index
     */
    [[nodiscard]] endpoint_type begin_endpoint() const noexcept;

    /**
     * @brief returns the key where the range ends.
     * @return the end key
     * @return don't care if end_endpoint() returns unspecified
     */
    [[nodiscard]] key_type end_key() const noexcept;

    /**
     * @brief returns how the end key bounds the range.
     * @return the endpoint type
     * @return unspecified if the range goes to tail of the index
     */
    [[nodiscard]] endpoint_type end_endpoint() const noexcept;

private:
    // NOTE: the declaration order below is also the initialization order.
    key_type begin_key_;
    endpoint_type begin_endpoint_;
    key_type end_key_;
    endpoint_type end_endpoint_;
};
|
||
} // namespace jogasaki::dist |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
/* | ||
* Copyright 2018-2024 Project Tsurugi. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#include "simple_key_distribution.h" | ||
|
||
#include <algorithm> | ||
|
||
namespace jogasaki::dist { | ||
|
||
std::optional<double> simple_key_distribution::estimate_count(range_type const& /*range*/) { | ||
return std::nullopt; | ||
} | ||
|
||
std::optional<double> simple_key_distribution::estimate_key_size(range_type const& /*range*/) { | ||
return std::nullopt; | ||
} | ||
|
||
std::optional<double> simple_key_distribution::estimate_value_size(range_type const& /*range*/) { | ||
return std::nullopt; | ||
} | ||
|
||
/**
 * @brief computes pivots for the range out of a fixed set of two-byte candidate keys.
 * @details the candidates are the 256 keys {0x81, b} for every byte value b, in ascending
 * order. Candidates outside the range are dropped; if more than max_count remain,
 * a random subset of max_count candidates is returned.
 * The result is always in ascending key order and never contains the begin key of the range.
 * @param max_count the maximum number of pivots to return
 * @param range the range on index; a bound is ignored if its endpoint type is unspecified.
 *      All other endpoint types are handled alike: pivots are strictly between the two keys.
 * @return the pivots, at most max_count elements; each one refers to storage that stays
 *      valid for the lifetime of the program
 * @return empty list if no candidate lies within the range or max_count is 0
 */
std::vector<simple_key_distribution::pivot_type> simple_key_distribution::compute_pivots(
    size_type max_count, range_type const& range) {
    // pivot_type is a non-owning view, so the candidate bytes must outlive this call.
    // They are kept in a table with static storage duration instead of in a local
    // std::string, whose buffer would be destroyed before the pivots reach the caller.
    struct candidate_table {
        char bytes[0x100][2];
    };
    static const candidate_table candidates = [] {
        candidate_table table{};
        for (int i = 0; i <= 0xff; ++i) {
            table.bytes[i][0] = static_cast<char>(0x81);
            table.bytes[i][1] = static_cast<char>(i);
        }
        return table;
    }();

    // The keys of an unspecified endpoint are "don't care" and must not be compared.
    bool const has_lower = range.begin_endpoint() != range_type::unspecified;
    // An empty end key keeps being treated as "no upper bound", as this function always did.
    bool const has_upper =
        range.end_endpoint() != range_type::unspecified && ! range.end_key().empty();

    std::vector<pivot_type> pivots;
    pivots.reserve(0x100);
    for (auto const& entry : candidates.bytes) {
        pivot_type const pivot(entry, sizeof(entry));
        // pivots must not coincide with the keys at the begin and the end of the range
        if (has_lower && pivot <= range.begin_key()) {
            continue;
        }
        if (has_upper && pivot >= range.end_key()) {
            continue;
        }
        pivots.emplace_back(pivot);
    }

    if (max_count < pivots.size()) {
        std::random_device rd;
        std::mt19937 g(rd());
        // std::sample keeps the relative order of the picked elements for forward iterators,
        // so the random subset is still sorted (shuffle + resize would not be).
        std::vector<pivot_type> sampled(max_count);
        auto const count = static_cast<std::vector<pivot_type>::difference_type>(max_count);
        std::sample(pivots.begin(), pivots.end(), sampled.begin(), count, g);
        return sampled;
    }

    return pivots;
}
|
||
} // namespace jogasaki::dist |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
/* | ||
* Copyright 2018-2024 Project Tsurugi. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
#pragma once | ||
|
||
#include "key_distribution.h" | ||
#include <optional> | ||
#include <random> | ||
#include <vector> | ||
|
||
namespace jogasaki::dist { | ||
|
||
class simple_key_distribution : public key_distribution { | ||
public: | ||
using size_type = key_distribution::size_type; | ||
using range_type = key_distribution::range_type; | ||
using pivot_type = key_distribution::pivot_type; | ||
|
||
constexpr simple_key_distribution() = default; | ||
|
||
~simple_key_distribution() override = default; | ||
|
||
simple_key_distribution(simple_key_distribution const&) = delete; | ||
|
||
simple_key_distribution& operator=(simple_key_distribution const&) = delete; | ||
|
||
simple_key_distribution(simple_key_distribution&&) = delete; | ||
|
||
simple_key_distribution& operator=(simple_key_distribution&&) = delete; | ||
|
||
[[nodiscard]] std::optional<double> estimate_count(range_type const& range) override; | ||
|
||
[[nodiscard]] std::optional<double> estimate_key_size(range_type const& range) override; | ||
|
||
[[nodiscard]] std::optional<double> estimate_value_size(range_type const& range) override; | ||
|
||
[[nodiscard]] std::vector<pivot_type> compute_pivots( | ||
size_type max_count, range_type const& range) override; | ||
}; | ||
|
||
} // namespace jogasaki::dist |