Skip to content

Commit

Permalink
Merge pull request #118 from project-tsurugi/key_util
Browse files Browse the repository at this point in the history
Add simple key partitioning utility
  • Loading branch information
YoshiakiNishimura authored Sep 27, 2024
2 parents ed23348 + 68423e3 commit b8d1f8f
Show file tree
Hide file tree
Showing 6 changed files with 309 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ file(GLOB SOURCES
"jogasaki/scheduler/details/*.cpp"
"jogasaki/serializer/*.cpp"
"jogasaki/utils/*.cpp"
"jogasaki/dist/*.cpp"
)

if(ENABLE_ALTIMETER)
Expand Down
81 changes: 81 additions & 0 deletions src/jogasaki/dist/key_distribution.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Copyright 2018-2024 Project Tsurugi.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include "key_range.h"

#include <optional>
#include <vector>

namespace jogasaki::dist {

/**
 * @brief interface for querying how keys are distributed over an index.
 * @details implementations answer size/count estimation requests for a key range
 *      and propose pivots that split a range into smaller pieces.
 *      Objects of this type can be neither copied nor moved.
 */
class key_distribution {
public:
    /// @brief the type used for counting pivots.
    using size_type = std::size_t;
    /// @brief the type describing a range on index.
    using range_type = key_range;
    /// @brief the type of a pivot, same as the key type of the range.
    using pivot_type = key_range::key_type;

    constexpr key_distribution() = default;
    virtual ~key_distribution() = default;

    // copying is disabled
    key_distribution(key_distribution const&) = delete;
    key_distribution& operator=(key_distribution const&) = delete;

    // moving is disabled
    key_distribution(key_distribution&&) = delete;
    key_distribution& operator=(key_distribution&&) = delete;

    /**
     * @brief estimates how many entries the index holds within the range.
     * @param range the range on index
     * @return the estimated count of the entries in the range,
     *      or empty if the estimation is not available
     */
    [[nodiscard]] virtual std::optional<double> estimate_count(range_type const& range) = 0;

    /**
     * @brief estimates the key size of an entry within the range.
     * @param range the range on index
     * @return the estimated key size of an entry in the range,
     *      or empty if the estimation is not available
     */
    [[nodiscard]] virtual std::optional<double> estimate_key_size(range_type const& range) = 0;

    /**
     * @brief estimates the value size of an entry within the range.
     * @param range the range on index
     * @return the estimated value size of an entry in the range,
     *      or empty if the estimation is not available
     */
    [[nodiscard]] virtual std::optional<double> estimate_value_size(range_type const& range) = 0;

    /**
     * @brief computes a sequence of pivots that split the range on index.
     * @details the resulting sequence is sorted by the order of the keys on the index
     *      and contains neither the begin key nor the end key of the range.
     * @param max_count the maximum number of pivots to return
     * @param range the range on index to split
     * @return the pivots, at most max_count of them,
     *      or an empty list if the range is not splittable
     * @note the returned pivots need not be actual keys on the index,
     *      and may be ill-formed when compared with the actual keys
     * @post individual pivots are within the range
     */
    [[nodiscard]] virtual std::vector<pivot_type> compute_pivots(
        size_type max_count, range_type const& range) = 0;
};

} // namespace jogasaki::dist
30 changes: 30 additions & 0 deletions src/jogasaki/dist/key_range.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Copyright 2018-2024 Project Tsurugi.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "key_range.h"

#include <optional>

namespace jogasaki::dist {

// Returns the begin key this range was constructed with.
key_range::key_type key_range::begin_key() const noexcept {
    return begin_key_;
}

// Returns the endpoint type that qualifies the begin key.
key_range::endpoint_type key_range::begin_endpoint() const noexcept {
    return begin_endpoint_;
}

// Returns the end key this range was constructed with.
key_range::key_type key_range::end_key() const noexcept {
    return end_key_;
}

// Returns the endpoint type that qualifies the end key.
key_range::endpoint_type key_range::end_endpoint() const noexcept {
    return end_endpoint_;
}
} // namespace jogasaki::dist
84 changes: 84 additions & 0 deletions src/jogasaki/dist/key_range.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* Copyright 2018-2024 Project Tsurugi.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <string_view>

namespace jogasaki::dist {

/**
 * @brief describes a range of keys on an index by its begin and end endpoints.
 * @note keys are stored as std::string_view, i.e. this object does not own the key bytes.
 */
class key_range {
public:
    /// @brief the key type.
    using key_type = std::string_view;

    /// @brief the endpoint type.
    using endpoint_type = enum { unspecified, inclusive, exclusive, prefix_inclusive };

    /**
     * @brief creates a range that covers the whole index.
     * @details both keys are empty and both endpoints are unspecified.
     */
    key_range() noexcept
        : begin_key_{},
          begin_endpoint_{unspecified},
          end_key_{},
          end_endpoint_{unspecified} {}

    /**
     * @brief creates a new range on index.
     * @param begin_key begin key of the range
     * @param begin_endpoint endpoint type of the begin key,
     *      or unspecified if the range starts from head of the index
     * @param end_key end key of the range
     * @param end_endpoint endpoint type of the end key,
     *      or unspecified if the range goes to tail of the index
     */
    key_range(
        key_type begin_key,
        endpoint_type begin_endpoint,
        key_type end_key,
        endpoint_type end_endpoint) noexcept
        : begin_key_{begin_key},
          begin_endpoint_{begin_endpoint},
          end_key_{end_key},
          end_endpoint_{end_endpoint} {}

    /**
     * @brief returns the begin key of the range.
     * @return the begin key
     * @return don't care if begin_endpoint() returns unspecified
     */
    [[nodiscard]] key_type begin_key() const noexcept;

    /**
     * @brief returns the endpoint type of the begin key.
     * @return the endpoint type
     * @return unspecified if the range starts from head of the index
     */
    [[nodiscard]] endpoint_type begin_endpoint() const noexcept;

    /**
     * @brief returns the end key of the range.
     * @return the end key
     * @return don't care if end_endpoint() returns unspecified
     */
    [[nodiscard]] key_type end_key() const noexcept;

    /**
     * @brief returns the endpoint type of the end key.
     * @return the endpoint type
     * @return unspecified if the range goes to tail of the index
     */
    [[nodiscard]] endpoint_type end_endpoint() const noexcept;

private:
    key_type begin_key_;
    endpoint_type begin_endpoint_;
    key_type end_key_;
    endpoint_type end_endpoint_;
};

} // namespace jogasaki::dist
60 changes: 60 additions & 0 deletions src/jogasaki/dist/simple_key_distribution.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Copyright 2018-2024 Project Tsurugi.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "simple_key_distribution.h"

#include <algorithm>
#include <array>
#include <random>

namespace jogasaki::dist {

// This implementation provides no entry count estimation: the result is always empty.
std::optional<double> simple_key_distribution::estimate_count(
    range_type const& /*range*/) {
    return {};
}

// This implementation provides no key size estimation: the result is always empty.
std::optional<double> simple_key_distribution::estimate_key_size(
    range_type const& /*range*/) {
    return {};
}

// This implementation provides no value size estimation: the result is always empty.
std::optional<double> simple_key_distribution::estimate_value_size(
    range_type const& /*range*/) {
    return {};
}

/**
 * @brief computes pivots for the range out of a fixed set of synthetic two-byte keys.
 * @details the candidates are the 256 keys `{0x81, b}` for every byte value `b`.
 *      Candidates strictly greater than the begin key and strictly less than the end key
 *      are kept. A bound is treated as open when its endpoint type is unspecified
 *      (its key is "don't care" in that case) or when its key is empty.
 *      If more than max_count candidates remain, max_count of them are picked at random.
 *      The result is always sorted in ascending key order.
 * @param max_count the maximum number of pivots to return
 * @param range the range on index to split
 * @return the selected pivots, at most max_count of them, possibly empty
 * @note the returned views refer to storage with static lifetime,
 *      so they stay valid after this function returns.
 */
std::vector<simple_key_distribution::pivot_type> simple_key_distribution::compute_pivots(
    size_type max_count, range_type const& range) {
    static constexpr unsigned char prefix = 0x81;
    static constexpr size_type candidate_count = 0x100;
    using candidate_table = std::array<std::array<char, 2>, candidate_count>;

    // pivot_type is a non-owning view: the key bytes must outlive this call.
    // Keeping them in a function-local static (initialized once, thread-safely)
    // avoids returning views into a destroyed temporary string.
    static const candidate_table candidates = [] {
        candidate_table table{};
        for (size_type i = 0; i < candidate_count; ++i) {
            table[i][0] = static_cast<char>(prefix);
            table[i][1] = static_cast<char>(i);
        }
        return table;
    }();

    // The key of an unspecified endpoint carries no meaning, so it must not be used as a bound.
    bool const begin_open =
        range.begin_endpoint() == range_type::unspecified || range.begin_key().empty();
    bool const end_open =
        range.end_endpoint() == range_type::unspecified || range.end_key().empty();

    std::vector<pivot_type> pivots;
    pivots.reserve(candidates.size());
    for (auto const& bytes : candidates) {
        pivot_type const pivot{bytes.data(), bytes.size()};
        // Pivots must not coincide with the keys at the begin/end of the range.
        bool const after_begin = begin_open || pivot > range.begin_key();
        bool const before_end = end_open || pivot < range.end_key();
        if (after_begin && before_end) {
            pivots.emplace_back(pivot);
        }
    }

    if (max_count < pivots.size()) {
        std::random_device rd;
        std::mt19937 g(rd());
        std::shuffle(pivots.begin(), pivots.end(), g);
        pivots.resize(max_count);
        // The shuffle destroyed the key order; callers expect pivots sorted by key.
        std::sort(pivots.begin(), pivots.end());
    }

    return pivots;
}

} // namespace jogasaki::dist
53 changes: 53 additions & 0 deletions src/jogasaki/dist/simple_key_distribution.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* Copyright 2018-2024 Project Tsurugi.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include "key_distribution.h"
#include <optional>
#include <random>
#include <vector>

namespace jogasaki::dist {

/**
 * @brief a simple key_distribution implementation that does not inspect the index.
 * @details the estimate functions always report that no estimation is available,
 *      and compute_pivots() proposes synthetic two-byte keys whose first byte is 0x81.
 *      Objects of this type can be neither copied nor moved.
 */
class simple_key_distribution : public key_distribution {
public:
    using size_type = key_distribution::size_type;
    using range_type = key_distribution::range_type;
    using pivot_type = key_distribution::pivot_type;

    constexpr simple_key_distribution() = default;

    // copying is disabled
    simple_key_distribution(simple_key_distribution const&) = delete;
    simple_key_distribution& operator=(simple_key_distribution const&) = delete;

    // moving is disabled
    simple_key_distribution(simple_key_distribution&&) = delete;
    simple_key_distribution& operator=(simple_key_distribution&&) = delete;

    ~simple_key_distribution() override = default;

    /**
     * @brief reports that the entry count estimation is not available.
     * @param range the range on index (not used)
     * @return always empty
     */
    [[nodiscard]] std::optional<double> estimate_count(range_type const& range) override;

    /**
     * @brief reports that the key size estimation is not available.
     * @param range the range on index (not used)
     * @return always empty
     */
    [[nodiscard]] std::optional<double> estimate_key_size(range_type const& range) override;

    /**
     * @brief reports that the value size estimation is not available.
     * @param range the range on index (not used)
     * @return always empty
     */
    [[nodiscard]] std::optional<double> estimate_value_size(range_type const& range) override;

    /**
     * @brief proposes pivots for the range out of synthetic two-byte keys.
     * @param max_count the maximum number of pivots to return
     * @param range the range on index to split
     * @return the pivots, at most max_count of them
     */
    [[nodiscard]] std::vector<pivot_type> compute_pivots(
        size_type max_count, range_type const& range) override;
};

} // namespace jogasaki::dist

0 comments on commit b8d1f8f

Please sign in to comment.