-
Notifications
You must be signed in to change notification settings - Fork 12k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[libc++] Speed up set_intersection() by fast-forwarding over ranges …
…of non-matching elements with one-sided binary search. (#75230) One-sided binary search, aka meta binary search, has been in the public domain for decades, and has the general advantage of being constant time in the best case, with the downside of executing at most 2*log(N) comparisons vs classic binary search's exact log(N). There are two scenarios in which it really shines: the first one is when operating over non-random-access iterators, because the classic algorithm requires knowing the container's size upfront, which adds N iterator increments to the complexity. The second one is when traversing the container in order, trying to fast-forward to the next value: in that case the classic algorithm requires at least O(N*log(N)) comparisons and, for non-random-access iterators, O(N^2) iterator increments, whereas the one-sided version will yield O(N) operations on both counts, with a best-case of O(log(N)) comparisons which is very common in practice.
- Loading branch information
Showing
15 changed files
with
985 additions
and
219 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
184 changes: 184 additions & 0 deletions
184
libcxx/benchmarks/algorithms/set_intersection.bench.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,184 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include <algorithm> | ||
#include <cstdlib> | ||
#include <iterator> | ||
#include <set> | ||
#include <vector> | ||
|
||
#include "common.h" | ||
#include "test_iterators.h" | ||
|
||
namespace { | ||
|
||
// types of containers we'll want to test, covering interesting iterator types | ||
struct VectorContainer { | ||
template <typename... Args> | ||
using type = std::vector<Args...>; | ||
|
||
static constexpr const char* Name = "Vector"; | ||
}; | ||
|
||
struct SetContainer { | ||
template <typename... Args> | ||
using type = std::set<Args...>; | ||
|
||
static constexpr const char* Name = "Set"; | ||
}; | ||
|
||
using AllContainerTypes = std::tuple<VectorContainer, SetContainer>; | ||
|
||
// set_intersection performance may depend on where matching values lie | ||
enum class OverlapPosition { | ||
None, | ||
Front, | ||
// performance-wise, matches at the back are identical to ones at the front | ||
Interlaced, | ||
}; | ||
|
||
struct AllOverlapPositions : EnumValuesAsTuple<AllOverlapPositions, OverlapPosition, 3> { | ||
static constexpr const char* Names[] = {"None", "Front", "Interlaced"}; | ||
}; | ||
|
||
// forward_iterator wrapping which, for each increment, moves the underlying iterator forward Stride elements | ||
template <typename Wrapped> | ||
struct StridedFwdIt { | ||
Wrapped base_; | ||
unsigned stride_; | ||
|
||
using iterator_category = std::forward_iterator_tag; | ||
using difference_type = typename Wrapped::difference_type; | ||
using value_type = typename Wrapped::value_type; | ||
using pointer = typename Wrapped::pointer; | ||
using reference = typename Wrapped::reference; | ||
|
||
StridedFwdIt(Wrapped base, unsigned stride) : base_(base), stride_(stride) { assert(stride_ != 0); } | ||
|
||
StridedFwdIt operator++() { | ||
for (unsigned i = 0; i < stride_; ++i) | ||
++base_; | ||
return *this; | ||
} | ||
StridedFwdIt operator++(int) { | ||
auto tmp = *this; | ||
++*this; | ||
return tmp; | ||
} | ||
value_type& operator*() { return *base_; } | ||
const value_type& operator*() const { return *base_; } | ||
value_type& operator->() { return *base_; } | ||
const value_type& operator->() const { return *base_; } | ||
bool operator==(const StridedFwdIt& o) const { return base_ == o.base_; } | ||
bool operator!=(const StridedFwdIt& o) const { return !operator==(o); } | ||
}; | ||
template <typename Wrapped> | ||
StridedFwdIt(Wrapped, unsigned) -> StridedFwdIt<Wrapped>; | ||
|
||
template <typename T> | ||
std::vector<T> getVectorOfRandom(size_t N) { | ||
std::vector<T> v; | ||
fillValues(v, N, Order::Random); | ||
sortValues(v, Order::Random); | ||
return std::vector<T>(v); | ||
} | ||
|
||
// Realistically, data won't all be nicely contiguous in a container, | ||
// we'll go through some effort to ensure that it's shuffled through memory | ||
// this is especially important for containers with non-contiguous element | ||
// storage, but it will affect even a std::vector, because when you copy a | ||
// std::vector<std::string> the underlying data storage position for the char | ||
// arrays of the copy are likely to have high locality | ||
template <class Container> | ||
std::pair<Container, Container> genCacheUnfriendlyData(size_t size1, size_t size2, OverlapPosition pos) { | ||
using ValueType = typename Container::value_type; | ||
auto move_into = [](auto first, auto last) { | ||
Container out; | ||
std::move(first, last, std::inserter(out, out.begin())); | ||
return out; | ||
}; | ||
const auto src_size = pos == OverlapPosition::None ? size1 + size2 : std::max(size1, size2); | ||
std::vector<ValueType> src = getVectorOfRandom<ValueType>(src_size); | ||
|
||
if (pos == OverlapPosition::None) { | ||
std::sort(src.begin(), src.end()); | ||
return std::make_pair(move_into(src.begin(), src.begin() + size1), move_into(src.begin() + size1, src.end())); | ||
} | ||
|
||
// All other overlap types will have to copy some part of the data, but if | ||
// we copy after sorting it will likely have high locality, so we sort | ||
// each copy separately | ||
auto copy = src; | ||
std::sort(src.begin(), src.end()); | ||
std::sort(copy.begin(), copy.end()); | ||
|
||
switch (pos) { | ||
case OverlapPosition::None: | ||
// we like -Wswitch :) | ||
break; | ||
|
||
case OverlapPosition::Front: | ||
return std::make_pair(move_into(src.begin(), src.begin() + size1), move_into(copy.begin(), copy.begin() + size2)); | ||
|
||
case OverlapPosition::Interlaced: | ||
const auto stride1 = size1 < size2 ? size2 / size1 : 1; | ||
const auto stride2 = size2 < size1 ? size1 / size2 : 1; | ||
return std::make_pair(move_into(StridedFwdIt(src.begin(), stride1), StridedFwdIt(src.end(), stride1)), | ||
move_into(StridedFwdIt(copy.begin(), stride2), StridedFwdIt(copy.end(), stride2))); | ||
} | ||
std::abort(); // would be std::unreachable() if it could | ||
return std::pair<Container, Container>(); | ||
} | ||
|
||
template <class ValueType, class Container, class Overlap> | ||
struct SetIntersection { | ||
using ContainerType = typename Container::template type<Value<ValueType>>; | ||
size_t size1_; | ||
size_t size2_; | ||
|
||
SetIntersection(size_t size1, size_t size2) : size1_(size1), size2_(size2) {} | ||
|
||
bool skip() const noexcept { | ||
// let's save some time and skip simmetrical runs | ||
return size1_ < size2_; | ||
} | ||
|
||
void run(benchmark::State& state) const { | ||
auto input = genCacheUnfriendlyData<ContainerType>(size1_, size2_, Overlap()); | ||
std::vector<Value<ValueType>> out(std::min(size1_, size2_)); | ||
|
||
const auto BATCH_SIZE = std::max(size_t{512}, (2 * TestSetElements) / (size1_ + size2_)); | ||
for (const auto& _ : state) { | ||
while (state.KeepRunningBatch(BATCH_SIZE)) { | ||
for (unsigned i = 0; i < BATCH_SIZE; ++i) { | ||
const auto& [c1, c2] = input; | ||
auto res = std::set_intersection(c1.begin(), c1.end(), c2.begin(), c2.end(), out.begin()); | ||
benchmark::DoNotOptimize(res); | ||
} | ||
} | ||
} | ||
} | ||
|
||
std::string name() const { | ||
return std::string("SetIntersection") + Overlap::name() + '_' + Container::Name + ValueType::name() + '_' + | ||
std::to_string(size1_) + '_' + std::to_string(size2_); | ||
} | ||
}; | ||
|
||
} // namespace | ||
|
||
int main(int argc, char** argv) { /**/ | ||
benchmark::Initialize(&argc, argv); | ||
if (benchmark::ReportUnrecognizedArguments(argc, argv)) | ||
return 1; | ||
|
||
makeCartesianProductBenchmark<SetIntersection, AllValueTypes, AllContainerTypes, AllOverlapPositions>( | ||
Quantities, Quantities); | ||
benchmark::RunSpecifiedBenchmarks(); | ||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.