sources.bib

@manual{iso690,
  organization = {\'U\v rad pro technickou normalizaci, metrologii a st\'atn\'\i{} zku\v sebnictv\'\i{}},
  title        = {\v CSN ISO 690 Informace a dokumentace -- Pravidla pro bibliografick\'e odkazy a citace informa\v cn\'\i{}ch zdroj{\r u}},
  year         = {2011}
}

@inproceedings{awad,
  author    = {Awad, Muhammad A. and Ashkiani, Saman and Johnson, Rob and Farach-Colton, Martín and Owens, John D.},
  title     = {Engineering a High-Performance GPU B-Tree},
  year      = {2019},
  isbn      = {9781450362252},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3293883.3295706},
  doi       = {10.1145/3293883.3295706},
  abstract  = {We engineer a GPU implementation of a B-Tree that supports concurrent queries (point, range, and successor) and updates (insertions and deletions). Our B-tree outperforms the state of the art, a GPU log-structured merge tree (LSM) and a GPU sorted array. In particular, point and range queries are significantly faster than in a GPU LSM (the GPU LSM does not implement successor queries). Furthermore, B-Tree insertions are also faster than LSM and sorted array insertions unless insertions come in batches of more than roughly 100k. Because we cache the upper levels of the tree, we achieve lookup throughput that exceeds the DRAM bandwidth of the GPU. We demonstrate that the key limiter of performance on a GPU is contention and describe the design choices that allow us to achieve this high performance.},
  booktitle = {Proceedings of the 24th Symposium on Principles and Practice of Parallel Programming},
  pages     = {145–157},
  numpages  = {13},
  keywords  = {GPU, B-tree, mutable, dynamic, data structures},
  location  = {Washington, District of Columbia},
  series    = {PPoPP '19}
}

@article{goetz-tech,
  author     = {Graefe, Goetz},
  title      = {Modern B-Tree Techniques},
  year       = {2011},
  issue_date = {April 2011},
  publisher  = {Now Publishers Inc.},
  address    = {Hanover, MA, USA},
  volume     = {3},
  number     = {4},
  issn       = {1931-7883},
  url        = {https://doi.org/10.1561/1900000028},
  doi        = {10.1561/1900000028},
  abstract   = {Invented about 40 years ago and called ubiquitous less than 10 years later, B-tree indexes have been used in a wide variety of computing systems from handheld devices to mainframes and server farms. Over the years, many techniques have been added to the basic design in order to improve efficiency or to add functionality. Examples include separation of updates to structure or contents, utility operations such as non-logged yet transactional index creation, and robust query processing such as graceful degradation during index-to-index navigation.This survey reviews the basics of B-trees and of B-tree indexes in databases, transactional techniques and query processing techniques related to B-trees, B-tree utilities essential for database operations, and many optimizations and improvements. It is intended both as a survey and as a reference, enabling researchers to compare index innovations with advanced B-tree techniques and enabling professionals to select features, functions, and tradeoffs most appropriate for their data management challenges.},
  journal    = {Found. Trends Databases},
  month      = apr,
  pages      = {203–402},
  numpages   = {200}
}

@article{goetz-lock,
  author     = {Graefe, Goetz},
  title      = {A Survey of B-Tree Locking Techniques},
  year       = {2010},
  issue_date = {July 2010},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  volume     = {35},
  number     = {3},
  issn       = {0362-5915},
  url        = {https://doi.org/10.1145/1806907.1806908},
  doi        = {10.1145/1806907.1806908},
  abstract   = {B-trees have been ubiquitous in database management systems for several decades, and they are used in other storage systems as well. Their basic structure and basic operations are well and widely understood including search, insertion, and deletion. Concurrency control of operations in B-trees, however, is perceived as a difficult subject with many subtleties and special cases. The purpose of this survey is to clarify, simplify, and structure the topic of concurrency control in B-trees by dividing it into two subtopics and exploring each of them in depth.},
  journal    = {ACM Trans. Database Syst.},
  month      = jul,
  articleno  = {16},
  numpages   = {26}
}

@article{jaluta,
  author     = {Jaluta, Ibrahim and Sippu, Seppo and Soisalon-Soininen, Eljas},
  title      = {Concurrency Control and Recovery for Balanced B-Link Trees},
  year       = {2005},
  issue_date = {April 2005},
  publisher  = {Springer-Verlag},
  address    = {Berlin, Heidelberg},
  volume     = {14},
  number     = {2},
  issn       = {1066-8888},
  url        = {https://doi.org/10.1007/s00778-004-0140-6},
  doi        = {10.1007/s00778-004-0140-6},
  abstract   = {In this paper we present new concurrent and recoverable B-link-tree algorithms. Unlike previous algorithms, ours maintain the balance of the B-link tree at all times, so that a logarithmic time bound for a search or an update operation is guaranteed under arbitrary sequences of record insertions and deletions. A database transaction can contain any number of operations of the form “fetch the first (or next) matching record”, “insert a record”, or “delete a record”, where database records are identified by their primary keys. Repeatable-read-level isolation for transactions is guaranteed by key-range locking. The algorithms apply the write-ahead logging (WAL) protocol and the steal and no-force buffering policies for index and data pages. Record inserts and deletes on leaf pages of a B-link tree are logged using physiological redo-undo log records. Each structure modification such as a page split or merge is made an atomic action by keeping the pages involved in the modification latched for the (short) duration of the modification and the logging of that modification; at most two B-link-tree pages are kept X-latched at a time. Each structure modification brings the B-link tree into a structurally consistent and balanced state whenever the tree was structurally consistent and balanced initially. Each structure modification is logged using a single physiological redo-only log record. Thus, a structure modification will never be undone even if the transaction that gave rise to it eventually aborts. In restart recovery, the redo pass of our ARIES-based recovery protocol will always produce a structurally consistent and balanced B-link tree, on which the database updates by backward-rolling transactions can always be undone logically, when a physical (page-oriented) undo is no longer possible.},
  journal    = {The VLDB Journal},
  month      = apr,
  pages      = {257–277},
  numpages   = {21},
  keywords   = {Recovery, Concurrency control, Tree-structure modifications, Transaction}
}

@article{lehman,
  author     = {Lehman, Philip L. and Yao, s. Bing},
  title      = {Efficient Locking for Concurrent Operations on B-Trees},
  year       = {1981},
  issue_date = {Dec. 1981},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  volume     = {6},
  number     = {4},
  issn       = {0362-5915},
  url        = {https://doi.org/10.1145/319628.319663},
  doi        = {10.1145/319628.319663},
  abstract   = {The B-tree and its variants have been found to be highly useful (both theoretically and in practice) for storing large amounts of information, especially on secondary storage devices. We examine the problem of overcoming the inherent difficulty of concurrent operations on such structures, using a practical storage model. A single additional “link” pointer in each node allows a process to easily recover from tree modifications performed by other concurrent processes. Our solution compares favorably with earlier solutions in that the locking scheme is simpler (no read-locks are used) and only a (small) constant number of nodes are locked by any update process at any given time. An informal correctness proof for our system is given.},
  journal    = {ACM Trans. Database Syst.},
  month      = dec,
  pages      = {650–670},
  numpages   = {21},
  keywords   = {index organizations, correctness, B-tree, database, multiway search trees, consistencey, locking protocols, concurrency controls, data structures, concurrenct algorithms}
}

@inproceedings{kaczmarski,
  title        = {B$^+$-tree optimized for GPGPU},
  author       = {Kaczmarski, Krzysztof},
  booktitle    = {OTM Confederated International Conferences On the Move to Meaningful Internet Systems},
  pages        = {843--854},
  year         = {2012},
  organization = {Springer}
}

@inproceedings{bayer-org,
  author    = {Bayer, R. and McCreight, E.},
  title     = {Organization and Maintenance of Large Ordered Indices},
  year      = {1970},
  isbn      = {9781450379410},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/1734663.1734671},
  doi       = {10.1145/1734663.1734671},
  booktitle = {Proceedings of the 1970 ACM SIGFIDET (Now SIGMOD) Workshop on Data Description, Access and Control},
  pages     = {107–141},
  numpages  = {35},
  keywords  = {information retrieval, random access files, key insertion, data structures, key retrieval, dynamic index maintenance, paging, key deletion},
  location  = {Houston, Texas},
  series    = {SIGFIDET '70}
}

@article{10.1145/356770.356776,
  author     = {Comer, Douglas},
  title      = {Ubiquitous B-Tree},
  year       = {1979},
  issue_date = {June 1979},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  volume     = {11},
  number     = {2},
  issn       = {0360-0300},
  url        = {https://doi.org/10.1145/356770.356776},
  doi        = {10.1145/356770.356776},
  journal    = {ACM Comput. Surv.},
  month      = jun,
  pages      = {121–137},
  numpages   = {17}
}

@article{samadi1976b,
  title     = {B-trees in a system with multiple users},
  author    = {Samadi, Behrokh},
  journal   = {Information Processing Letters},
  volume    = {5},
  number    = {4},
  pages     = {107--112},
  year      = {1976},
  publisher = {Elsevier}
}

@article{knuth1998art,
  title   = {The Art of computer programming, Volume 3: Sorting and searching (1973)},
  author  = {Knuth, Donald E},
  journal = {Google Scholar Google Scholar Digital Library Digital Library},
  year    = {1998}
}

@inproceedings{fix2011accelerating,
  title     = {Accelerating Brai \\ ded B+ Tree Searches on a GPU with CUDA},
  author    = {Fix, Jordan and Wilkes, Andrew and Skadron, Kevin},
  booktitle = {2nd Workshop on Applications for Multi and Many Core Processors: Analysis, Implementation, and Performance (A4MMC), in conjunction with ISCA},
  year      = {2011}
}

@inproceedings{ashkiani2018dynamic,
  title        = {A dynamic hash table for the GPU},
  author       = {Ashkiani, Saman and Farach-Colton, Martin and Owens, John D},
  booktitle    = {2018 IEEE International Parallel and Distributed Processing Symposium (IPDPS)},
  pages        = {419--429},
  year         = {2018},
  organization = {IEEE}
}

@manual{cudaprog,
  title   = {CUDA C++ Programming Guide},
  author  = {{NVIDIA Corporation}},
  url     = {https://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf},
  urldate = {2021-04-05}
}

@manual{ptxisa,
  title   = {Parallel Thread Execution ISA},
  author  = {{NVIDIA Corporation}},
  url     = {https://docs.nvidia.com/cuda/pdf/ptx_isa_7.3.pdf},
  urldate = {2021-06-27}
}

@manual{tnl,
  title        = {Template Numerical Library},
  author       = {Oberhuber, Tomáš and Klinkovský, Jakub and Wodecki, Aleš},
  howpublished = {software},
  url          = {https://tnl-project.org/},
  urldate      = {2021-04-27}
}

@online{gtest,
  title        = {GoogleTest},
  author       = {{Google LLC.}},
  howpublished = {software},
  url          = {https://github.com/google/googletest},
  urldate      = {2021-04-27}
}

@manual{gtest-advanced,
  title        = {Advanced googletest Topics},
  author       = {{Google LLC.}},
  howpublished = {software},
  url          = {https://github.com/google/googletest/blob/master/docs/advanced.md},
  urldate      = {2021-04-27}
}


@online{cusolver,
  title   = {cuSOLVER},
  author  = {{NVIDIA Corporation}},
  url     = {https://docs.nvidia.com/cuda/cusolver/index.html},
  urldate = {2021-04-30}
}

@online{cudnn,
  title   = {cuDNN Developer Guide},
  author  = {{NVIDIA Corporation}},
  url     = {https://docs.nvidia.com/deeplearning/cudnn/developer-guide/index.html},
  urldate = {2021-04-30}
}

@online{moderngpuarch,
  title   = {An Introduction to Modern GPU Architecture},
  author  = {Rege, Ashu},
  url     = {http://download.nvidia.com/developer/cuda/seminar/TDCI_Arch.pdf},
  urldate = {2021-04-30}
}

@manual{nvidiaampere,
  title   = {NVIDIA Ampere GA102 GPU Architecture},
  author  = {{NVIDIA Corporation}},
  url     = {https://www.nvidia.com/content/PDF/nvidia-ampere-ga-102-gpu-architecture-whitepaper-v2.pdf},
  urldate = {2021-05-02}
}

@inproceedings{8425196,
  author    = {Ashkiani, Saman and Farach-Colton, Martin and Owens, John D.},
  booktitle = {2018 IEEE International Parallel and Distributed Processing Symposium (IPDPS)},
  title     = {A Dynamic Hash Table for the GPU},
  year      = {2018},
  volume    = {},
  number    = {},
  pages     = {419-429},
  doi       = {10.1109/IPDPS.2018.00052}
}

@inproceedings{harmonia,
  author    = {Yan, Zhaofeng and Lin, Yuzhe and Peng, Lu and Zhang, Weihua},
  title     = {Harmonia: A High Throughput B+tree for GPUs},
  year      = {2019},
  isbn      = {9781450362252},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3293883.3295704},
  doi       = {10.1145/3293883.3295704},
  abstract  = {B+tree is one of the most important data structures and has been widely used in different fields. With the increase of concurrent queries and data-scale in storage, designing an efficient B+tree structure has become critical. Due to abundant computation resources, GPUs provide potential opportunities to achieve high query throughput for B+tree. However, prior methods cannot achieve satisfactory performance results due to low resource utilization and poor memory performance.In this paper, we first identify the gaps between B+tree and GPUs. Concurrent B+tree queries involve many global memory accesses and different divergences, which mismatch with GPU features. Based on this observation, we propose Harmonia, a novel B+tree structure to bridge the gap. In Harmonia, a B+tree structure is divided into a key region and a child region. The key region stores the nodes with its keys in a breadth-first order. The child region is organized as a prefix-sum array, which only stores each node's first child index in the key region. Since the prefix-sum child region is small and the children's index can be retrieved through index computations, most of it can be stored in on-chip caches, which can achieve good cache locality. To make it more efficient, Harmonia also includes two optimizations: partially-sorted aggregation and narrowed thread-group traversal, which can mitigate memory and warp divergence and improve resource utilization. Evaluations on a TITAN V GPU show that Harmonia can achieve up to 3.6 billion queries per second, which is about 3.4X faster than that of HB+Tree [39], a recent state-of-the-art GPU solution.},
  booktitle = {Proceedings of the 24th Symposium on Principles and Practice of Parallel Programming},
  pages     = {133–144},
  numpages  = {12},
  keywords  = {GPU, high-throughput, B+tree},
  location  = {Washington, District of Columbia},
  series    = {PPoPP '19}
}

@inproceedings{hb+tree,
  author    = {Shahvarani, Amirhesam and Jacobsen, Hans-Arno},
  title     = {A Hybrid B+-Tree as Solution for In-Memory Indexing on CPU-GPU Heterogeneous Computing Platforms},
  year      = {2016},
  isbn      = {9781450335317},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/2882903.2882918},
  doi       = {10.1145/2882903.2882918},
  abstract  = {An in-memory indexing tree is a critical component of many databases. Modern many-core processors, such as GPUs, are offering tremendous amounts of computing power making them an attractive choice for accelerating indexing. However, the memory available to the accelerating co-processor is rather limited and expensive in comparison to the memory available to the CPU. This drawback is a barrier to exploit the computing power of co-processors for arbitrarily large index trees. In this paper, we propose a novel design for a B+-tree based on the heterogeneous computing platform and the hybrid memory architecture found in GPUs. We propose a hybrid CPU-GPU B+-tree, "HB+-tree," which targets high search throughput use cases. Unique to our design is the joint and simultaneous use of computing and memory resources of CPU-GPU systems. Our experiments show that our HB+-tree can perform up to 240 million index queries per second, which is 2.4X higher than our CPU-optimized solution.},
  booktitle = {Proceedings of the 2016 International Conference on Management of Data},
  pages     = {1523–1538},
  numpages  = {16},
  keywords  = {indexing, heterogeneous computing, in-memory database, B+-tree},
  location  = {San Francisco, California, USA},
  series    = {SIGMOD '16}
}

@inproceedings{openbw-tree,
  author    = {Wang, Ziqi and Pavlo, Andrew and Lim, Hyeontaek and Leis, Viktor and Zhang, Huanchen and Kaminsky, Michael and Andersen, David G.},
  title     = {Building a Bw-Tree Takes More Than Just Buzz Words},
  year      = {2018},
  isbn      = {9781450347037},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3183713.3196895},
  doi       = {10.1145/3183713.3196895},
  abstract  = {In 2013, Microsoft Research proposed the Bw-Tree (humorously termed the "Buzz Word Tree''), a lock-free index that provides high throughput for transactional database workloads in SQL Server's Hekaton engine. The Buzz Word Tree avoids locks by appending delta record to tree nodes and using an indirection layer that allows it to atomically update physical pointers using compare-and-swap (CaS). Correctly implementing this techniques requires careful attention to detail. Unfortunately, the Bw-Tree papers from Microsoft are missing important details and the source code has not been released.This paper has two contributions: First, it is the missing guide for how to build a lock-free Bw-Tree. We clarify missing points in Microsoft's original design documents and then present techniques to improve the index's performance. Although our focus here is on the Bw-Tree, many of our methods apply more broadly to designing and implementing future lock-free in-memory data structures. Our experimental evaluation shows that our optimized variant achieves 1.1--2.5\texttimes{} better performance than the original Microsoft proposal for highly concurrent workloads. Second, our evaluation shows that despite our improvements, the Bw-Tree still does not perform as well as other concurrent data structures that use locks.},
  booktitle = {Proceedings of the 2018 International Conference on Management of Data},
  pages     = {473–488},
  numpages  = {16},
  keywords  = {multicore, lock-free, bw-tree, main memory oltp, database index},
  location  = {Houston, TX, USA},
  series    = {SIGMOD '18}
}

@inproceedings{bw-tree,
  author    = {Levandoski, Justin J. and Lomet, David B. and Sengupta, Sudipta},
  title     = {The Bw-Tree: A B-Tree for New Hardware Platforms},
  year      = {2013},
  isbn      = {9781467349093},
  publisher = {IEEE Computer Society},
  address   = {USA},
  url       = {https://doi.org/10.1109/ICDE.2013.6544834},
  doi       = {10.1109/ICDE.2013.6544834},
  abstract  = {The emergence of new hardware and platforms has led to reconsideration of how data management systems are designed. However, certain basic functions such as key indexed access to records remain essential. While we exploit the common architectural layering of prior systems, we make radically new design decisions about each layer. Our new form of B-tree, called the Bw-tree achieves its very high performance via a latch-free approach that effectively exploits the processor caches of modern multi-core chips. Our storage manager uses a unique form of log structuring that blurs the distinction between a page and a record store and works well with flash storage. This paper describes the architecture and algorithms for the Bw-tree, focusing on the main memory aspects. The paper includes results of our experiments that demonstrate that this fresh approach produces outstanding performance.},
  booktitle = {Proceedings of the 2013 IEEE International Conference on Data Engineering (ICDE 2013)},
  pages     = {302–313},
  numpages  = {12},
  series    = {ICDE '13}
}

@article{fast,
  author     = {Kim, Changkyu and Chhugani, Jatin and Satish, Nadathur and Sedlar, Eric and Nguyen, Anthony D. and Kaldewey, Tim and Lee, Victor W. and Brandt, Scott A. and Dubey, Pradeep},
  title      = {Designing Fast Architecture-Sensitive Tree Search on Modern Multicore/Many-Core Processors},
  year       = {2011},
  issue_date = {December 2011},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  volume     = {36},
  number     = {4},
  issn       = {0362-5915},
  url        = {\url{https://doi.org/10.1145/2043652.2043655}},
  doi        = {10.1145/2043652.2043655},
  abstract   = {In-memory tree structured index search is a fundamental database operation. Modern processors provide tremendous computing power by integrating multiple cores, each with wide vector units. There has been much work to exploit modern processor architectures for database primitives like scan, sort, join, and aggregation. However, unlike other primitives, tree search presents significant challenges due to irregular and unpredictable data accesses in tree traversal. In this article, we present FAST, an extremely fast architecture-sensitive layout of the index tree. FAST is a binary tree logically organized to optimize for architecture features like page size, cache line size, and Single Instruction Multiple Data (SIMD) width of the underlying hardware. FAST eliminates the impact of memory latency, and exploits thread-level and data-level parallelism on both CPUs and GPUs to achieve 50 million (CPU) and 85 million (GPU) queries per second for large trees of 64M elements, with even better results on smaller trees. These are 5X (CPU) and 1.7X (GPU) faster than the best previously reported performance on the same architectures. We also evaluated FAST on the Intel$^tinytextregistered$ Many Integrated Core architecture (Intel$^tinytextregistered$ MIC), showing a speedup of 2.4X--3X over CPU and 1.8X--4.4X over GPU. FAST supports efficient bulk updates by rebuilding index trees in less than 0.1 seconds for datasets as large as 64M keys and naturally integrates compression techniques, overcoming the memory bandwidth bottleneck and achieving a 6X performance improvement over uncompressed index search for large keys on CPUs.},
  journal    = {ACM Trans. Database Syst.},
  month      = dec,
  articleno  = {22},
  numpages   = {34},
  keywords   = {Tree search, many-core, GPU, multicore, compression, single instruction multiple data (SIMD), CPU}
}

@article{palm,
  author     = {Sewall, Jason and Chhugani, Jatin and Kim, Changkyu and Satish, Nadathur and Dubey, Pradeep},
  title      = {PALM: Parallel Architecture-Friendly Latch-Free Modifications to B+ Trees on Many-Core Processors},
  year       = {2011},
  issue_date = {August 2011},
  publisher  = {VLDB Endowment},
  volume     = {4},
  number     = {11},
  issn       = {2150-8097},
  url        = {https://doi.org/10.14778/3402707.3402719},
  doi        = {10.14778/3402707.3402719},
  abstract   = {Concurrency control on B+ trees is primarily achieved with latches, but serialization and contention can hinder scalability. As core counts on current processors increase, it is imperative to develop scalable latch-free techniques for concurrency control.We present PALM, a novel technique for performing multiple concurrent queries on in-memory B+ trees. PALM is based on the Bulk Synchronous Parallel model, which guarantees freedom from deadlocks and race conditions. Input queries are grouped and processed in atomic batches, and work proceeds in stages that preclude contention. Transitions between stages are accomplished with scalable point-to-point communication. PALM exploits data-and thread-level parallelism on modern many-core architectures, and performs 40M1 updates/second on trees with 128M keys, and 128M updates/second on trees with 512K keys on the latest CPU architectures. Our throughput is 2.3X--19X that of state-of-the-art concurrent update algorithms on in-memory B+ trees. PALM obtains close to peak throughput at very low response times of less than 350μs, even for large trees. We also evaluate PALM on the Intel® Many Integrated Core (Intel® MIC) architecture, and demonstrate a speedup of 1.5--2.1X for out-of-cache tree sizes on an Intel® Knights Ferry over a pair of Intel® Xeon® processors DP X5680 (Westmere-EP) in a dual-socket configuration.},
  journal    = {Proc. VLDB Endow.},
  month      = aug,
  pages      = {795–806},
  numpages   = {12}
}

@misc{TLX,
  title        = {{TLX}: Collection of Sophisticated {C++} Data Structures, Algorithms, and Miscellaneous Helpers},
  author       = {Bingmann, Timo},
  howpublished = {software},
  year         = 2018,
  url          = {https://panthema.net/tlx},
  urldate      = {2021-06-24}
}

@misc{palm-impl,
  title        = {An implementation of Intel's concurrent B+Tree (Palm Tree)},
  author       = {Xian, Ran and Zhu, Runshen},
  howpublished = {software},
  url          = {https://github.com/runshenzhu/palmtree},
  urldate      = {2021-06-24}
}

@misc{stx-b+tree,
  title        = {STX B+ Tree C++ Template Classes v0.9},
  author       = {Bingmann, Timo},
  howpublished = {software},
  url          = {https://github.com/bingmann/stx-btree},
  urldate      = {2021-06-24}
}

@misc{bwtree-impl,
  title   = {Open BwTree: An open sourced implementation of Bw-Tree in SQL Server Hekaton},
  author  = {Wang, Ziqi and Zhu, Runshen},
  url     = {https://github.com/wangziqi2013/BwTree},
  urldate = {2021-06-24}
}

@manual{cppreference-map,
  title   = {std::map},
  author  = {{cppreference.com}},
  url     = {https://en.cppreference.com/w/cpp/container/map},
  urldate = {2021-06-24}
}

@manual{postgresql,
  title        = {PostgreSQL: Documentation: 13: Implementation},
  author       = {Oberhuber, Tomáš and Klinkovský, Jakub and Wodecki, Aleš},
  howpublished = {software},
  url          = {https://www.postgresql.org/docs/13/btree-implementation.html},
  urldate      = {2021-04-27}
}

@inproceedings{vinkler2015register,
  title        = {Register efficient dynamic memory allocator for GPUs},
  author       = {Vinkler, Marek and Havran, Vlastimil},
  booktitle    = {Computer Graphics Forum},
  volume       = {34},
  number       = {8},
  pages        = {143--154},
  year         = {2015},
  organization = {Wiley Online Library}
}

@article{btrfs,
  author     = {Rodeh, Ohad and Bacik, Josef and Mason, Chris},
  title      = {BTRFS: The Linux B-Tree Filesystem},
  year       = {2013},
  issue_date = {August 2013},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  volume     = {9},
  number     = {3},
  issn       = {1553-3077},
  url        = {https://doi.org/10.1145/2501620.2501623},
  doi        = {10.1145/2501620.2501623},
  abstract   = {BTRFS is a Linux filesystem that has been adopted as the default filesystem in some popular versions of Linux. It is based on copy-on-write, allowing for efficient snapshots and clones. It uses B-trees as its main on-disk data structure. The design goal is to work well for many use cases and workloads. To this end, much effort has been directed to maintaining even performance as the filesystem ages, rather than trying to support a particular narrow benchmark use-case.Linux filesystems are installed on smartphones as well as enterprise servers. This entails challenges on many different fronts.---Scalability. The filesystem must scale in many dimensions: disk space, memory, and CPUs.---Data integrity. Losing data is not an option, and much effort is expended to safeguard the content. This includes checksums, metadata duplication, and RAID support built into the filesystem.---Disk diversity. The system should work well with SSDs and hard disks. It is also expected to be able to use an array of different sized disks, which poses challenges to the RAID and striping mechanisms.This article describes the core ideas, data structures, and algorithms of this filesystem. It sheds light on the challenges posed by defragmentation in the presence of snapshots, and the tradeoffs required to maintain even performance in the face of a wide spectrum of workloads.},
  journal    = {ACM Trans. Storage},
  month      = aug,
  articleno  = {9},
  numpages   = {32},
  keywords   = {concurrency, shadowing, copy-on-write, B-trees, snapshots, filesystem, RAID}
}

@manual{react,
  title        = {React: A JavaScript library for building user interfaces},
  author       = {{Facebook Inc.}},
  howpublished = {software},
  url          = {https://reactjs.org/},
  urldate      = {2021-04-27}
}

@inproceedings{olap,
  author    = {Gupta, H. and Harinarayan, V. and Rajaraman, A. and Ullman, J.D.},
  booktitle = {Proceedings 13th International Conference on Data Engineering},
  title     = {Index selection for OLAP},
  year      = {1997},
  pages     = {208-219},
  doi       = {10.1109/ICDE.1997.581755}
}

@article{cuda-g80,
  author       = {Shimpi, Anand Lal and Wilson, Derek},
  title        = {NVIDIA's GeForce 8800 (G80): GPUs Re-architected for DirectX 10},
  date         = {2006-11-08},
  journal      = {{Anandtech}},
  url          = {https://www.anandtech.com/show/2116/8},
  urldate      = {2021-04-27}
}