diff --git a/src/Makefile b/src/Makefile index 020b70d6d5..05fd3917f0 100644 --- a/src/Makefile +++ b/src/Makefile @@ -423,7 +423,7 @@ endif ENGINE_NAME=valkey SERVER_NAME=$(ENGINE_NAME)-server$(PROG_SUFFIX) ENGINE_SENTINEL_NAME=$(ENGINE_NAME)-sentinel$(PROG_SUFFIX) -ENGINE_SERVER_OBJ=threads_mngr.o adlist.o quicklist.o ae.o anet.o dict.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o memory_prefetch.o io_threads.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o +ENGINE_SERVER_OBJ=threads_mngr.o adlist.o quicklist.o ae.o anet.o dict.o hashset.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o memory_prefetch.o io_threads.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o 
evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o ENGINE_CLI_NAME=$(ENGINE_NAME)-cli$(PROG_SUFFIX) ENGINE_CLI_OBJ=anet.o adlist.o dict.o valkey-cli.o zmalloc.o release.o ae.o serverassert.o crcspeed.o crccombine.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o ENGINE_BENCHMARK_NAME=$(ENGINE_NAME)-benchmark$(PROG_SUFFIX) diff --git a/src/acl.c b/src/acl.c index 688820fd89..52263185eb 100644 --- a/src/acl.c +++ b/src/acl.c @@ -652,14 +652,14 @@ void ACLChangeSelectorPerm(aclSelector *selector, struct serverCommand *cmd, int unsigned long id = cmd->id; ACLSetSelectorCommandBit(selector, id, allow); ACLResetFirstArgsForCommand(selector, id); - if (cmd->subcommands_dict) { - dictEntry *de; - dictIterator *di = dictGetSafeIterator(cmd->subcommands_dict); - while ((de = dictNext(di)) != NULL) { - struct serverCommand *sub = (struct serverCommand *)dictGetVal(de); + if (cmd->subcommands_set) { + hashsetIterator iter; + hashsetInitSafeIterator(&iter, cmd->subcommands_set); + struct serverCommand *sub; + while (hashsetNext(&iter, (void **)&sub)) { ACLSetSelectorCommandBit(selector, sub->id, allow); } - dictReleaseIterator(di); + hashsetResetIterator(&iter); } } @@ -669,19 +669,19 @@ void ACLChangeSelectorPerm(aclSelector *selector, struct serverCommand *cmd, int * value. Since the category passed by the user may be non existing, the * function returns C_ERR if the category was not found, or C_OK if it was * found and the operation was performed. 
*/ -void ACLSetSelectorCommandBitsForCategory(dict *commands, aclSelector *selector, uint64_t cflag, int value) { - dictIterator *di = dictGetIterator(commands); - dictEntry *de; - while ((de = dictNext(di)) != NULL) { - struct serverCommand *cmd = dictGetVal(de); +void ACLSetSelectorCommandBitsForCategory(hashset *commands, aclSelector *selector, uint64_t cflag, int value) { + hashsetIterator iter; + hashsetInitIterator(&iter, commands); + struct serverCommand *cmd; + while (hashsetNext(&iter, (void **)&cmd)) { if (cmd->acl_categories & cflag) { ACLChangeSelectorPerm(selector, cmd, value); } - if (cmd->subcommands_dict) { - ACLSetSelectorCommandBitsForCategory(cmd->subcommands_dict, selector, cflag, value); + if (cmd->subcommands_set) { + ACLSetSelectorCommandBitsForCategory(cmd->subcommands_set, selector, cflag, value); } } - dictReleaseIterator(di); + hashsetResetIterator(&iter); } /* This function is responsible for recomputing the command bits for all selectors of the existing users. 
@@ -732,26 +732,26 @@ int ACLSetSelectorCategory(aclSelector *selector, const char *category, int allo return C_OK; } -void ACLCountCategoryBitsForCommands(dict *commands, +void ACLCountCategoryBitsForCommands(hashset *commands, aclSelector *selector, unsigned long *on, unsigned long *off, uint64_t cflag) { - dictIterator *di = dictGetIterator(commands); - dictEntry *de; - while ((de = dictNext(di)) != NULL) { - struct serverCommand *cmd = dictGetVal(de); + hashsetIterator iter; + hashsetInitIterator(&iter, commands); + struct serverCommand *cmd; + while (hashsetNext(&iter, (void **)&cmd)) { if (cmd->acl_categories & cflag) { if (ACLGetSelectorCommandBit(selector, cmd->id)) (*on)++; else (*off)++; } - if (cmd->subcommands_dict) { - ACLCountCategoryBitsForCommands(cmd->subcommands_dict, selector, on, off, cflag); + if (cmd->subcommands_set) { + ACLCountCategoryBitsForCommands(cmd->subcommands_set, selector, on, off, cflag); } } - dictReleaseIterator(di); + hashsetResetIterator(&iter); } /* Return the number of commands allowed (on) and denied (off) for the user 'u' @@ -1163,7 +1163,7 @@ int ACLSetSelector(aclSelector *selector, const char *op, size_t oplen) { return C_ERR; } - if (cmd->subcommands_dict) { + if (cmd->subcommands_set) { /* If user is trying to allow a valid subcommand we can just add its unique ID */ cmd = ACLLookupCommand(op + 1); if (cmd == NULL) { @@ -2754,22 +2754,22 @@ sds getAclErrorMessage(int acl_res, user *user, struct serverCommand *cmd, sds e * ==========================================================================*/ /* ACL CAT category */ -void aclCatWithFlags(client *c, dict *commands, uint64_t cflag, int *arraylen) { - dictEntry *de; - dictIterator *di = dictGetIterator(commands); +void aclCatWithFlags(client *c, hashset *commands, uint64_t cflag, int *arraylen) { + hashsetIterator iter; + hashsetInitIterator(&iter, commands); - while ((de = dictNext(di)) != NULL) { - struct serverCommand *cmd = dictGetVal(de); + struct serverCommand 
*cmd; + while (hashsetNext(&iter, (void **)&cmd)) { if (cmd->acl_categories & cflag) { addReplyBulkCBuffer(c, cmd->fullname, sdslen(cmd->fullname)); (*arraylen)++; } - if (cmd->subcommands_dict) { - aclCatWithFlags(c, cmd->subcommands_dict, cflag, arraylen); + if (cmd->subcommands_set) { + aclCatWithFlags(c, cmd->subcommands_set, cflag, arraylen); } } - dictReleaseIterator(di); + hashsetResetIterator(&iter); } /* Add the formatted response from a single selector to the ACL GETUSER diff --git a/src/config.c b/src/config.c index 663cf5da38..1d7fadfe73 100644 --- a/src/config.c +++ b/src/config.c @@ -532,7 +532,6 @@ void loadServerConfigFromString(char *config) { loadServerConfig(argv[1], 0, NULL); } else if (!strcasecmp(argv[0], "rename-command") && argc == 3) { struct serverCommand *cmd = lookupCommandBySds(argv[1]); - int retval; if (!cmd) { err = "No such command in rename-command"; @@ -541,16 +540,13 @@ void loadServerConfigFromString(char *config) { /* If the target command name is the empty string we just * remove it from the command table. */ - retval = dictDelete(server.commands, argv[1]); - serverAssert(retval == DICT_OK); + serverAssert(hashsetDelete(server.commands, argv[1])); /* Otherwise we re-add the command under a different name. */ if (sdslen(argv[2]) != 0) { - sds copy = sdsdup(argv[2]); - - retval = dictAdd(server.commands, copy, cmd); - if (retval != DICT_OK) { - sdsfree(copy); + sdsfree(cmd->fullname); + cmd->fullname = sdsdup(argv[2]); + if (!hashsetAdd(server.commands, cmd)) { err = "Target command name already exists"; goto loaderr; } diff --git a/src/hashset.c b/src/hashset.c new file mode 100644 index 0000000000..707604b30f --- /dev/null +++ b/src/hashset.c @@ -0,0 +1,1700 @@ +/* + * Copyright Valkey Contributors. + * All rights reserved. + * SPDX-License-Identifier: BSD 3-Clause + */ + +/* Hashset + * ======= + * + * This is an implementation of an open addressing hash table with cache-line + * sized buckets. 
It's designed for speed and low memory overhead. It provides + * lookups using a single memory access in most cases and it provides the + * following features: + * + * - Incremental rehashing using two tables. + * + * - Stateless iteration using 'scan'. + * + * - A hash table contains pointer-sized elements rather than key-value entries. + * Using it as a set is straight-forward. Using it as a key-value store requires + * combining key and value in an object and inserting this object into the + * hash table. A callback for fetching the key from within the element is + * provided by the caller when creating the hash table. + * + * - The element type, key type, hash function and other properties are + * configurable as callbacks in a 'type' structure provided when creating a + * hash table. + * + * Conventions + * ----------- + * + * Functions and types are prefixed by "hashset", macros by "HASHSET". Internal + * names don't use the prefix. Internal functions are 'static'. + * + * Credits + * ------- + * + * - The design of the cache-line aware open addressing scheme is inspired by + * tricks used in 'Swiss tables' (Sam Benzaquen, Alkis Evlogimenos, Matt + * Kulukundis, and Roman Perepelitsa et. al.). + * + * - The incremental rehashing using two tables, though for a chaining hash + * table, was designed by Salvatore Sanfilippo. + * + * - The original scan algorithm (for a chained hash table) was designed by + * Pieter Noordhuis. + * + * - The incremental rehashing and the scan algorithm were adapted for the open + * addressing scheme, including the use of linear probing by scan cursor + * increment, by Viktor Söderqvist. */ +#include "hashset.h" +#include "serverassert.h" +#include "zmalloc.h" +#include "mt19937-64.h" +#include "monotonic.h" + +#include +#include +#include +#include +#include +#include + +/* The default hashing function uses the SipHash implementation in siphash.c. 
*/ + +uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k); +uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k); + +/* --- Global variables --- */ + +static uint8_t hash_function_seed[16]; +static hashsetResizePolicy resize_policy = HASHSET_RESIZE_ALLOW; + +/* --- Fill factor --- */ + +/* We use a soft and a hard limit for the minimum and maximum fill factor. The + * hard limits are used when resizing should be avoided, according to the resize + * policy. Resizing is typically to be avoided when we have forked child process + * running. Then, we don't want to move too much memory around, since the fork + * is using copy-on-write. + * + * With open addressing, the physical fill factor limit is 100% (probes the + * whole table) so we may need to expand even if when it's preferred to avoid + * it. Even if we resize and start inserting new elements in the new table, we + * can avoid actively moving elements from the old table to the new table. When + * the resize policy is AVOID, we perform a step of incremental rehashing only + * on insertions and not on lookups. */ + +#define MAX_FILL_PERCENT_SOFT 77 +#define MAX_FILL_PERCENT_HARD 90 + +#define MIN_FILL_PERCENT_SOFT 13 +#define MIN_FILL_PERCENT_HARD 3 + +/* --- Hash function API --- */ + +/* The seed needs to be 16 bytes. */ +void hashsetSetHashFunctionSeed(const uint8_t *seed) { + memcpy(hash_function_seed, seed, sizeof(hash_function_seed)); +} + +uint8_t *hashsetGetHashFunctionSeed(void) { + return hash_function_seed; +} + +uint64_t hashsetGenHashFunction(const char *buf, size_t len) { + return siphash((const uint8_t *)buf, len, hash_function_seed); +} + +uint64_t hashsetGenCaseHashFunction(const char *buf, size_t len) { + return siphash_nocase((const uint8_t *)buf, len, hash_function_seed); +} + +/* --- Global resize policy API --- */ + +/* The global resize policy is one of + * + * - HASHSET_RESIZE_ALLOW: Rehash as required for optimal performance. 
+ * + * - HASHSET_RESIZE_AVOID: Don't rehash and move memory if it can be avoided; + * used when there is a fork running and we want to avoid affecting + * copy-on-write memory. + * + * - HASHSET_RESIZE_FORBID: Don't rehash at all. Used in a child process which + * doesn't add any keys. + * + * Incremental rehashing works in the following way: A new table is allocated + * and elements are incrementally moved from the old to the new table. + * + * To avoid affecting copy-on-write, we avoid rehashing when there is a forked + * child process. + * + * With an open addressing scheme, we can't completely forbid resizing the table + * if we want to be able to insert elements. It's impossible to insert more + * elements than the number of slots, so we need to allow resizing even if the + * resize policy is set to HASHSET_RESIZE_AVOID, but we resize with incremental + * rehashing paused, so new elements are added to the new table and the old + * elements are rehashed only when the child process is done. + * + * This also means that we may need to resize even if rehashing is already + * started and paused. In the worst case, we need to resize multiple times while + * a child process is running. We fast-forward the rehashing in this case. */ +void hashsetSetResizePolicy(hashsetResizePolicy policy) { + resize_policy = policy; +} + +/* --- Hash table layout --- */ + +#if SIZE_MAX == UINT64_MAX /* 64-bit version */ + +#define ELEMENTS_PER_BUCKET 7 + +/* Selecting the number of buckets. + * + * When resizing the table, we want to select an appropriate number of buckets + * without an expensive division. Division by a power of two is cheap, but any + * other division is expensive. We pick a fill factor to make division cheap for + * our choice of ELEMENTS_PER_BUCKET. + * + * The number of buckets we want is NUM_ELEMENTS / (ELEMENTS_PER_BUCKET * FILL_FACTOR), + * rounded up. The fill is the number of elements we have, or want to put, in + * the table.
+ * + * Instead of the above fraction, we multiply by an integer BUCKET_FACTOR and + * divide by a power-of-two BUCKET_DIVISOR. This gives us a fill factor of at + * most MAX_FILL_PERCENT_SOFT, the soft limit for expanding. + * + * NUM_BUCKETS = ceil(NUM_ELEMENTS * BUCKET_FACTOR / BUCKET_DIVISOR) + * + * This gives us + * + * FILL_FACTOR = NUM_ELEMENTS / (NUM_BUCKETS * ELEMENTS_PER_BUCKET) + * = 1 / (BUCKET_FACTOR / BUCKET_DIVISOR) / ELEMENTS_PER_BUCKET + * = BUCKET_DIVISOR / BUCKET_FACTOR / ELEMENTS_PER_BUCKET + */ + +#define BUCKET_FACTOR 3 +#define BUCKET_DIVISOR 16 +/* When resizing, we get a fill of at most 76.19% (16 / 3 / 7). */ + +#define randomSizeT() ((size_t)genrand64_int64()) + +#elif SIZE_MAX == UINT32_MAX /* 32-bit version */ + +#define ELEMENTS_PER_BUCKET 12 +#define BUCKET_FACTOR 7 +#define BUCKET_DIVISOR 64 +/* When resizing, we get a fill of at most 76.19% (64 / 7 / 12). */ + +#define randomSizeT() ((size_t)random()) + +#else +#error "Only 64-bit or 32-bit architectures are supported" +#endif /* 64-bit vs 32-bit version */ + +#ifndef static_assert +#define static_assert _Static_assert +#endif + +static_assert(100 * BUCKET_DIVISOR / BUCKET_FACTOR / ELEMENTS_PER_BUCKET <= MAX_FILL_PERCENT_SOFT, + "Expand must result in a fill below the soft max fill factor"); +static_assert(MAX_FILL_PERCENT_SOFT <= MAX_FILL_PERCENT_HARD, "Soft vs hard fill factor"); +static_assert(MAX_FILL_PERCENT_HARD < 100, "Hard fill factor must be below 100%"); + +/* --- Random element --- */ + +#define FAIR_RANDOM_SAMPLE_SIZE (ELEMENTS_PER_BUCKET * 40) +#define WEAK_RANDOM_SAMPLE_SIZE ELEMENTS_PER_BUCKET + +/* --- Types --- */ + +/* Open addressing scheme + * ---------------------- + * + * We use an open addressing scheme, with buckets of 64 bytes (one cache line). + * Each bucket contains metadata and element slots for a fixed number of + * elements. In a 64-bit system, there are up to 7 elements per bucket. 
These + * are unordered and an element can be inserted in any of the free slots. + * Additionally, the bucket contains metadata for the elements. This includes a + * few bits of the hash of the key of each element, which are used to rule out + * false positives when looking up elements. + * + * The bucket metadata contains a bit that is set if the bucket has ever been + * full. This bit acts as a tombstone for the bucket and it's what we need to + * know if probing the next bucket is necessary. + * + * Bucket layout, 64-bit version, 7 elements per bucket: + * + * 1 bit 7 bits [1 byte] x 7 [8 bytes] x 7 = 64 bytes + * everfull presence hashes elements + * + * everfull: a shared tombstone; set if the bucket has ever been full + * presence: a bit per element slot indicating if an element is present or not + * hashes: some bits of hash of each element to rule out false positives + * elements: the actual elements, typically pointers (pointer-sized) + * + * The 32-bit version has 12 elements and 19 unused bits per bucket: + * + * 1 bit 12 bits 3 bits [1 byte] x 12 2 bytes [4 bytes] x 12 + * everfull presence unused hashes unused elements + */ + +#if ELEMENTS_PER_BUCKET < 8 +#define BUCKET_BITS_TYPE uint8_t +#define BITS_NEEDED_TO_STORE_POS_WITHIN_BUCKET 3 +#elif ELEMENTS_PER_BUCKET < 16 +#define BUCKET_BITS_TYPE uint16_t +#define BITS_NEEDED_TO_STORE_POS_WITHIN_BUCKET 4 +#else +#error "Unexpected value of ELEMENTS_PER_BUCKET" +#endif + +typedef struct { + BUCKET_BITS_TYPE everfull : 1; + BUCKET_BITS_TYPE presence : ELEMENTS_PER_BUCKET; + uint8_t hashes[ELEMENTS_PER_BUCKET]; + void *elements[ELEMENTS_PER_BUCKET]; +} bucket; + +/* A key property is that the bucket size is one cache line. */ +static_assert(sizeof(bucket) == HASHSET_BUCKET_SIZE, "Bucket size mismatch"); + +struct hashset { + hashsetType *type; + ssize_t rehash_idx; /* -1 = rehashing not in progress. */ + bucket *tables[2]; /* 0 = main table, 1 = rehashing target.
*/ + size_t used[2]; /* Number of elements in each table. */ + int8_t bucket_exp[2]; /* Exponent for num buckets (num = 1 << exp). */ + int16_t pause_rehash; /* Non-zero = rehashing is paused */ + int16_t pause_auto_shrink; /* Non-zero = automatic resizing disallowed. */ + size_t everfulls[2]; /* Number of buckets with the everfull flag set. */ + void *metadata[]; +}; + +/* Struct used for stats functions. */ +struct hashsetStats { + int table_index; /* 0 or 1 (old or new while rehashing). */ + unsigned long buckets; /* Number of buckets. */ + unsigned long max_chain_len; /* Length of longest probing chain. */ + unsigned long probe_count; /* Number of buckets with probing flag. */ + unsigned long size; /* Number of element slots (incl. empty). */ + unsigned long used; /* Number of elements. */ + unsigned long *clvector; /* (Probing-)chain length vector; element i + * counts probing chains of length i. */ +}; + +/* Struct for sampling elements using scan, used by random key functions. */ + +typedef struct { + unsigned size; /* Size of the elements array. */ + unsigned count; /* Number of elements already sampled. */ + void **elements; /* Array of sampled elements. 
*/ +} scan_samples; + +/* --- Internal functions --- */ + +static bucket *findBucketForInsert(hashset *s, uint64_t hash, int *pos_in_bucket, int *table_index); + +static inline void freeElement(hashset *s, void *element) { + if (s->type->elementDestructor) s->type->elementDestructor(s, element); +} + +static inline int compareKeys(hashset *s, const void *key1, const void *key2) { + if (s->type->keyCompare != NULL) { + return s->type->keyCompare(s, key1, key2); + } else { + return key1 != key2; + } +} + +static inline const void *elementGetKey(hashset *s, const void *element) { + if (s->type->elementGetKey != NULL) { + return s->type->elementGetKey(element); + } else { + return element; + } +} + +static inline uint64_t hashKey(hashset *s, const void *key) { + if (s->type->hashFunction != NULL) { + return s->type->hashFunction(key); + } else { + return hashsetGenHashFunction((const char *)&key, sizeof(key)); + } +} + +static inline uint64_t hashElement(hashset *s, const void *element) { + return hashKey(s, elementGetKey(s, element)); +} + + +/* For the hash bits stored in the bucket, we use the highest bits of the hash + * value, since these are not used for selecting the bucket. */ +static inline uint8_t highBits(uint64_t hash) { + return hash >> (CHAR_BIT * 7); +} + +static inline int bucketIsFull(bucket *b) { + return b->presence == (1 << ELEMENTS_PER_BUCKET) - 1; +} + +/* Returns non-zero if the position within the bucket is occupied. */ +static inline int isPositionFilled(bucket *b, int position) { + return b->presence & (1 << position); +} +static void resetTable(hashset *s, int table_idx) { + s->tables[table_idx] = NULL; + s->used[table_idx] = 0; + s->bucket_exp[table_idx] = -1; + s->everfulls[table_idx] = 0; +} + +static inline size_t numBuckets(int exp) { + return exp == -1 ? 0 : (size_t)1 << exp; +} + +/* Bitmask for masking the hash value to get bucket index. */ +static inline size_t expToMask(int exp) { + return exp == -1 ? 
0 : numBuckets(exp) - 1; +} + +/* Returns the 'exp', where num_buckets = 1 << exp. The number of + * buckets is a power of two. */ +static signed char nextBucketExp(size_t min_capacity) { + if (min_capacity == 0) return -1; + /* ceil(x / y) = floor((x - 1) / y) + 1 */ + size_t min_buckets = (min_capacity * BUCKET_FACTOR - 1) / BUCKET_DIVISOR + 1; + if (min_buckets >= SIZE_MAX / 2) return CHAR_BIT * sizeof(size_t) - 1; + if (min_buckets == 1) return 0; + return CHAR_BIT * sizeof(size_t) - __builtin_clzl(min_buckets - 1); +} + +/* Swaps the tables and frees the old table. */ +static void rehashingCompleted(hashset *s) { + if (s->type->rehashingCompleted) s->type->rehashingCompleted(s); + if (s->tables[0]) zfree(s->tables[0]); + s->bucket_exp[0] = s->bucket_exp[1]; + s->tables[0] = s->tables[1]; + s->used[0] = s->used[1]; + s->everfulls[0] = s->everfulls[1]; + resetTable(s, 1); + s->rehash_idx = -1; +} + +/* Reverse bits, adapted to use bswap, from + * https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */ +static size_t rev(size_t v) { +#if SIZE_MAX == UINT64_MAX + /* Swap odd and even bits. */ + v = ((v >> 1) & 0x5555555555555555) | ((v & 0x5555555555555555) << 1); + /* Swap consecutive pairs. */ + v = ((v >> 2) & 0x3333333333333333) | ((v & 0x3333333333333333) << 2); + /* Swap nibbles. */ + v = ((v >> 4) & 0x0F0F0F0F0F0F0F0F) | ((v & 0x0F0F0F0F0F0F0F0F) << 4); + /* Reverse bytes. */ + v = __builtin_bswap64(v); +#else + /* 32-bit version. */ + v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1); + v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2); + v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4); + v = __builtin_bswap32(v); +#endif + return v; +} + +/* Advances a scan cursor to the next value. It increments the reverse bit + * representation of the masked bits of v. This algorithm was invented by Pieter + * Noordhuis. */ +size_t nextCursor(size_t v, size_t mask) { + v |= ~mask; /* Set the unmasked (high) bits. 
*/ + v = rev(v); /* Reverse. The unmasked bits are now the low bits. */ + v++; /* Increment the reversed cursor, flipping the unmasked bits to + * 0 and incrementing the masked bits. */ + v = rev(v); /* Reverse the bits back to normal. */ + return v; +} + +/* The reverse of nextCursor. */ +static size_t prevCursor(size_t v, size_t mask) { + v = rev(v); + v--; + v = rev(v); + v = v & mask; + return v; +} + +/* Returns 1 if cursor A is less than cursor B, compared in cursor next/prev + * order, 0 otherwise. This function can be used to compare bucket indexes in + * probing order (since probing order is cursor order) and to check if a bucket + * has already been rehashed, since incremental rehashing is also performed in + * cursor order. */ +static inline int cursorIsLessThan(size_t a, size_t b) { + /* Since cursors are advanced in reversed-bits order, we can just reverse + * both numbers to compare them. If one cursor has more bits than the other, + * it is not significant, since the more significant bits become less + * significant when reversing. */ + return rev(a) < rev(b); +} + +/* Rehashes one bucket. */ +static void rehashStep(hashset *s) { + assert(hashsetIsRehashing(s)); + size_t idx = s->rehash_idx; + bucket *b = &s->tables[0][idx]; + int pos; + for (pos = 0; pos < ELEMENTS_PER_BUCKET; pos++) { + if (!isPositionFilled(b, pos)) continue; /* empty */ + void *element = b->elements[pos]; + uint8_t h2 = b->hashes[pos]; + /* Insert into table 1. */ + uint64_t hash; + /* When shrinking, it's possible to avoid computing the hash. We can + * just use idx as the hash, but only if we know that probing didn't + * push this element away from its primary bucket, so only if the + * bucket before the current one hasn't ever been full.
*/ + if (s->bucket_exp[1] <= s->bucket_exp[0] && + !s->tables[0][prevCursor(idx, expToMask(s->bucket_exp[0]))].everfull) { + hash = idx; + } else { + hash = hashElement(s, element); + } + int pos_in_dst_bucket; + bucket *dst = findBucketForInsert(s, hash, &pos_in_dst_bucket, NULL); + dst->elements[pos_in_dst_bucket] = element; + dst->hashes[pos_in_dst_bucket] = h2; + dst->presence |= (1 << pos_in_dst_bucket); + if (!dst->everfull && bucketIsFull(dst)) { + dst->everfull = 1; + s->everfulls[1]++; + } + s->used[0]--; + s->used[1]++; + } + /* Mark the source bucket as empty. */ + b->presence = 0; + /* Bucket done. Advance to the next bucket in probing order. We rehash in + * this order to be able to skip already rehashed buckets in scan. */ + s->rehash_idx = nextCursor(s->rehash_idx, expToMask(s->bucket_exp[0])); + if (s->rehash_idx == 0) { + rehashingCompleted(s); + } +} + +/* Called internally on lookup and other reads to the table. */ +static inline void rehashStepOnReadIfNeeded(hashset *s) { + if (!hashsetIsRehashing(s) || s->pause_rehash) return; + if (resize_policy != HASHSET_RESIZE_ALLOW) return; + rehashStep(s); +} + +/* When inserting or deleting, we first do a find (read) and rehash one step if + * resize policy is set to ALLOW, so here we only do it if resize policy is + * AVOID. The reason for doing it on insert and delete is to ensure that we + * finish rehashing before we need to resize the table again. */ +static inline void rehashStepOnWriteIfNeeded(hashset *s) { + if (!hashsetIsRehashing(s) || s->pause_rehash) return; + if (resize_policy != HASHSET_RESIZE_AVOID) return; + rehashStep(s); +} + +/* Allocates a new table and initiates incremental rehashing if necessary. + * Returns 1 on resize (success), 0 on no resize (failure). If 0 is returned and + * 'malloc_failed' is provided, it is set to 1 if allocation failed. If + * 'malloc_failed' is not provided, an allocation failure triggers a panic. 
*/ +static int resize(hashset *s, size_t min_capacity, int *malloc_failed) { + if (malloc_failed) *malloc_failed = 0; + + /* Adjust minimum size. We don't resize to zero currently. */ + if (min_capacity == 0) min_capacity = 1; + + /* Size of new table. */ + signed char exp = nextBucketExp(min_capacity); + size_t num_buckets = numBuckets(exp); + size_t new_capacity = num_buckets * ELEMENTS_PER_BUCKET; + if (new_capacity < min_capacity || num_buckets * sizeof(bucket) < num_buckets) { + /* Overflow */ + return 0; + } + + signed char old_exp = s->bucket_exp[hashsetIsRehashing(s) ? 1 : 0]; + size_t alloc_size = num_buckets * sizeof(bucket); + if (exp == old_exp) { + /* The only time we want to allow resize to the same size is when we + * have too many tombstones and need to rehash to improve probing + * performance. */ + if (hashsetIsRehashing(s)) return 0; + size_t old_num_buckets = numBuckets(s->bucket_exp[0]); + if (s->everfulls[0] < old_num_buckets / 2) return 0; + if (s->everfulls[0] != old_num_buckets && s->everfulls[0] < 10) return 0; + } else if (s->type->resizeAllowed) { + double fill_factor = (double)min_capacity / ((double)numBuckets(old_exp) * ELEMENTS_PER_BUCKET); + if (fill_factor * 100 < MAX_FILL_PERCENT_HARD && !s->type->resizeAllowed(alloc_size, fill_factor)) { + /* Resize callback says no. */ + return 0; + } + } + + /* We can't resize if rehashing is already ongoing. Fast-forward ongoing + * rehashing before we continue. */ + while (hashsetIsRehashing(s)) { + rehashStep(s); + } + + /* Allocate the new hash table. */ + bucket *new_table; + if (malloc_failed) { + new_table = ztrycalloc(alloc_size); + if (new_table == NULL) { + *malloc_failed = 1; + return 0; + } + } else { + new_table = zcalloc(alloc_size); + } + s->bucket_exp[1] = exp; + s->tables[1] = new_table; + s->used[1] = 0; + s->rehash_idx = 0; + if (s->type->rehashingStarted) s->type->rehashingStarted(s); + + /* If the old table was empty, the rehashing is completed immediately. 
*/ + if (s->tables[0] == NULL || s->used[0] == 0) { + rehashingCompleted(s); + } else if (s->type->instant_rehashing) { + while (hashsetIsRehashing(s)) { + rehashStep(s); + } + } + return 1; +} + +/* Probing is slow when there are too long probing chains, i.e. too many + * tombstones. Resize to the same size to trigger rehashing. */ +static int cleanUpProbingChainsIfNeeded(hashset *s) { + if (hashsetIsRehashing(s) || resize_policy == HASHSET_RESIZE_FORBID) { + return 0; + } + if (s->everfulls[0] * 100 >= numBuckets(s->bucket_exp[0]) * MAX_FILL_PERCENT_SOFT) { + return resize(s, s->used[0], NULL); + } + return 0; +} + +/* Returns 1 if the table is expanded, 0 if not expanded. If 0 is returned and + * 'malloc_failed' is provided, it is set to 1 if malloc failed and 0 + * otherwise. */ +static int expand(hashset *s, size_t size, int *malloc_failed) { + if (size < hashsetSize(s)) { + return 0; + } + return resize(s, size, malloc_failed); +} + +/* Finds an element matching the key. If a match is found, returns a pointer to + * the bucket containing the matching element and points 'pos_in_bucket' to the + * index within the bucket. Returns NULL if no matching element was found. + * + * If 'table_index' is provided, it is set to the index of the table (0 or 1) + * the returned bucket belongs to. */ +static bucket *findBucket(hashset *s, uint64_t hash, const void *key, int *pos_in_bucket, int *table_index) { + if (hashsetSize(s) == 0) return 0; + uint8_t h2 = highBits(hash); + int table; + + /* Do some incremental rehashing. */ + rehashStepOnReadIfNeeded(s); + + /* Check rehashing destination table first, since it is newer and typically + * has less 'everfull' flagged buckets. Therefore it needs less probing for + * lookup.
*/ + for (table = 1; table >= 0; table--) { + if (s->used[table] == 0) continue; + size_t mask = expToMask(s->bucket_exp[table]); + size_t bucket_idx = hash & mask; + size_t start_bucket_idx = bucket_idx; + while (1) { + bucket *b = &s->tables[table][bucket_idx]; + /* Find candidate elements with presence flag set and matching h2 hash. */ + for (int pos = 0; pos < ELEMENTS_PER_BUCKET; pos++) { + if (isPositionFilled(b, pos) && b->hashes[pos] == h2) { + /* It's a candidate. */ + void *element = b->elements[pos]; + const void *elem_key = elementGetKey(s, element); + if (compareKeys(s, key, elem_key) == 0) { + /* It's a match. */ + assert(pos_in_bucket != NULL); + *pos_in_bucket = pos; + if (table_index) *table_index = table; + return b; + } + } + } + + /* Probe the next bucket? */ + if (!b->everfull) break; + bucket_idx = nextCursor(bucket_idx, mask); + if (bucket_idx == start_bucket_idx) { + /* We probed the whole table. It can happen that all buckets + * have the 'everfull' bit set. This can only happen for small + * tables and then rehashing is already in progress. */ + break; + } + } + } + return NULL; +} + +/* Find an empty position in the table for inserting an element with the given hash. */ +static bucket *findBucketForInsert(hashset *s, uint64_t hash, int *pos_in_bucket, int *table_index) { + int table = hashsetIsRehashing(s) ? 1 : 0; + assert(s->tables[table]); + size_t mask = expToMask(s->bucket_exp[table]); + size_t bucket_idx = hash & mask; + while (1) { + bucket *b = &s->tables[table][bucket_idx]; + for (int pos = 0; pos < ELEMENTS_PER_BUCKET; pos++) { + if (isPositionFilled(b, pos)) continue; /* busy */ + assert(pos_in_bucket != NULL); + *pos_in_bucket = pos; + if (table_index) *table_index = table; + return b; + } + bucket_idx = nextCursor(bucket_idx, mask); + } +} + +/* Encode bucket_index, pos_in_bucket, table_index into an opaque pointer. 
*/ +static void *encodePositionInTable(size_t bucket_index, int pos_in_bucket, int table_index) { + uintptr_t encoded = bucket_index; + encoded <<= BITS_NEEDED_TO_STORE_POS_WITHIN_BUCKET; + encoded |= pos_in_bucket; + encoded <<= 1; + encoded |= table_index; + encoded++; /* Add one to make sure we don't return NULL. */ + return (void *)encoded; +} + +/* Decodes a position in the table encoded using encodePositionInTable(). */ +static void decodePositionInTable(void *encoded_position, size_t *bucket_index, int *pos_in_bucket, int *table_index) { + uintptr_t encoded = (uintptr_t)encoded_position; + encoded--; + *table_index = encoded & 1; + encoded >>= 1; + *pos_in_bucket = encoded & ((1 << BITS_NEEDED_TO_STORE_POS_WITHIN_BUCKET) - 1); + encoded >>= BITS_NEEDED_TO_STORE_POS_WITHIN_BUCKET; + *bucket_index = encoded; +} + +/* Helper to insert an element. Doesn't check if an element with a matching key + * already exists. This must be ensured by the caller. */ +static void insert(hashset *s, uint64_t hash, void *element) { + hashsetExpandIfNeeded(s); + rehashStepOnWriteIfNeeded(s); + int pos_in_bucket; + int table_index; + bucket *b = findBucketForInsert(s, hash, &pos_in_bucket, &table_index); + b->elements[pos_in_bucket] = element; + b->presence |= (1 << pos_in_bucket); + b->hashes[pos_in_bucket] = highBits(hash); + s->used[table_index]++; + if (!b->everfull && bucketIsFull(b)) { + b->everfull = 1; + s->everfulls[table_index]++; + cleanUpProbingChainsIfNeeded(s); + } +} + +/* A 63-bit fingerprint of some of the state of the hash table. */ +static uint64_t hashsetFingerprint(hashset *s) { + uint64_t integers[6], hash = 0; + integers[0] = (uintptr_t)s->tables[0]; + integers[1] = s->bucket_exp[0]; + integers[2] = s->used[0]; + integers[3] = (uintptr_t)s->tables[1]; + integers[4] = s->bucket_exp[1]; + integers[5] = s->used[1]; + + /* Result = hash(hash(hash(int1)+int2)+int3) */ + for (int j = 0; j < 6; j++) { + hash += integers[j]; + /* Tomas Wang's 64 bit integer hash. 
*/ + hash = (~hash) + (hash << 21); /* hash = (hash << 21) - hash - 1; */ + hash = hash ^ (hash >> 24); + hash = (hash + (hash << 3)) + (hash << 8); /* hash * 265 */ + hash = hash ^ (hash >> 14); + hash = (hash + (hash << 2)) + (hash << 4); /* hash * 21 */ + hash = hash ^ (hash >> 28); + hash = hash + (hash << 31); + } + + /* Clear the highest bit. We only want 63 bits. */ + hash &= 0x7fffffffffffffff; + return hash; +} + +/* Scan callback function used by hashsetGetSomeElements() for sampling elements + * using scan. */ +static void sampleElementsScanFn(void *privdata, void *element) { + scan_samples *samples = privdata; + if (samples->count < samples->size) { + samples->elements[samples->count++] = element; + } +} + +/* --- API functions --- */ + +/* Allocates and initializes a new hashtable specified by the given type. */ +hashset *hashsetCreate(hashsetType *type) { + size_t metasize = type->getMetadataSize ? type->getMetadataSize() : 0; + hashset *s = zmalloc(sizeof(*s) + metasize); + if (metasize > 0) { + memset(&s->metadata, 0, metasize); + } + s->type = type; + s->rehash_idx = -1; + s->pause_rehash = 0; + s->pause_auto_shrink = 0; + resetTable(s, 0); + resetTable(s, 1); + return s; +} + +/* Deletes all the elements. If a callback is provided, it is called from time + * to time to indicate progress. */ +void hashsetEmpty(hashset *s, void(callback)(hashset *)) { + if (hashsetIsRehashing(s)) { + /* Pretend rehashing completed. */ + if (s->type->rehashingCompleted) s->type->rehashingCompleted(s); + s->rehash_idx = -1; + } + for (int table_index = 0; table_index <= 1; table_index++) { + if (s->bucket_exp[table_index] < 0) { + continue; + } + if (s->type->elementDestructor) { + /* Call the destructor with each element. 
*/ + for (size_t idx = 0; idx < numBuckets(s->bucket_exp[table_index]); idx++) { + if (callback && (idx & 65535) == 0) callback(s); + bucket *b = &s->tables[table_index][idx]; + if (b->presence == 0) { + continue; + } + for (int pos = 0; pos < ELEMENTS_PER_BUCKET; pos++) { + if (isPositionFilled(b, pos)) { + s->type->elementDestructor(s, b->elements[pos]); + } + } + } + } + zfree(s->tables[table_index]); + resetTable(s, table_index); + } +} + +/* Deletes all the elements and frees the table. */ +void hashsetRelease(hashset *s) { + hashsetEmpty(s, NULL); + zfree(s); +} + +/* Returns the type of the hashtable. */ +hashsetType *hashsetGetType(hashset *s) { + return s->type; +} + +/* Returns a pointer to the table's metadata (userdata) section. */ +void *hashsetMetadata(hashset *s) { + return &s->metadata; +} + +/* Returns the number of elements stored. */ +size_t hashsetSize(hashset *s) { + return s->used[0] + s->used[1]; +} + +/* Returns the number of hash table buckets. */ +size_t hashsetBuckets(hashset *s) { + return numBuckets(s->bucket_exp[0]) + numBuckets(s->bucket_exp[1]); +} + +/* Returns the number of buckets that have the probe flag (tombstone) set. */ +size_t hashsetProbeCounter(hashset *s, int table) { + return s->everfulls[table]; +} + +/* Returns the size of the hashset structures, in bytes (not including the sizes + * of the elements, if the elements are pointers to allocated objects). */ +size_t hashsetMemUsage(hashset *s) { + size_t num_buckets = numBuckets(s->bucket_exp[0]) + numBuckets(s->bucket_exp[1]); + size_t metasize = s->type->getMetadataSize ? s->type->getMetadataSize() : 0; + return sizeof(hashset) + metasize + sizeof(bucket) * num_buckets; +} + +/* Pauses automatic shrinking. This can be called before deleting a lot of + * elements, to prevent automatic shrinking from being triggered multiple times. + * Call hashtableResumeAutoShrink afterwards to restore automatic shrinking. 
*/ +void hashsetPauseAutoShrink(hashset *s) { + s->pause_auto_shrink++; +} + +/* Re-enables automatic shrinking, after it has been paused. If you have deleted + * many elements while automatic shrinking was paused, you may want to call + * hashsetShrinkIfNeeded. */ +void hashsetResumeAutoShrink(hashset *s) { + s->pause_auto_shrink--; + if (s->pause_auto_shrink == 0) { + hashsetShrinkIfNeeded(s); + } +} + +/* Pauses incremental rehashing. */ +void hashsetPauseRehashing(hashset *s) { + s->pause_rehash++; +} + +/* Resumes incremental rehashing, after pausing it. */ +void hashsetResumeRehashing(hashset *s) { + s->pause_rehash--; +} + +/* Returns 1 if incremental rehashing is paused, 0 if it isn't. */ +int hashsetIsRehashingPaused(hashset *s) { + return s->pause_rehash > 0; +} + +/* Returns 1 if incremental rehashing is in progress, 0 otherwise. */ +int hashsetIsRehashing(hashset *s) { + return s->rehash_idx != -1; +} + +/* Provides the number of buckets in the old and new tables during rehashing. + * To get the sizes in bytes, multiply by HASHTAB_BUCKET_SIZE. This function can + * only be used when rehashing is in progress, and from the rehashingStarted and + * rehashingCompleted callbacks. */ +void hashsetRehashingInfo(hashset *s, size_t *from_size, size_t *to_size) { + assert(hashsetIsRehashing(s)); + *from_size = numBuckets(s->bucket_exp[0]); + *to_size = numBuckets(s->bucket_exp[1]); +} + +int hashsetRehashMicroseconds(hashset *s, uint64_t us) { + if (s->pause_rehash > 0) return 0; + if (resize_policy != HASHSET_RESIZE_ALLOW) return 0; + + monotime timer; + elapsedStart(&timer); + int rehashes = 0; + + while (hashsetIsRehashing(s)) { + rehashStep(s); + rehashes++; + if (rehashes % 128 == 0 && elapsedUs(timer) >= us) break; + } + return rehashes; +} + +/* Return 1 if expand was performed; 0 otherwise. */ +int hashsetExpand(hashset *s, size_t size) { + return expand(s, size, NULL); +} + +/* Returns 1 if expand was performed or if expand is not needed. 
Returns 0 if + * expand failed due to memory allocation failure. */ +int hashsetTryExpand(hashset *s, size_t size) { + int malloc_failed = 0; + return expand(s, size, &malloc_failed) || !malloc_failed; +} + +/* Expanding is done automatically on insertion, but less eagerly if resize + * policy is set to AVOID or FORBID. After restoring resize policy to ALLOW, you + * may want to call hashsetExpandIfNeeded. Returns 1 if expanding, 0 if not + * expanding. */ +int hashsetExpandIfNeeded(hashset *s) { + size_t min_capacity = s->used[0] + s->used[1] + 1; + size_t num_buckets = numBuckets(s->bucket_exp[hashsetIsRehashing(s) ? 1 : 0]); + size_t current_capacity = num_buckets * ELEMENTS_PER_BUCKET; + unsigned max_fill_percent = resize_policy == HASHSET_RESIZE_AVOID ? MAX_FILL_PERCENT_HARD : MAX_FILL_PERCENT_SOFT; + if (min_capacity * 100 <= current_capacity * max_fill_percent) { + return 0; + } + return resize(s, min_capacity, NULL); +} + +/* Shrinking is done automatically on deletion, but less eagerly if resize + * policy is set to AVOID and not at all if set to FORBID. After restoring + * resize policy to ALLOW, you may want to call hashsetShrinkIfNeeded. */ +int hashsetShrinkIfNeeded(hashset *s) { + /* Don't shrink if rehashing is already in progress. */ + if (hashsetIsRehashing(s) || resize_policy == HASHSET_RESIZE_FORBID) { + return 0; + } + size_t current_capacity = numBuckets(s->bucket_exp[0]) * ELEMENTS_PER_BUCKET; + unsigned min_fill_percent = resize_policy == HASHSET_RESIZE_AVOID ? MIN_FILL_PERCENT_HARD : MIN_FILL_PERCENT_SOFT; + if (s->used[0] * 100 > current_capacity * min_fill_percent) { + return 0; + } + return resize(s, s->used[0], NULL); +} + +/* Defragment the internal allocations of the hashset by reallocating them. The + * provided defragfn callback should either return NULL (if reallocation is not + * necessary) or reallocate the memory like realloc() would do. + * + * Returns NULL if the hashset's top-level struct hasn't been reallocated. 
+ * Returns non-NULL if the top-level allocation has been reallocated and thus + * making the 's' pointer invalid. */ +hashset *hashsetDefragInternals(hashset *s, void *(*defragfn)(void *)) { + /* The hashset struct */ + hashset *s1 = defragfn(s); + if (s1 != NULL) s = s1; + /* The tables */ + for (int i = 0; i <= 1; i++) { + if (s->tables[i] == NULL) continue; + void *table = defragfn(s->tables[i]); + if (table != NULL) s->tables[i] = table; + } + return s1; +} + +/* Returns 1 if an element was found matching the key. Also points *found to it, + * if found is provided. Returns 0 if no matching element was found. */ +int hashsetFind(hashset *s, const void *key, void **found) { + if (hashsetSize(s) == 0) return 0; + uint64_t hash = hashKey(s, key); + int pos_in_bucket = 0; + bucket *b = findBucket(s, hash, key, &pos_in_bucket, NULL); + if (b) { + if (found) *found = b->elements[pos_in_bucket]; + return 1; + } else { + return 0; + } +} + +/* Returns a pointer to where an element is stored within the hash table, or + * NULL if not found. To get the element, dereference the returned pointer. The + * pointer can be used to replace the element with an equivalent element (same + * key, same hash value), but note that the pointer may be invalidated by future + * accesses to the hash table due to incremental rehashing, so use with care. */ +void **hashsetFindRef(hashset *s, const void *key) { + if (hashsetSize(s) == 0) return NULL; + uint64_t hash = hashKey(s, key); + int pos_in_bucket = 0; + bucket *b = findBucket(s, hash, key, &pos_in_bucket, NULL); + return b ? &b->elements[pos_in_bucket] : NULL; +} + +/* Adds an element. Returns 1 on success. Returns 0 if there was already an element + * with the same key. */ +int hashsetAdd(hashset *s, void *element) { + return hashsetAddOrFind(s, element, NULL); +} + +/* Adds an element and returns 1 on success. 
Returns 0 if there was already an + * element with the same key and, if an 'existing' pointer is provided, it is + * pointed to the existing element. */ +int hashsetAddOrFind(hashset *s, void *element, void **existing) { + const void *key = elementGetKey(s, element); + uint64_t hash = hashKey(s, key); + int pos_in_bucket = 0; + bucket *b = findBucket(s, hash, key, &pos_in_bucket, NULL); + if (b != NULL) { + if (existing) *existing = b->elements[pos_in_bucket]; + return 0; + } else { + insert(s, hash, element); + return 1; + } +} + +/* Finds and returns the position within the hashset where an element with the + * given key should be inserted using hashsetInsertAtPosition. This is the first + * phase in a two-phase insert operation and it can be used if you want to avoid + * creating an element before you know if it already exists in the table or not, + * and without a separate lookup to the table. + * + * The returned pointer is opaque, but if it's NULL, it means that an element + * with the given key already exists in the table. + * + * If a non-NULL pointer is returned, this pointer can be passed as the + * 'position' argument to hashsetInsertAtPosition to insert an element. */ +void *hashsetFindPositionForInsert(hashset *s, void *key, void **existing) { + uint64_t hash = hashKey(s, key); + int pos_in_bucket, table_index; + bucket *b = findBucket(s, hash, key, &pos_in_bucket, NULL); + if (b != NULL) { + if (existing) *existing = b->elements[pos_in_bucket]; + return NULL; + } else { + hashsetExpandIfNeeded(s); + rehashStepOnWriteIfNeeded(s); + b = findBucketForInsert(s, hash, &pos_in_bucket, &table_index); + assert(!isPositionFilled(b, pos_in_bucket)); + + /* Store the hash bits now, so we don't need to compute the hash again + * when hashsetInsertAtPosition() is called. */ + b->hashes[pos_in_bucket] = highBits(hash); + + /* Compute bucket index from bucket pointer. 
*/ + void *b0 = &s->tables[table_index][0]; + size_t bucket_index = ((uintptr_t)b - (uintptr_t)b0) / sizeof(bucket); + assert(&s->tables[table_index][bucket_index] == b); + + /* Encode position as pointer. */ + return encodePositionInTable(bucket_index, pos_in_bucket, table_index); + } +} + +/* Inserts an element at the position previously acquired using + * hashsetFindPositionForInsert(). The element must match the key provided when + * finding the position. You must not access the hashset in any way between + * hashsetFindPositionForInsert() and hashsetInsertAtPosition(), since even a + * hashsetFind() may cause incremental rehashing to move elements in memory. */ +void hashsetInsertAtPosition(hashset *s, void *element, void *position) { + /* Decode position. */ + size_t bucket_index; + int table_index, pos_in_bucket; + decodePositionInTable(position, &bucket_index, &pos_in_bucket, &table_index); + + /* Insert the element at this position. */ + bucket *b = &s->tables[table_index][bucket_index]; + assert(!isPositionFilled(b, pos_in_bucket)); + b->presence |= (1 << pos_in_bucket); + b->elements[pos_in_bucket] = element; + s->used[table_index]++; + /* Hash bits are already set by hashsetFindPositionForInsert. */ + if (!b->everfull && bucketIsFull(b)) { + b->everfull = 1; + s->everfulls[table_index]++; + cleanUpProbingChainsIfNeeded(s); + } +} + +/* Add or overwrite. Returns 1 if an new element was inserted, 0 if an existing + * element was overwritten. */ +int hashsetReplace(hashset *s, void *element) { + const void *key = elementGetKey(s, element); + int pos_in_bucket = 0; + uint64_t hash = hashKey(s, key); + bucket *b = findBucket(s, hash, key, &pos_in_bucket, NULL); + if (b != NULL) { + freeElement(s, b->elements[pos_in_bucket]); + b->elements[pos_in_bucket] = element; + return 0; + } else { + insert(s, hash, element); + return 1; + } +} + +/* Removes the element with the matching key and returns it. The element + * destructor is not called. 
Returns 1 and points 'popped' to the element if a + * matching element was found. Returns 0 if no matching element was found. */ +int hashsetPop(hashset *s, const void *key, void **popped) { + if (hashsetSize(s) == 0) return 0; + uint64_t hash = hashKey(s, key); + int pos_in_bucket = 0; + int table_index = 0; + bucket *b = findBucket(s, hash, key, &pos_in_bucket, &table_index); + if (b) { + if (popped) *popped = b->elements[pos_in_bucket]; + b->presence &= ~(1 << pos_in_bucket); + s->used[table_index]--; + hashsetShrinkIfNeeded(s); + return 1; + } else { + return 0; + } +} + +/* Deletes the element with the matching key. Returns 1 if an element was + * deleted, 0 if no matching element was found. */ +int hashsetDelete(hashset *s, const void *key) { + void *element; + if (hashsetPop(s, key, &element)) { + freeElement(s, element); + return 1; + } else { + return 0; + } +} + +/* Two-phase pop: Look up an element, do something with it, then delete it + * without searching the hash table again. + * + * hashsetTwoPhasePopFindRef finds an element in the table and also the position + * of the element within the table, so that it can be deleted without looking it + * up in the table again. The function returns a pointer to the + * element pointer within the hash table, if an element with a matching key is + * found, and NULL otherwise. + * + * If non-NULL is returned, call 'hashsetTwoPhasePopDelete' with the returned + * 'position' afterwards to actually delete the element from the table. These + * two functions are designed to be used in pairs. `hashsetTwoPhasePopFindRef` + * pauses rehashing and `hashsetTwoPhasePopDelete` resumes rehashing. 
+ * + * While hashsetPop finds and returns an element, the purpose of two-phase pop + * is to provide an optimized equivalent of hashsetFindRef followed by + * hashsetDelete, where the first call finds the element but doesn't delete it + * from the hash table and the latter doesn't need to look up the element in the + * hash table again. + * + * Example: + * + * void *position; + * void **ref = hashsetTwoPhasePopFindRef(s, key, &position) + * if (ref != NULL) { + * void *element = *ref; + * // do something with the element, then... + * hashsetTwoPhasePopDelete(s, position); + * } + */ + +/* Like hashsetTwoPhasePopFind, but returns a pointer to where the element is + * stored in the table, or NULL if no matching element is found. */ +void **hashsetTwoPhasePopFindRef(hashset *s, const void *key, void **position) { + if (hashsetSize(s) == 0) return NULL; + uint64_t hash = hashKey(s, key); + int pos_in_bucket = 0; + int table_index = 0; + bucket *b = findBucket(s, hash, key, &pos_in_bucket, &table_index); + if (b) { + hashsetPauseRehashing(s); + + /* Compute bucket index from bucket pointer. */ + void *b0 = &s->tables[table_index][0]; + size_t bucket_index = ((uintptr_t)b - (uintptr_t)b0) / sizeof(bucket); + assert(&s->tables[table_index][bucket_index] == b); + + /* Encode position as pointer. */ + *position = encodePositionInTable(bucket_index, pos_in_bucket, table_index); + return &b->elements[pos_in_bucket]; + } else { + return NULL; + } +} + +/* Clears the position of the element in the hashset and resumes rehashing. The + * element destructor is NOT called. The position is an opaque representation of + * its position as found using hashsetTwoPhasePopFindRef(). */ +void hashsetTwoPhasePopDelete(hashset *s, void *position) { + /* Decode position. */ + size_t bucket_index; + int table_index, pos_in_bucket; + decodePositionInTable(position, &bucket_index, &pos_in_bucket, &table_index); + + /* Delete the element and resume rehashing. 
*/ + bucket *b = &s->tables[table_index][bucket_index]; + assert(isPositionFilled(b, pos_in_bucket)); + b->presence &= ~(1 << pos_in_bucket); + s->used[table_index]--; + hashsetShrinkIfNeeded(s); + hashsetResumeRehashing(s); +} + +/* --- Scan --- */ + +/* Scan is a stateless iterator. It works with a cursor that is returned to the + * caller and which should be provided to the next call to continue scanning. + * The hash table can be modified in any way between two scan calls. The scan + * still continues iterating where it was. + * + * A full scan is performed like this: Start with a cursor of 0. The scan + * callback is invoked for each element scanned and a new cursor is returned. + * Next time, call this function with the new cursor. Continue until the + * function returns 0. + * + * We say that an element is *emitted* when it's passed to the scan callback. + * + * Scan guarantees: + * + * - An element that is present in the hash table during an entire full scan + * will be returned (emitted) at least once. (Most of the time exactly once, + * but sometimes twice.) + * + * - An element that is inserted or deleted during a full scan may or may not be + * returned during the scan. + * + * The hash table uses a variant of linear probing with a cursor increment + * rather than a regular increment of the index when probing. The scan algorithm + * needs to continue scanning as long as a bucket in either of the tables has + * ever been full. This means that we may wrap around cursor zero and still + * continue until we find a bucket where we can stop, so some elements can be + * returned twice (in the first and the last scan calls) due to this. + * + * The 'flags' argument can be used to tweak the behaviour. It's a bitwise-or + * (zero means no flags) of the following: + * + * - HASHSET_SCAN_EMIT_REF: Emit a pointer to the element's location in the + * table to the scan function instead of the actual element. 
This can be used + * for advanced things like reallocating the memory of an element (for the + * purpose of defragmentation) and updating the pointer to the element inside + * the hash table. + * + * - HASHSET_SCAN_SINGLE_STEP: This flag can be used for selecting fewer + * elements when the scan guarantees don't need to be enforced. With this + * flag, we don't continue scanning complete probing chains, so if rehashing + * happens between calls, elements can be missed. The scan cursor is advanced + * only a single step. */ +size_t hashsetScan(hashset *s, size_t cursor, hashsetScanFunction fn, void *privdata, int flags) { + if (hashsetSize(s) == 0) return 0; + + /* Prevent elements from being moved around during the scan call, as a + * side-effect of the scan callback. */ + hashsetPauseRehashing(s); + + /* Flags. */ + int emit_ref = (flags & HASHSET_SCAN_EMIT_REF); + int single_step = (flags & HASHSET_SCAN_SINGLE_STEP); + + /* If any element that hashes to the current bucket may have been inserted + * in another bucket due to probing, we need to continue to cover the whole + * probe sequence in the same scan cycle. Otherwise we may miss those + * elements if they are rehashed before the next scan call. */ + int in_probe_sequence = 0; + + /* When the cursor reaches zero, may need to continue scanning and advancing + * the cursor until the probing chain ends, but when we stop, we return 0 to + * indicate that the full scan is completed. */ + int cursor_passed_zero = 0; + + /* Mask the start cursor to the bigger of the tables, so we can detect if we + * come back to the start cursor and break the loop. It can happen if enough + * tombstones (in both tables while rehashing) make us continue scanning. */ + cursor = cursor & (expToMask(s->bucket_exp[0]) | expToMask(s->bucket_exp[1])); + size_t start_cursor = cursor; + do { + in_probe_sequence = 0; /* Set to 1 if an ever-full bucket is scanned. */ + if (!hashsetIsRehashing(s)) { + /* Emit elements at the cursor index. 
*/ + size_t mask = expToMask(s->bucket_exp[0]); + bucket *b = &s->tables[0][cursor & mask]; + int pos; + for (pos = 0; pos < ELEMENTS_PER_BUCKET; pos++) { + if (isPositionFilled(b, pos)) { + void *emit = emit_ref ? &b->elements[pos] : b->elements[pos]; + fn(privdata, emit); + } + } + + /* Do we need to continue scanning? */ + in_probe_sequence |= b->everfull; + + /* Advance cursor. */ + cursor = nextCursor(cursor, mask); + } else { + int table_small, table_large; + if (s->bucket_exp[0] <= s->bucket_exp[1]) { + table_small = 0; + table_large = 1; + } else { + table_small = 1; + table_large = 0; + } + + size_t mask_small = expToMask(s->bucket_exp[table_small]); + size_t mask_large = expToMask(s->bucket_exp[table_large]); + + /* Emit elements in the smaller table, if this bucket hasn't already + * been rehashed. */ + if (table_small == 0 && !cursorIsLessThan(cursor, s->rehash_idx)) { + bucket *b = &s->tables[table_small][cursor & mask_small]; + if (b->presence) { + for (int pos = 0; pos < ELEMENTS_PER_BUCKET; pos++) { + if (isPositionFilled(b, pos)) { + void *emit = emit_ref ? &b->elements[pos] : b->elements[pos]; + fn(privdata, emit); + } + } + } + in_probe_sequence |= b->everfull; + } + + /* Iterate over indices in larger table that are the expansion of + * the index pointed to by the cursor in the smaller table. */ + do { + /* Emit elements in bigger table. */ + bucket *b = &s->tables[table_large][cursor & mask_large]; + if (b->presence) { + for (int pos = 0; pos < ELEMENTS_PER_BUCKET; pos++) { + if (isPositionFilled(b, pos)) { + void *emit = emit_ref ? 
&b->elements[pos] : b->elements[pos]; + fn(privdata, emit); + } + } + } + in_probe_sequence |= b->everfull; + + /* Increment the reverse cursor not covered by the smaller mask.*/ + cursor = nextCursor(cursor, mask_large); + + /* Continue while bits covered by mask difference is non-zero */ + } while (cursor & (mask_small ^ mask_large) && cursor != start_cursor); + } + if (cursor == 0) { + cursor_passed_zero = 1; + } + } while (in_probe_sequence && !single_step && cursor != start_cursor); + hashsetResumeRehashing(s); + return cursor_passed_zero ? 0 : cursor; +} + +/* --- Iterator --- */ + +/* Initialize an iterator, that is not allowed to insert, delete or even lookup + * elements in the hashset, because such operations can trigger incremental + * rehashing which moves elements around and confuses the iterator. Only + * hashsetNext is allowed. Each element is returned exactly once. Call + * hashsetResetIterator when you are done. See also hashsetInitSafeIterator. */ +void hashsetInitIterator(hashsetIterator *iter, hashset *s) { + iter->hashset = s; + iter->table = 0; + iter->index = -1; + iter->safe = 0; +} + +/* Initialize a safe iterator, which is allowed to modify the hash table while + * iterating. It pauses incremental rehashing to prevent elements from moving + * around. Call hashsetNext to fetch each element. You must call + * hashsetResetIterator when you are done with a safe iterator. + * + * Guarantees: + * + * - Elements that are in the hash table for the entire iteration are returned + * exactly once. + * + * - Elements that are deleted or replaced using hashsetReplace after they + * have been returned are not returned again. + * + * - Elements that are replaced using hashsetReplace before they've been + * returned by the iterator will be returned. + * + * - Elements that are inserted during the iteration may or may not be returned + * by the iterator. 
+ */ +void hashsetInitSafeIterator(hashsetIterator *iter, hashset *s) { + hashsetInitIterator(iter, s); + iter->safe = 1; +} + +/* Resets a stack-allocated iterator. */ +void hashsetResetIterator(hashsetIterator *iter) { + if (!(iter->index == -1 && iter->table == 0)) { + if (iter->safe) { + hashsetResumeRehashing(iter->hashset); + assert(iter->hashset->pause_rehash >= 0); + } else { + assert(iter->fingerprint == hashsetFingerprint(iter->hashset)); + } + } +} + +/* Allocates and initializes an iterator. */ +hashsetIterator *hashsetCreateIterator(hashset *s) { + hashsetIterator *iter = zmalloc(sizeof(*iter)); + hashsetInitIterator(iter, s); + return iter; +} + +/* Allocates and initializes a safe iterator. */ +hashsetIterator *hashsetCreateSafeIterator(hashset *s) { + hashsetIterator *iter = hashsetCreateIterator(s); + iter->safe = 1; + return iter; +} + +/* Resets and frees the memory of an allocated iterator, i.e. one created using + * hashsetCreate(Safe)Iterator. */ +void hashsetReleaseIterator(hashsetIterator *iter) { + hashsetResetIterator(iter); + zfree(iter); +} + +/* Points elemptr to the next element and returns 1 if there is a next element. + * Returns 0 if there are not more elements. */ +int hashsetNext(hashsetIterator *iter, void **elemptr) { + while (1) { + if (iter->index == -1 && iter->table == 0) { + /* It's the first call to next. */ + if (iter->safe) { + hashsetPauseRehashing(iter->hashset); + } else { + iter->fingerprint = hashsetFingerprint(iter->hashset); + } + if (iter->hashset->tables[0] == NULL) { + /* empty hashset, we're done */ + break; + } + iter->index = 0; + if (hashsetIsRehashing(iter->hashset)) { + iter->index = iter->hashset->rehash_idx; + } + iter->pos_in_bucket = 0; + } else { + /* Advance position within bucket, or bucket index, or table. 
*/ + iter->pos_in_bucket++; + if (iter->pos_in_bucket >= ELEMENTS_PER_BUCKET) { + iter->pos_in_bucket = 0; + iter->index = nextCursor(iter->index, expToMask(iter->hashset->bucket_exp[iter->table])); + if (iter->index == 0) { + if (hashsetIsRehashing(iter->hashset) && iter->table == 0) { + iter->table++; + } else { + /* Done. */ + break; + } + } + } + } + bucket *b = &iter->hashset->tables[iter->table][iter->index]; + if (!(b->presence & (1 << iter->pos_in_bucket))) { + /* No element here. Skip. */ + continue; + } + /* Return the element at this position. */ + if (elemptr) { + *elemptr = b->elements[iter->pos_in_bucket]; + } + return 1; + } + return 0; +} + +/* --- Random elements --- */ + +/* Points 'found' to a random element in the hash table and returns 1. Returns 0 + * if the table is empty. */ +int hashsetRandomElement(hashset *s, void **found) { + void *samples[WEAK_RANDOM_SAMPLE_SIZE]; + unsigned count = hashsetSampleElements(s, (void **)&samples, WEAK_RANDOM_SAMPLE_SIZE); + if (count == 0) return 0; + unsigned idx = random() % count; + *found = samples[idx]; + return 1; +} + +/* Points 'found' to a random element in the hash table and returns 1. Returns 0 + * if the table is empty. This one is more fair than hashsetRandomElement(). */ +int hashsetFairRandomElement(hashset *s, void **found) { + void *samples[FAIR_RANDOM_SAMPLE_SIZE]; + unsigned count = hashsetSampleElements(s, (void **)&samples, FAIR_RANDOM_SAMPLE_SIZE); + if (count == 0) return 0; + unsigned idx = random() % count; + *found = samples[idx]; + return 1; +} + +/* This function samples a sequence of elements starting at a random location in + * the hash table. + * + * The sampled elements are stored in the array 'dst' which must have space for + * at least 'count' elements. + * + * The function returns the number of sampled elements, which is 'count' except + * if 'count' is greater than the total number of elements in the hash table. 
*/ +unsigned hashsetSampleElements(hashset *s, void **dst, unsigned count) { + /* Adjust count. */ + if (count > hashsetSize(s)) count = hashsetSize(s); + scan_samples samples; + samples.size = count; + samples.count = 0; + samples.elements = dst; + size_t cursor = randomSizeT(); + while (samples.count < count) { + cursor = hashsetScan(s, cursor, sampleElementsScanFn, &samples, HASHSET_SCAN_SINGLE_STEP); + } + rehashStepOnReadIfNeeded(s); + return count; +} + +/* --- Stats --- */ + +#define HASHSET_STATS_VECTLEN 50 +void hashsetFreeStats(hashsetStats *stats) { + zfree(stats->clvector); + zfree(stats); +} + +void hashsetCombineStats(hashsetStats *from, hashsetStats *into) { + into->buckets += from->buckets; + into->max_chain_len = (from->max_chain_len > into->max_chain_len) ? from->max_chain_len : into->max_chain_len; + into->probe_count += from->probe_count; + into->size += from->size; + into->used += from->used; + for (int i = 0; i < HASHSET_STATS_VECTLEN; i++) { + into->clvector[i] += from->clvector[i]; + } +} + +hashsetStats *hashsetGetStatsHt(hashset *s, int table_index, int full) { + unsigned long *clvector = zcalloc(sizeof(unsigned long) * HASHSET_STATS_VECTLEN); + hashsetStats *stats = zcalloc(sizeof(hashsetStats)); + stats->table_index = table_index; + stats->clvector = clvector; + stats->buckets = numBuckets(s->bucket_exp[table_index]); + stats->size = stats->buckets * ELEMENTS_PER_BUCKET; + stats->used = s->used[table_index]; + if (!full) return stats; + /* Compute stats about probing chain lengths. */ + unsigned long chainlen = 0; + size_t mask = expToMask(s->bucket_exp[table_index]); + /* Find a suitable place to start: not in the middle of a probing chain. 
*/ + size_t start_idx; + for (start_idx = 0; start_idx <= mask; start_idx++) { + bucket *b = &s->tables[table_index][start_idx]; + if (!b->everfull) break; + } + size_t idx = start_idx; + do { + idx = nextCursor(idx, mask); + bucket *b = &s->tables[table_index][idx]; + if (b->everfull) { + stats->probe_count++; + chainlen++; + } else { + /* End of a chain (even a zero-length chain). */ + /* Keys hashing to each bucket in this chain has a probe length + * depending on the bucket they hash to. Keys hashing to this bucket + * have probing length 0, keys hashing to the previous bucket has + * probling length 1, and so on. */ + for (unsigned long i = 0; i <= chainlen; i++) { + int index = (i < HASHSET_STATS_VECTLEN) ? i : HASHSET_STATS_VECTLEN - 1; + clvector[index]++; + } + if (chainlen > stats->max_chain_len) stats->max_chain_len = chainlen; + chainlen = 0; + } + } while (idx != start_idx); + return stats; +} + +/* Generates human readable stats. */ +size_t hashsetGetStatsMsg(char *buf, size_t bufsize, hashsetStats *stats, int full) { + if (stats->used == 0) { + return snprintf(buf, bufsize, + "Hash table %d stats (%s):\n" + "No stats available for empty hash tables\n", + stats->table_index, (stats->table_index == 0) ? "main hash table" : "rehashing target"); + } + size_t l = 0; + l += snprintf(buf + l, bufsize - l, + "Hash table %d stats (%s):\n" + " table size: %lu\n" + " number of elements: %lu\n", + stats->table_index, + (stats->table_index == 0) ? 
"main hash table" : "rehashing target", stats->size, + stats->used); + if (full) { + l += snprintf(buf + l, bufsize - l, + " buckets: %lu\n" + " max probing length: %lu\n" + " avg probing length: %.02f\n" + " probing length distribution:\n", + stats->buckets, stats->max_chain_len, (float)stats->probe_count / stats->buckets); + unsigned long chain_length_sum = 0; + for (unsigned long i = 0; i < HASHSET_STATS_VECTLEN - 1; i++) { + if (stats->clvector[i] == 0) continue; + if (l >= bufsize) break; + chain_length_sum += stats->clvector[i]; + l += snprintf(buf + l, bufsize - l, " %ld: %ld (%.02f%%)\n", i, stats->clvector[i], + ((float)stats->clvector[i] / stats->buckets) * 100); + } + assert(chain_length_sum == stats->buckets); + } + + /* Make sure there is a NULL term at the end. */ + buf[bufsize - 1] = '\0'; + /* Unlike snprintf(), return the number of characters actually written. */ + return strlen(buf); +} + +void hashsetGetStats(char *buf, size_t bufsize, hashset *s, int full) { + size_t l; + char *orig_buf = buf; + size_t orig_bufsize = bufsize; + + hashsetStats *mainHtStats = hashsetGetStatsHt(s, 0, full); + l = hashsetGetStatsMsg(buf, bufsize, mainHtStats, full); + hashsetFreeStats(mainHtStats); + buf += l; + bufsize -= l; + if (hashsetIsRehashing(s) && bufsize > 0) { + hashsetStats *rehashHtStats = hashsetGetStatsHt(s, 1, full); + hashsetGetStatsMsg(buf, bufsize, rehashHtStats, full); + hashsetFreeStats(rehashHtStats); + } + /* Make sure there is a NULL term at the end. 
*/ + orig_buf[orig_bufsize - 1] = '\0'; +} + +/* --- DEBUG --- */ + +void hashsetDump(hashset *s) { + for (int table = 0; table <= 1; table++) { + printf("Table %d, used %zu, exp %d, buckets %zu, everfulls %zu\n", + table, s->used[table], s->bucket_exp[table], numBuckets(s->bucket_exp[table]), s->everfulls[table]); + for (size_t idx = 0; idx < numBuckets(s->bucket_exp[table]); idx++) { + bucket *b = &s->tables[table][idx]; + printf("Bucket %d:%zu everfull:%d\n", table, idx, b->everfull); + for (int pos = 0; pos < ELEMENTS_PER_BUCKET; pos++) { + printf(" %d ", pos); + if (isPositionFilled(b, pos)) { + printf("h2 %02x, key \"%s\"\n", b->hashes[pos], (const char *)elementGetKey(s, b->elements[pos])); + } else { + printf("(empty)\n"); + } + } + } + } +} + +void hashsetHistogram(hashset *s) { + for (int table = 0; table <= 1; table++) { + for (size_t idx = 0; idx < numBuckets(s->bucket_exp[table]); idx++) { + bucket *b = &s->tables[table][idx]; + char c = b->presence == 0 && b->everfull ? 'X' : '0' + __builtin_popcount(b->presence); + printf("%c", c); + } + if (table == 0) printf(" "); + } + printf("\n"); +} + +void hashsetProbeMap(hashset *s) { + for (int table = 0; table <= 1; table++) { + for (size_t idx = 0; idx < numBuckets(s->bucket_exp[table]); idx++) { + bucket *b = &s->tables[table][idx]; + char c = b->everfull ? 
'X' : 'o'; + printf("%c", c); + } + if (table == 0) printf(" "); + } + printf("\n"); +} + +int hashsetLongestProbingChain(hashset *s) { + int maxlen = 0; + for (int table = 0; table <= 1; table++) { + if (s->bucket_exp[table] < 0) { + continue; /* table not used */ + } + size_t cursor = 0; + size_t mask = expToMask(s->bucket_exp[table]); + int chainlen = 0; + do { + assert(cursor <= mask); + bucket *b = &s->tables[table][cursor]; + if (b->everfull) { + if (++chainlen > maxlen) { + maxlen = chainlen; + } + } else { + chainlen = 0; + } + cursor = nextCursor(cursor, mask); + } while (cursor != 0); + } + return maxlen; +} diff --git a/src/hashset.h b/src/hashset.h new file mode 100644 index 0000000000..25e72060e6 --- /dev/null +++ b/src/hashset.h @@ -0,0 +1,165 @@ +#ifndef HASHSET_H +#define HASHSET_H + +/* Hash table implementation. + * + * This is a cache-friendly hash table implementation. For details about the + * implementation and documentation of functions, se comments in hashset.c. + * + * The elements in a hashset are of a user-defined type, but an element needs to + * contain a key. It can represent a key-value entry, or it can be just a key, + * if set semantics are desired. + * + * Terminology: + * + * hashset + * An instance of the data structure. + * + * element + * An element in the hashset. This may be of the same type as the key, + * or a struct containing a key and other fields. + * key + * The part of the element used for looking the element up in the hashset. + * May be the entire element or a struct field within the element. + * + * type + * A struct containing callbacks, such as hash function, key comparison + * function and how to get the key in an element. + */ + +#include "fmacros.h" +#include +#include + +/* --- Opaque types --- */ + +typedef struct hashset hashset; +typedef struct hashsetStats hashsetStats; + +/* --- Non-opaque types --- */ + +/* The hashsetType is a set of callbacks for a hashset. All callbacks are + * optional. 
With all callbacks omitted, the hashset is effectively a set of + * pointer-sized integers. */ +typedef struct { + /* If the type of an element is not the same as the type of a key used for + * lookup, this callback needs to return the key within an element. */ + const void *(*elementGetKey)(const void *element); + /* Hash function. Defaults to hashing the bits in the pointer, effectively + * treating the pointer as an integer. */ + uint64_t (*hashFunction)(const void *key); + /* Compare function, returns 0 if the keys are equal. Defaults to just + * comparing the pointers for equality. */ + int (*keyCompare)(hashset *s, const void *key1, const void *key2); + /* Callback to free an element when it's overwritten or deleted. + * Optional. */ + void (*elementDestructor)(hashset *s, void *element); + /* Callback to control when resizing should be allowed. */ + int (*resizeAllowed)(size_t moreMem, double usedRatio); + /* Invoked at the start of rehashing. Both tables are already created. */ + void (*rehashingStarted)(hashset *s); + /* Invoked at the end of rehashing. Both tables still exist and are cleaned + * up after this callback. */ + void (*rehashingCompleted)(hashset *s); + /* Allow a hashset to carry extra caller-defined metadata. The extra memory + * is initialized to 0. */ + size_t (*getMetadataSize)(void); + /* Flag to disable incremental rehashing */ + unsigned instant_rehashing : 1; +} hashsetType; + +typedef enum { + HASHSET_RESIZE_ALLOW = 0, + HASHSET_RESIZE_AVOID, + HASHSET_RESIZE_FORBID, +} hashsetResizePolicy; + +typedef void (*hashsetScanFunction)(void *privdata, void *element); + +/* Constants */ +#define HASHSET_BUCKET_SIZE 64 /* bytes */ + +/* Scan flags */ +#define HASHSET_SCAN_EMIT_REF (1 << 0) +#define HASHSET_SCAN_SINGLE_STEP (1 << 2) + +typedef struct { + hashset *hashset; + long index; + int table; + int pos_in_bucket; + uint64_t safe : 1; + /* unsafe iterator fingerprint for misuse detection. 
*/ + uint64_t fingerprint : 63; +} hashsetIterator; + +/* --- Prototypes --- */ + +/* Hash function (global seed) */ +void hashsetSetHashFunctionSeed(const uint8_t *seed); +uint8_t *hashsetGetHashFunctionSeed(void); +uint64_t hashsetGenHashFunction(const char *buf, size_t len); +uint64_t hashsetGenCaseHashFunction(const char *buf, size_t len); + +/* Global resize policy */ +void hashsetSetResizePolicy(hashsetResizePolicy policy); + +/* Hashset instance */ +hashset *hashsetCreate(hashsetType *type); +void hashsetRelease(hashset *s); +void hashsetEmpty(hashset *s, void(callback)(hashset *)); +hashsetType *hashsetGetType(hashset *s); +void *hashsetMetadata(hashset *s); +size_t hashsetSize(hashset *s); +size_t hashsetBuckets(hashset *s); +size_t hashsetProbeCounter(hashset *s, int table); +size_t hashsetMemUsage(hashset *s); +void hashsetPauseAutoShrink(hashset *s); +void hashsetResumeAutoShrink(hashset *s); +int hashsetIsRehashing(hashset *s); +int hashsetIsRehashingPaused(hashset *s); +void hashsetRehashingInfo(hashset *s, size_t *from_size, size_t *to_size); +int hashsetRehashMicroseconds(hashset *s, uint64_t us); +int hashsetExpand(hashset *s, size_t size); +int hashsetTryExpand(hashset *s, size_t size); +int hashsetExpandIfNeeded(hashset *s); +int hashsetShrinkIfNeeded(hashset *s); +hashset *hashsetDefragInternals(hashset *s, void *(*defragfn)(void *)); + +/* Elements */ +int hashsetFind(hashset *s, const void *key, void **found); +void **hashsetFindRef(hashset *s, const void *key); +int hashsetAdd(hashset *s, void *element); +int hashsetAddOrFind(hashset *s, void *element, void **existing); +void *hashsetFindPositionForInsert(hashset *s, void *key, void **existing); +void hashsetInsertAtPosition(hashset *s, void *element, void *position); +int hashsetReplace(hashset *s, void *element); +int hashsetPop(hashset *s, const void *key, void **popped); +int hashsetDelete(hashset *s, const void *key); +void **hashsetTwoPhasePopFindRef(hashset *s, const void *key, void 
**position); +void hashsetTwoPhasePopDelete(hashset *s, void *position); + +/* Iteration & scan */ +size_t hashsetScan(hashset *s, size_t cursor, hashsetScanFunction fn, void *privdata, int flags); +void hashsetInitIterator(hashsetIterator *iter, hashset *s); +void hashsetInitSafeIterator(hashsetIterator *iter, hashset *s); +void hashsetResetIterator(hashsetIterator *iter); +hashsetIterator *hashsetCreateIterator(hashset *s); +hashsetIterator *hashsetCreateSafeIterator(hashset *s); +void hashsetReleaseIterator(hashsetIterator *iter); +int hashsetNext(hashsetIterator *iter, void **elemptr); + +/* Random elements */ +int hashsetRandomElement(hashset *s, void **found); +int hashsetFairRandomElement(hashset *s, void **found); +unsigned hashsetSampleElements(hashset *s, void **dst, unsigned count); + +/* Debug & stats */ + +void hashsetFreeStats(hashsetStats *stats); +void hashsetCombineStats(hashsetStats *from, hashsetStats *into); +hashsetStats *hashsetGetStatsHt(hashset *s, int htidx, int full); +size_t hashsetGetStatsMsg(char *buf, size_t bufsize, hashsetStats *stats, int full); +void hashsetGetStats(char *buf, size_t bufsize, hashset *s, int full); + +#endif /* HASHSET_H */ diff --git a/src/latency.c b/src/latency.c index eef1532d03..ee95859271 100644 --- a/src/latency.c +++ b/src/latency.c @@ -527,13 +527,12 @@ void fillCommandCDF(client *c, struct hdr_histogram *histogram) { /* latencyCommand() helper to produce for all commands, * a per command cumulative distribution of latencies. 
*/ -void latencyAllCommandsFillCDF(client *c, dict *commands, int *command_with_data) { - dictIterator *di = dictGetSafeIterator(commands); - dictEntry *de; +void latencyAllCommandsFillCDF(client *c, hashset *commands, int *command_with_data) { + hashsetIterator iter; + hashsetInitSafeIterator(&iter, commands); struct serverCommand *cmd; - while ((de = dictNext(di)) != NULL) { - cmd = (struct serverCommand *)dictGetVal(de); + while (hashsetNext(&iter, (void **)&cmd)) { if (cmd->latency_histogram) { addReplyBulkCBuffer(c, cmd->fullname, sdslen(cmd->fullname)); fillCommandCDF(c, cmd->latency_histogram); @@ -541,10 +540,10 @@ void latencyAllCommandsFillCDF(client *c, dict *commands, int *command_with_data } if (cmd->subcommands) { - latencyAllCommandsFillCDF(c, cmd->subcommands_dict, command_with_data); + latencyAllCommandsFillCDF(c, cmd->subcommands_set, command_with_data); } } - dictReleaseIterator(di); + hashsetResetIterator(&iter); } /* latencyCommand() helper to produce for a specific command set, @@ -565,19 +564,19 @@ void latencySpecificCommandsFillCDF(client *c) { command_with_data++; } - if (cmd->subcommands_dict) { - dictEntry *de; - dictIterator *di = dictGetSafeIterator(cmd->subcommands_dict); + if (cmd->subcommands_set) { + hashsetIterator iter; + hashsetInitSafeIterator(&iter, cmd->subcommands_set); - while ((de = dictNext(di)) != NULL) { - struct serverCommand *sub = dictGetVal(de); + struct serverCommand *sub; + while (hashsetNext(&iter, (void **)&sub)) { if (sub->latency_histogram) { addReplyBulkCBuffer(c, sub->fullname, sdslen(sub->fullname)); fillCommandCDF(c, sub->latency_histogram); command_with_data++; } } - dictReleaseIterator(di); + hashsetResetIterator(&iter); } } setDeferredMapLen(c, replylen, command_with_data); diff --git a/src/module.c b/src/module.c index 2884239200..c83dcaae6b 100644 --- a/src/module.c +++ b/src/module.c @@ -1297,8 +1297,8 @@ int VM_CreateCommand(ValkeyModuleCtx *ctx, cp->serverCmd->arity = cmdfunc ? 
-1 : -2; /* Default value, can be changed later via dedicated API */ /* Drain IO queue before modifying commands dictionary to prevent concurrent access while modifying it. */ drainIOThreadsQueue(); - serverAssert(dictAdd(server.commands, sdsdup(declared_name), cp->serverCmd) == DICT_OK); - serverAssert(dictAdd(server.orig_commands, sdsdup(declared_name), cp->serverCmd) == DICT_OK); + serverAssert(hashsetAdd(server.commands, cp->serverCmd)); + serverAssert(hashsetAdd(server.orig_commands, cp->serverCmd)); cp->serverCmd->id = ACLGetCommandID(declared_name); /* ID used for ACL. */ return VALKEYMODULE_OK; } @@ -1430,7 +1430,7 @@ int VM_CreateSubcommand(ValkeyModuleCommand *parent, /* Check if the command name is busy within the parent command. */ sds declared_name = sdsnew(name); - if (parent_cmd->subcommands_dict && lookupSubcommand(parent_cmd, declared_name) != NULL) { + if (parent_cmd->subcommands_set && lookupSubcommand(parent_cmd, declared_name) != NULL) { sdsfree(declared_name); return VALKEYMODULE_ERR; } @@ -1440,7 +1440,7 @@ int VM_CreateSubcommand(ValkeyModuleCommand *parent, moduleCreateCommandProxy(parent->module, declared_name, fullname, cmdfunc, flags, firstkey, lastkey, keystep); cp->serverCmd->arity = -2; - commandAddSubcommand(parent_cmd, cp->serverCmd, name); + commandAddSubcommand(parent_cmd, cp->serverCmd); return VALKEYMODULE_OK; } @@ -12059,20 +12059,20 @@ int moduleFreeCommand(struct ValkeyModule *module, struct serverCommand *cmd) { moduleFreeArgs(cmd->args, cmd->num_args); zfree(cp); - if (cmd->subcommands_dict) { - dictEntry *de; - dictIterator *di = dictGetSafeIterator(cmd->subcommands_dict); - while ((de = dictNext(di)) != NULL) { - struct serverCommand *sub = dictGetVal(de); + if (cmd->subcommands_set) { + hashsetIterator iter; + hashsetInitSafeIterator(&iter, cmd->subcommands_set); + struct serverCommand *sub; + while (hashsetNext(&iter, (void **)&sub)) { if (moduleFreeCommand(module, sub) != C_OK) continue; - 
serverAssert(dictDelete(cmd->subcommands_dict, sub->declared_name) == DICT_OK); + serverAssert(hashsetDelete(cmd->subcommands_set, sub->declared_name)); sdsfree((sds)sub->declared_name); sdsfree(sub->fullname); zfree(sub); } - dictReleaseIterator(di); - dictRelease(cmd->subcommands_dict); + hashsetResetIterator(&iter); + hashsetRelease(cmd->subcommands_set); } return C_OK; @@ -12082,19 +12082,19 @@ void moduleUnregisterCommands(struct ValkeyModule *module) { /* Drain IO queue before modifying commands dictionary to prevent concurrent access while modifying it. */ drainIOThreadsQueue(); /* Unregister all the commands registered by this module. */ - dictIterator *di = dictGetSafeIterator(server.commands); - dictEntry *de; - while ((de = dictNext(di)) != NULL) { - struct serverCommand *cmd = dictGetVal(de); + hashsetIterator iter; + hashsetInitSafeIterator(&iter, server.commands); + struct serverCommand *cmd; + while (hashsetNext(&iter, (void **)&cmd)) { if (moduleFreeCommand(module, cmd) != C_OK) continue; - serverAssert(dictDelete(server.commands, cmd->fullname) == DICT_OK); - serverAssert(dictDelete(server.orig_commands, cmd->fullname) == DICT_OK); + serverAssert(hashsetDelete(server.commands, cmd->fullname)); + serverAssert(hashsetDelete(server.orig_commands, cmd->fullname)); sdsfree((sds)cmd->declared_name); sdsfree(cmd->fullname); zfree(cmd); } - dictReleaseIterator(di); + hashsetResetIterator(&iter); } /* We parse argv to add sds "NAME VALUE" pairs to the server.module_configs_queue list of configs. diff --git a/src/server.c b/src/server.c index ab95f84346..6e2673a1fd 100644 --- a/src/server.c +++ b/src/server.c @@ -300,13 +300,18 @@ size_t dictSdsEmbedKey(unsigned char *buf, size_t buf_len, const void *key, uint return sdscopytobuffer(buf, buf_len, (sds)key, key_offset); } -/* A case insensitive version used for the command lookup table and other - * places where case insensitive non binary-safe comparison is needed. 
*/ +/* Case insensitive non binary-safe comparison */ int dictSdsKeyCaseCompare(dict *d, const void *key1, const void *key2) { UNUSED(d); return strcasecmp(key1, key2) == 0; } +/* Case insensitive key comparison */ +int hashsetStringKeyCaseCompare(hashset *hs, const void *key1, const void *key2) { + UNUSED(hs); + return strcasecmp(key1, key2); +} + void dictObjectDestructor(dict *d, void *val) { UNUSED(d); if (val == NULL) return; /* Lazy freeing will set value to NULL. */ @@ -430,6 +435,16 @@ int dictResizeAllowed(size_t moreMem, double usedRatio) { } } +const void *hashsetCommandGetKey(const void *element) { + struct serverCommand *command = (struct serverCommand *)element; + return command->fullname; +} + +const void *hashsetSubcommandGetKey(const void *element) { + struct serverCommand *command = (struct serverCommand *)element; + return command->declared_name; +} + /* Generic hash table type where keys are Objects, Values * dummy pointers. */ dictType objectKeyPointerValueDictType = { @@ -502,16 +517,17 @@ dictType kvstoreExpiresDictType = { kvstoreDictMetadataSize, }; -/* Command table. sds string -> command struct pointer. */ -dictType commandTableDictType = { - dictSdsCaseHash, /* hash function */ - NULL, /* key dup */ - dictSdsKeyCaseCompare, /* key compare */ - dictSdsDestructor, /* key destructor */ - NULL, /* val destructor */ - NULL, /* allow to expand */ - .no_incremental_rehash = 1, /* no incremental rehash as the command table may be accessed from IO threads. */ -}; +/* Command set, hashed by sds string, stores serverCommand structs. */ +hashsetType commandSetType = {.elementGetKey = hashsetCommandGetKey, + .hashFunction = dictSdsCaseHash, + .keyCompare = hashsetStringKeyCaseCompare, + .instant_rehashing = 1}; + +/* Command set, hashed by char* string, stores serverCommand structs. 
*/ +hashsetType subcommandSetType = {.elementGetKey = hashsetSubcommandGetKey, + .hashFunction = dictCStrCaseHash, + .keyCompare = hashsetStringKeyCaseCompare, + .instant_rehashing = 1}; /* Hash type hash table (note that small hashes are represented with listpacks) */ dictType hashDictType = { @@ -2115,8 +2131,8 @@ void initServerConfig(void) { /* Command table -- we initialize it here as it is part of the * initial configuration, since command names may be changed via * valkey.conf using the rename-command directive. */ - server.commands = dictCreate(&commandTableDictType); - server.orig_commands = dictCreate(&commandTableDictType); + server.commands = hashsetCreate(&commandSetType); + server.orig_commands = hashsetCreate(&commandSetType); populateCommandTable(); /* Debugging */ @@ -2960,13 +2976,13 @@ sds catSubCommandFullname(const char *parent_name, const char *sub_name) { return sdscatfmt(sdsempty(), "%s|%s", parent_name, sub_name); } -void commandAddSubcommand(struct serverCommand *parent, struct serverCommand *subcommand, const char *declared_name) { - if (!parent->subcommands_dict) parent->subcommands_dict = dictCreate(&commandTableDictType); +void commandAddSubcommand(struct serverCommand *parent, struct serverCommand *subcommand) { + if (!parent->subcommands_set) parent->subcommands_set = hashsetCreate(&subcommandSetType); subcommand->parent = parent; /* Assign the parent command */ subcommand->id = ACLGetCommandID(subcommand->fullname); /* Assign the ID used for ACL. 
*/ - serverAssert(dictAdd(parent->subcommands_dict, sdsnew(declared_name), subcommand) == DICT_OK); + serverAssert(hashsetAdd(parent->subcommands_set, subcommand)); } /* Set implicit ACl categories (see comment above the definition of @@ -3018,7 +3034,7 @@ int populateCommandStructure(struct serverCommand *c) { sub->fullname = catSubCommandFullname(c->declared_name, sub->declared_name); if (populateCommandStructure(sub) == C_ERR) continue; - commandAddSubcommand(c, sub, sub->declared_name); + commandAddSubcommand(c, sub); } } @@ -3042,22 +3058,20 @@ void populateCommandTable(void) { c->fullname = sdsnew(c->declared_name); if (populateCommandStructure(c) == C_ERR) continue; - retval1 = dictAdd(server.commands, sdsdup(c->fullname), c); + retval1 = hashsetAdd(server.commands, c); /* Populate an additional dictionary that will be unaffected * by rename-command statements in valkey.conf. */ - retval2 = dictAdd(server.orig_commands, sdsdup(c->fullname), c); - serverAssert(retval1 == DICT_OK && retval2 == DICT_OK); + retval2 = hashsetAdd(server.orig_commands, c); + serverAssert(retval1 && retval2); } } -void resetCommandTableStats(dict *commands) { +void resetCommandTableStats(hashset *commands) { struct serverCommand *c; - dictEntry *de; - dictIterator *di; + hashsetIterator iter; - di = dictGetSafeIterator(commands); - while ((de = dictNext(di)) != NULL) { - c = (struct serverCommand *)dictGetVal(de); + hashsetInitSafeIterator(&iter, commands); + while (hashsetNext(&iter, (void **)&c)) { c->microseconds = 0; c->calls = 0; c->rejected_calls = 0; @@ -3066,9 +3080,9 @@ void resetCommandTableStats(dict *commands) { hdr_close(c->latency_histogram); c->latency_histogram = NULL; } - if (c->subcommands_dict) resetCommandTableStats(c->subcommands_dict); + if (c->subcommands_set) resetCommandTableStats(c->subcommands_set); } - dictReleaseIterator(di); + hashsetResetIterator(&iter); } void resetErrorTableStats(void) { @@ -3115,13 +3129,16 @@ void serverOpArrayFree(serverOpArray 
*oa) { /* ====================== Commands lookup and execution ===================== */ int isContainerCommandBySds(sds s) { - struct serverCommand *base_cmd = dictFetchValue(server.commands, s); - int has_subcommands = base_cmd && base_cmd->subcommands_dict; + struct serverCommand *base_cmd; + int found_command = hashsetFind(server.commands, s, (void **)&base_cmd); + int has_subcommands = found_command && base_cmd->subcommands_set; return has_subcommands; } struct serverCommand *lookupSubcommand(struct serverCommand *container, sds sub_name) { - return dictFetchValue(container->subcommands_dict, sub_name); + struct serverCommand *subcommand = NULL; + hashsetFind(container->subcommands_set, sub_name, (void **)&subcommand); + return subcommand; } /* Look up a command by argv and argc @@ -3132,9 +3149,10 @@ struct serverCommand *lookupSubcommand(struct serverCommand *container, sds sub_ * name (e.g. in COMMAND INFO) rather than to find the command * a user requested to execute (in processCommand). */ -struct serverCommand *lookupCommandLogic(dict *commands, robj **argv, int argc, int strict) { - struct serverCommand *base_cmd = dictFetchValue(commands, argv[0]->ptr); - int has_subcommands = base_cmd && base_cmd->subcommands_dict; +struct serverCommand *lookupCommandLogic(hashset *commands, robj **argv, int argc, int strict) { + struct serverCommand *base_cmd = NULL; + int found_command = hashsetFind(commands, argv[0]->ptr, (void **)&base_cmd); + int has_subcommands = found_command && base_cmd->subcommands_set; if (argc == 1 || !has_subcommands) { if (strict && argc != 1) return NULL; /* Note: It is possible that base_cmd->proc==NULL (e.g. 
CONFIG) */ @@ -3150,7 +3168,7 @@ struct serverCommand *lookupCommand(robj **argv, int argc) { return lookupCommandLogic(server.commands, argv, argc, 0); } -struct serverCommand *lookupCommandBySdsLogic(dict *commands, sds s) { +struct serverCommand *lookupCommandBySdsLogic(hashset *commands, sds s) { int argc, j; sds *strings = sdssplitlen(s, sdslen(s), "|", 1, &argc); if (strings == NULL) return NULL; @@ -3177,7 +3195,7 @@ struct serverCommand *lookupCommandBySds(sds s) { return lookupCommandBySdsLogic(server.commands, s); } -struct serverCommand *lookupCommandByCStringLogic(dict *commands, const char *s) { +struct serverCommand *lookupCommandByCStringLogic(hashset *commands, const char *s) { struct serverCommand *cmd; sds name = sdsnew(s); @@ -4809,23 +4827,24 @@ void addReplyCommandSubCommands(client *c, struct serverCommand *cmd, void (*reply_function)(client *, struct serverCommand *), int use_map) { - if (!cmd->subcommands_dict) { + if (!cmd->subcommands_set) { addReplySetLen(c, 0); return; } if (use_map) - addReplyMapLen(c, dictSize(cmd->subcommands_dict)); + addReplyMapLen(c, hashsetSize(cmd->subcommands_set)); else - addReplyArrayLen(c, dictSize(cmd->subcommands_dict)); - dictEntry *de; - dictIterator *di = dictGetSafeIterator(cmd->subcommands_dict); - while ((de = dictNext(di)) != NULL) { - struct serverCommand *sub = (struct serverCommand *)dictGetVal(de); + addReplyArrayLen(c, hashsetSize(cmd->subcommands_set)); + + hashsetIterator iter; + struct serverCommand *sub; + hashsetInitSafeIterator(&iter, cmd->subcommands_set); + while (hashsetNext(&iter, (void **)&sub)) { if (use_map) addReplyBulkCBuffer(c, sub->fullname, sdslen(sub->fullname)); reply_function(c, sub); } - dictReleaseIterator(di); + hashsetResetIterator(&iter); } /* Output the representation of a server command. Used by the COMMAND command and COMMAND INFO. 
*/ @@ -4871,7 +4890,7 @@ void addReplyCommandDocs(client *c, struct serverCommand *cmd) { if (cmd->reply_schema) maplen++; #endif if (cmd->args) maplen++; - if (cmd->subcommands_dict) maplen++; + if (cmd->subcommands_set) maplen++; addReplyMapLen(c, maplen); if (cmd->summary) { @@ -4921,7 +4940,7 @@ void addReplyCommandDocs(client *c, struct serverCommand *cmd) { addReplyBulkCString(c, "arguments"); addReplyCommandArgList(c, cmd->args, cmd->num_args); } - if (cmd->subcommands_dict) { + if (cmd->subcommands_set) { addReplyBulkCString(c, "subcommands"); addReplyCommandSubCommands(c, cmd, addReplyCommandDocs, 1); } @@ -4978,20 +4997,20 @@ void getKeysSubcommand(client *c) { /* COMMAND (no args) */ void commandCommand(client *c) { - dictIterator *di; - dictEntry *de; + hashsetIterator iter; + struct serverCommand *cmd; - addReplyArrayLen(c, dictSize(server.commands)); - di = dictGetIterator(server.commands); - while ((de = dictNext(di)) != NULL) { - addReplyCommandInfo(c, dictGetVal(de)); + addReplyArrayLen(c, hashsetSize(server.commands)); + hashsetInitIterator(&iter, server.commands); + while (hashsetNext(&iter, (void **)&cmd)) { + addReplyCommandInfo(c, cmd); } - dictReleaseIterator(di); + hashsetResetIterator(&iter); } /* COMMAND COUNT */ void commandCountCommand(client *c) { - addReplyLongLong(c, dictSize(server.commands)); + addReplyLongLong(c, hashsetSize(server.commands)); } typedef enum { @@ -5037,39 +5056,39 @@ int shouldFilterFromCommandList(struct serverCommand *cmd, commandListFilter *fi } /* COMMAND LIST FILTERBY (MODULE |ACLCAT |PATTERN ) */ -void commandListWithFilter(client *c, dict *commands, commandListFilter filter, int *numcmds) { - dictEntry *de; - dictIterator *di = dictGetIterator(commands); +void commandListWithFilter(client *c, hashset *commands, commandListFilter filter, int *numcmds) { + hashsetIterator iter; + hashsetInitIterator(&iter, commands); - while ((de = dictNext(di)) != NULL) { - struct serverCommand *cmd = dictGetVal(de); + struct 
serverCommand *cmd; + while (hashsetNext(&iter, (void **)&cmd)) { if (!shouldFilterFromCommandList(cmd, &filter)) { addReplyBulkCBuffer(c, cmd->fullname, sdslen(cmd->fullname)); (*numcmds)++; } - if (cmd->subcommands_dict) { - commandListWithFilter(c, cmd->subcommands_dict, filter, numcmds); + if (cmd->subcommands_set) { + commandListWithFilter(c, cmd->subcommands_set, filter, numcmds); } } - dictReleaseIterator(di); + hashsetResetIterator(&iter); } /* COMMAND LIST */ -void commandListWithoutFilter(client *c, dict *commands, int *numcmds) { - dictEntry *de; - dictIterator *di = dictGetIterator(commands); +void commandListWithoutFilter(client *c, hashset *commands, int *numcmds) { + hashsetIterator iter; + struct serverCommand *cmd; + hashsetInitIterator(&iter, commands); - while ((de = dictNext(di)) != NULL) { - struct serverCommand *cmd = dictGetVal(de); + while (hashsetNext(&iter, (void **)&cmd)) { addReplyBulkCBuffer(c, cmd->fullname, sdslen(cmd->fullname)); (*numcmds)++; - if (cmd->subcommands_dict) { - commandListWithoutFilter(c, cmd->subcommands_dict, numcmds); + if (cmd->subcommands_set) { + commandListWithoutFilter(c, cmd->subcommands_set, numcmds); } } - dictReleaseIterator(di); + hashsetResetIterator(&iter); } /* COMMAND LIST [FILTERBY (MODULE |ACLCAT |PATTERN )] */ @@ -5118,14 +5137,14 @@ void commandInfoCommand(client *c) { int i; if (c->argc == 2) { - dictIterator *di; - dictEntry *de; - addReplyArrayLen(c, dictSize(server.commands)); - di = dictGetIterator(server.commands); - while ((de = dictNext(di)) != NULL) { - addReplyCommandInfo(c, dictGetVal(de)); + hashsetIterator iter; + struct serverCommand *cmd; + addReplyArrayLen(c, hashsetSize(server.commands)); + hashsetInitIterator(&iter, server.commands); + while (hashsetNext(&iter, (void **)&cmd)) { + addReplyCommandInfo(c, cmd); } - dictReleaseIterator(di); + hashsetResetIterator(&iter); } else { addReplyArrayLen(c, c->argc - 2); for (i = 2; i < c->argc; i++) { @@ -5139,16 +5158,15 @@ void 
commandDocsCommand(client *c) { int i; if (c->argc == 2) { /* Reply with an array of all commands */ - dictIterator *di; - dictEntry *de; - addReplyMapLen(c, dictSize(server.commands)); - di = dictGetIterator(server.commands); - while ((de = dictNext(di)) != NULL) { - struct serverCommand *cmd = dictGetVal(de); + hashsetIterator iter; + struct serverCommand *cmd; + addReplyMapLen(c, hashsetSize(server.commands)); + hashsetInitIterator(&iter, server.commands); + while (hashsetNext(&iter, (void **)&cmd)) { addReplyBulkCBuffer(c, cmd->fullname, sdslen(cmd->fullname)); addReplyCommandDocs(c, cmd); } - dictReleaseIterator(di); + hashsetResetIterator(&iter); } else { /* Reply with an array of the requested commands (if we find them) */ int numcmds = 0; @@ -5268,14 +5286,12 @@ const char *getSafeInfoString(const char *s, size_t len, char **tmp) { return memmapchars(new, len, unsafe_info_chars, unsafe_info_chars_substs, sizeof(unsafe_info_chars) - 1); } -sds genValkeyInfoStringCommandStats(sds info, dict *commands) { +sds genValkeyInfoStringCommandStats(sds info, hashset *commands) { struct serverCommand *c; - dictEntry *de; - dictIterator *di; - di = dictGetSafeIterator(commands); - while ((de = dictNext(di)) != NULL) { + hashsetIterator iter; + hashsetInitSafeIterator(&iter, commands); + while (hashsetNext(&iter, (void **)&c)) { char *tmpsafe; - c = (struct serverCommand *)dictGetVal(de); if (c->calls || c->failed_calls || c->rejected_calls) { info = sdscatprintf(info, "cmdstat_%s:calls=%lld,usec=%lld,usec_per_call=%.2f" @@ -5285,11 +5301,11 @@ sds genValkeyInfoStringCommandStats(sds info, dict *commands) { c->rejected_calls, c->failed_calls); if (tmpsafe != NULL) zfree(tmpsafe); } - if (c->subcommands_dict) { - info = genValkeyInfoStringCommandStats(info, c->subcommands_dict); + if (c->subcommands_set) { + info = genValkeyInfoStringCommandStats(info, c->subcommands_set); } } - dictReleaseIterator(di); + hashsetResetIterator(&iter); return info; } @@ -5306,24 +5322,22 @@ 
sds genValkeyInfoStringACLStats(sds info) { return info; } -sds genValkeyInfoStringLatencyStats(sds info, dict *commands) { +sds genValkeyInfoStringLatencyStats(sds info, hashset *commands) { struct serverCommand *c; - dictEntry *de; - dictIterator *di; - di = dictGetSafeIterator(commands); - while ((de = dictNext(di)) != NULL) { + hashsetIterator iter; + hashsetInitSafeIterator(&iter, commands); + while (hashsetNext(&iter, (void **)&c)) { char *tmpsafe; - c = (struct serverCommand *)dictGetVal(de); if (c->latency_histogram) { info = fillPercentileDistributionLatencies( info, getSafeInfoString(c->fullname, sdslen(c->fullname), &tmpsafe), c->latency_histogram); if (tmpsafe != NULL) zfree(tmpsafe); } - if (c->subcommands_dict) { - info = genValkeyInfoStringLatencyStats(info, c->subcommands_dict); + if (c->subcommands_set) { + info = genValkeyInfoStringLatencyStats(info, c->subcommands_set); } } - dictReleaseIterator(di); + hashsetResetIterator(&iter); return info; } diff --git a/src/server.h b/src/server.h index 4fad8d2508..cd0eca3a51 100644 --- a/src/server.h +++ b/src/server.h @@ -67,6 +67,7 @@ typedef long long ustime_t; /* microsecond time type. */ #include "ae.h" /* Event driven programming library */ #include "sds.h" /* Dynamic safe strings */ #include "dict.h" /* Hash tables */ +#include "hashset.h" /* Hash set */ #include "kvstore.h" /* Slot-based hash table */ #include "adlist.h" /* Linked lists */ #include "zmalloc.h" /* total memory usage aware version of malloc/free */ @@ -1658,8 +1659,8 @@ struct valkeyServer { int hz; /* serverCron() calls frequency in hertz */ int in_fork_child; /* indication that this is a fork child */ serverDb *db; - dict *commands; /* Command table */ - dict *orig_commands; /* Command table before command renaming. */ + hashset *commands; /* Command table */ + hashset *orig_commands; /* Command table before command renaming. */ aeEventLoop *el; _Atomic AeIoState io_poll_state; /* Indicates the state of the IO polling. 
*/ int io_ae_fired_events; /* Number of poll events received by the IO thread. */ @@ -2539,7 +2540,7 @@ struct serverCommand { * still maintained (if applicable) so that * we can still support the reply format of * COMMAND INFO and COMMAND GETKEYS */ - dict *subcommands_dict; /* A dictionary that holds the subcommands, the key is the subcommand sds name + hashset *subcommands_set; /* A set that holds the subcommands, the key is the subcommand sds name * (not the fullname), and the value is the serverCommand structure pointer. */ struct serverCommand *parent; struct ValkeyModuleCommand *module_cmd; /* A pointer to the module command data (NULL if native command) */ @@ -3268,9 +3269,9 @@ int changeListener(connListener *listener); void closeListener(connListener *listener); struct serverCommand *lookupSubcommand(struct serverCommand *container, sds sub_name); struct serverCommand *lookupCommand(robj **argv, int argc); -struct serverCommand *lookupCommandBySdsLogic(dict *commands, sds s); +struct serverCommand *lookupCommandBySdsLogic(hashset *commands, sds s); struct serverCommand *lookupCommandBySds(sds s); -struct serverCommand *lookupCommandByCStringLogic(dict *commands, const char *s); +struct serverCommand *lookupCommandByCStringLogic(hashset *commands, const char *s); struct serverCommand *lookupCommandByCString(const char *s); struct serverCommand *lookupCommandOrOriginal(robj **argv, int argc); int commandCheckExistence(client *c, sds *err); @@ -3304,7 +3305,7 @@ void serverLogRawFromHandler(int level, const char *msg); void usage(void); void updateDictResizePolicy(void); void populateCommandTable(void); -void resetCommandTableStats(dict *commands); +void resetCommandTableStats(hashset *commands); void resetErrorTableStats(void); void adjustOpenFilesLimit(void); void incrementErrorCount(const char *fullerr, size_t namelen); @@ -4001,7 +4002,7 @@ int memtest_preserving_test(unsigned long *m, size_t bytes, int passes); void mixDigest(unsigned char *digest, 
const void *ptr, size_t len); void xorDigest(unsigned char *digest, const void *ptr, size_t len); sds catSubCommandFullname(const char *parent_name, const char *sub_name); -void commandAddSubcommand(struct serverCommand *parent, struct serverCommand *subcommand, const char *declared_name); +void commandAddSubcommand(struct serverCommand *parent, struct serverCommand *subcommand); void debugDelay(int usec); void killThreads(void); void makeThreadKillable(void); diff --git a/src/unit/test_files.h b/src/unit/test_files.h index cd2e0c5b92..3089be028d 100644 --- a/src/unit/test_files.h +++ b/src/unit/test_files.h @@ -19,6 +19,18 @@ int test_dictDisableResizeReduceTo3(int argc, char **argv, int flags); int test_dictDeleteOneKeyTriggerResizeAgain(int argc, char **argv, int flags); int test_dictBenchmark(int argc, char **argv, int flags); int test_endianconv(int argc, char *argv[], int flags); +int test_cursor(int argc, char **argv, int flags); +int test_set_hash_function_seed(int argc, char **argv, int flags); +int test_add_find_delete(int argc, char **argv, int flags); +int test_add_find_delete_avoid_resize(int argc, char **argv, int flags); +int test_instant_rehashing(int argc, char **argv, int flags); +int test_probing_chain_length(int argc, char **argv, int flags); +int test_two_phase_insert_and_pop(int argc, char **argv, int flags); +int test_scan(int argc, char **argv, int flags); +int test_iterator(int argc, char **argv, int flags); +int test_safe_iterator(int argc, char **argv, int flags); +int test_random_element(int argc, char **argv, int flags); +int test_full_probe(int argc, char **argv, int flags); int test_intsetValueEncodings(int argc, char **argv, int flags); int test_intsetBasicAdding(int argc, char **argv, int flags); int test_intsetLargeNumberRandomAdd(int argc, char **argv, int flags); @@ -153,6 +165,7 @@ unitTest __test_crc64_c[] = {{"test_crc64", test_crc64}, {NULL, NULL}}; unitTest __test_crc64combine_c[] = {{"test_crc64combine", test_crc64combine}, 
{NULL, NULL}}; unitTest __test_dict_c[] = {{"test_dictCreate", test_dictCreate}, {"test_dictAdd16Keys", test_dictAdd16Keys}, {"test_dictDisableResize", test_dictDisableResize}, {"test_dictAddOneKeyTriggerResize", test_dictAddOneKeyTriggerResize}, {"test_dictDeleteKeys", test_dictDeleteKeys}, {"test_dictDeleteOneKeyTriggerResize", test_dictDeleteOneKeyTriggerResize}, {"test_dictEmptyDirAdd128Keys", test_dictEmptyDirAdd128Keys}, {"test_dictDisableResizeReduceTo3", test_dictDisableResizeReduceTo3}, {"test_dictDeleteOneKeyTriggerResizeAgain", test_dictDeleteOneKeyTriggerResizeAgain}, {"test_dictBenchmark", test_dictBenchmark}, {NULL, NULL}}; unitTest __test_endianconv_c[] = {{"test_endianconv", test_endianconv}, {NULL, NULL}}; +unitTest __test_hashset_c[] = {{"test_cursor", test_cursor}, {"test_set_hash_function_seed", test_set_hash_function_seed}, {"test_add_find_delete", test_add_find_delete}, {"test_add_find_delete_avoid_resize", test_add_find_delete_avoid_resize}, {"test_instant_rehashing", test_instant_rehashing}, {"test_probing_chain_length", test_probing_chain_length}, {"test_two_phase_insert_and_pop", test_two_phase_insert_and_pop}, {"test_scan", test_scan}, {"test_iterator", test_iterator}, {"test_safe_iterator", test_safe_iterator}, {"test_random_element", test_random_element}, {"test_full_probe", test_full_probe}, {NULL, NULL}}; unitTest __test_intset_c[] = {{"test_intsetValueEncodings", test_intsetValueEncodings}, {"test_intsetBasicAdding", test_intsetBasicAdding}, {"test_intsetLargeNumberRandomAdd", test_intsetLargeNumberRandomAdd}, {"test_intsetUpgradeFromint16Toint32", test_intsetUpgradeFromint16Toint32}, {"test_intsetUpgradeFromint16Toint64", test_intsetUpgradeFromint16Toint64}, {"test_intsetUpgradeFromint32Toint64", test_intsetUpgradeFromint32Toint64}, {"test_intsetStressLookups", test_intsetStressLookups}, {"test_intsetStressAddDelete", test_intsetStressAddDelete}, {NULL, NULL}}; unitTest __test_kvstore_c[] = {{"test_kvstoreAdd16Keys", 
test_kvstoreAdd16Keys}, {"test_kvstoreIteratorRemoveAllKeysNoDeleteEmptyDict", test_kvstoreIteratorRemoveAllKeysNoDeleteEmptyDict}, {"test_kvstoreIteratorRemoveAllKeysDeleteEmptyDict", test_kvstoreIteratorRemoveAllKeysDeleteEmptyDict}, {"test_kvstoreDictIteratorRemoveAllKeysNoDeleteEmptyDict", test_kvstoreDictIteratorRemoveAllKeysNoDeleteEmptyDict}, {"test_kvstoreDictIteratorRemoveAllKeysDeleteEmptyDict", test_kvstoreDictIteratorRemoveAllKeysDeleteEmptyDict}, {NULL, NULL}}; unitTest __test_listpack_c[] = {{"test_listpackCreateIntList", test_listpackCreateIntList}, {"test_listpackCreateList", test_listpackCreateList}, {"test_listpackLpPrepend", test_listpackLpPrepend}, {"test_listpackLpPrependInteger", test_listpackLpPrependInteger}, {"test_listpackGetELementAtIndex", test_listpackGetELementAtIndex}, {"test_listpackPop", test_listpackPop}, {"test_listpackGetELementAtIndex2", test_listpackGetELementAtIndex2}, {"test_listpackIterate0toEnd", test_listpackIterate0toEnd}, {"test_listpackIterate1toEnd", test_listpackIterate1toEnd}, {"test_listpackIterate2toEnd", test_listpackIterate2toEnd}, {"test_listpackIterateBackToFront", test_listpackIterateBackToFront}, {"test_listpackIterateBackToFrontWithDelete", test_listpackIterateBackToFrontWithDelete}, {"test_listpackDeleteWhenNumIsMinusOne", test_listpackDeleteWhenNumIsMinusOne}, {"test_listpackDeleteWithNegativeIndex", test_listpackDeleteWithNegativeIndex}, {"test_listpackDeleteInclusiveRange0_0", test_listpackDeleteInclusiveRange0_0}, {"test_listpackDeleteInclusiveRange0_1", test_listpackDeleteInclusiveRange0_1}, {"test_listpackDeleteInclusiveRange1_2", test_listpackDeleteInclusiveRange1_2}, {"test_listpackDeleteWitStartIndexOutOfRange", test_listpackDeleteWitStartIndexOutOfRange}, {"test_listpackDeleteWitNumOverflow", test_listpackDeleteWitNumOverflow}, {"test_listpackBatchDelete", test_listpackBatchDelete}, {"test_listpackDeleteFooWhileIterating", test_listpackDeleteFooWhileIterating}, {"test_listpackReplaceWithSameSize", 
test_listpackReplaceWithSameSize}, {"test_listpackReplaceWithDifferentSize", test_listpackReplaceWithDifferentSize}, {"test_listpackRegressionGt255Bytes", test_listpackRegressionGt255Bytes}, {"test_listpackCreateLongListAndCheckIndices", test_listpackCreateLongListAndCheckIndices}, {"test_listpackCompareStrsWithLpEntries", test_listpackCompareStrsWithLpEntries}, {"test_listpackLpMergeEmptyLps", test_listpackLpMergeEmptyLps}, {"test_listpackLpMergeLp1Larger", test_listpackLpMergeLp1Larger}, {"test_listpackLpMergeLp2Larger", test_listpackLpMergeLp2Larger}, {"test_listpackLpNextRandom", test_listpackLpNextRandom}, {"test_listpackLpNextRandomCC", test_listpackLpNextRandomCC}, {"test_listpackRandomPairWithOneElement", test_listpackRandomPairWithOneElement}, {"test_listpackRandomPairWithManyElements", test_listpackRandomPairWithManyElements}, {"test_listpackRandomPairsWithOneElement", test_listpackRandomPairsWithOneElement}, {"test_listpackRandomPairsWithManyElements", test_listpackRandomPairsWithManyElements}, {"test_listpackRandomPairsUniqueWithOneElement", test_listpackRandomPairsUniqueWithOneElement}, {"test_listpackRandomPairsUniqueWithManyElements", test_listpackRandomPairsUniqueWithManyElements}, {"test_listpackPushVariousEncodings", test_listpackPushVariousEncodings}, {"test_listpackLpFind", test_listpackLpFind}, {"test_listpackLpValidateIntegrity", test_listpackLpValidateIntegrity}, {"test_listpackNumberOfElementsExceedsLP_HDR_NUMELE_UNKNOWN", test_listpackNumberOfElementsExceedsLP_HDR_NUMELE_UNKNOWN}, {"test_listpackStressWithRandom", test_listpackStressWithRandom}, {"test_listpackSTressWithVariableSize", test_listpackSTressWithVariableSize}, {"test_listpackBenchmarkInit", test_listpackBenchmarkInit}, {"test_listpackBenchmarkLpAppend", test_listpackBenchmarkLpAppend}, {"test_listpackBenchmarkLpFindString", test_listpackBenchmarkLpFindString}, {"test_listpackBenchmarkLpFindNumber", test_listpackBenchmarkLpFindNumber}, {"test_listpackBenchmarkLpSeek", 
test_listpackBenchmarkLpSeek}, {"test_listpackBenchmarkLpValidateIntegrity", test_listpackBenchmarkLpValidateIntegrity}, {"test_listpackBenchmarkLpCompareWithString", test_listpackBenchmarkLpCompareWithString}, {"test_listpackBenchmarkLpCompareWithNumber", test_listpackBenchmarkLpCompareWithNumber}, {"test_listpackBenchmarkFree", test_listpackBenchmarkFree}, {NULL, NULL}}; @@ -172,6 +185,7 @@ struct unitTestSuite { {"test_crc64combine.c", __test_crc64combine_c}, {"test_dict.c", __test_dict_c}, {"test_endianconv.c", __test_endianconv_c}, + {"test_hashset.c", __test_hashset_c}, {"test_intset.c", __test_intset_c}, {"test_kvstore.c", __test_kvstore_c}, {"test_listpack.c", __test_listpack_c}, diff --git a/src/unit/test_hashset.c b/src/unit/test_hashset.c new file mode 100644 index 0000000000..c27d6b3edd --- /dev/null +++ b/src/unit/test_hashset.c @@ -0,0 +1,630 @@ +#include "../hashset.h" +#include "test_help.h" +#include "../mt19937-64.h" + +#include <assert.h> +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + + +/* From util.c: getRandomBytes to seed hash function. */ +void getRandomBytes(unsigned char *p, size_t len); + +/* Init hash function salt and seed random generator. */ +static void randomSeed(void) { + unsigned long long seed; + getRandomBytes((void *)&seed, sizeof(seed)); + init_genrand64(seed); + srandom((unsigned)seed); +} + +/* An element holding a string key and a string value in one allocation.
*/ +typedef struct { + unsigned int keysize; /* Sizes, including null-terminator */ + unsigned int valsize; + char data[]; /* key and value */ +} keyval; + +static keyval *create_keyval(const char *key, const char *val) { + size_t keysize = strlen(key) + 1; + size_t valsize = strlen(val) + 1; + keyval *e = malloc(sizeof(keyval) + keysize + valsize); + e->keysize = keysize; + e->valsize = valsize; + memcpy(e->data, key, keysize); + memcpy(e->data + keysize, val, valsize); + return e; +} + +static const void *getkey(const void *element) { + const keyval *e = element; + return e->data; +} + +static const void *getval(const void *element) { + const keyval *e = element; + return e->data + e->keysize; +} + +static uint64_t hashfunc(const void *key) { + return hashsetGenHashFunction(key, strlen(key)); +} + +static int keycmp(hashset *ht, const void *key1, const void *key2) { + (void)ht; + return strcmp(key1, key2); +} + +static void freekeyval(hashset *ht, void *keyval) { + (void)ht; + free(keyval); +} + +/* Hashset type used for some of the tests. */ +static hashsetType keyval_type = { + .elementGetKey = getkey, + .hashFunction = hashfunc, + .keyCompare = keycmp, + .elementDestructor = freekeyval, +}; + +/* Callback for testing hashsetEmpty(). 
*/ +static long empty_callback_call_counter; +void emptyCallback(hashset *s) { + UNUSED(s); + empty_callback_call_counter++; +} + +/* Prototypes for debugging */ +void hashsetDump(hashset *s); +void hashsetHistogram(hashset *s); +void hashsetProbeMap(hashset *s); +int hashsetLongestProbingChain(hashset *s); +size_t nextCursor(size_t v, size_t mask); + +int test_cursor(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + TEST_ASSERT(nextCursor(0x0000, 0xffff) == 0x8000); + TEST_ASSERT(nextCursor(0x8000, 0xffff) == 0x4000); + TEST_ASSERT(nextCursor(0x4001, 0xffff) == 0xc001); + TEST_ASSERT(nextCursor(0xffff, 0xffff) == 0x0000); + return 0; +} + +int test_set_hash_function_seed(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + randomSeed(); + return 0; +} + +static void add_find_delete_test_helper(int flags) { + int count = (flags & UNIT_TEST_ACCURATE) ? 1000000 : 200; + hashset *s = hashsetCreate(&keyval_type); + int j; + + /* Add */ + for (j = 0; j < count; j++) { + char key[32], val[32]; + snprintf(key, sizeof(key), "%d", j); + snprintf(val, sizeof(val), "%d", count - j + 42); + keyval *e = create_keyval(key, val); + assert(hashsetAdd(s, e)); + } + + if (count < 1000) { + printf("Bucket fill: "); + hashsetHistogram(s); + } + + /* Find */ + for (j = 0; j < count; j++) { + char key[32], val[32]; + snprintf(key, sizeof(key), "%d", j); + snprintf(val, sizeof(val), "%d", count - j + 42); + keyval *e; + assert(hashsetFind(s, key, (void **)&e)); + assert(!strcmp(val, getval(e))); + } + + /* Delete half of them */ + for (j = 0; j < count / 2; j++) { + char key[32]; + snprintf(key, sizeof(key), "%d", j); + if (j % 3 == 0) { + /* Test hashsetPop */ + char val[32]; + snprintf(val, sizeof(val), "%d", count - j + 42); + keyval *e; + assert(hashsetPop(s, key, (void **)&e)); + assert(!strcmp(val, getval(e))); + free(e); + } else { + assert(hashsetDelete(s, key)); + } + } + + /* Empty, i.e. 
delete remaining elements, with progress callback. */ + empty_callback_call_counter = 0; + hashsetEmpty(s, emptyCallback); + assert(empty_callback_call_counter > 0); + + /* Release memory */ + hashsetRelease(s); +} + +int test_add_find_delete(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + add_find_delete_test_helper(flags); + return 0; +} + +int test_add_find_delete_avoid_resize(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + hashsetSetResizePolicy(HASHSET_RESIZE_AVOID); + add_find_delete_test_helper(flags); + hashsetSetResizePolicy(HASHSET_RESIZE_ALLOW); + return 0; +} + +int test_instant_rehashing(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + + long count = 200; + + /* A set of longs, i.e. pointer-sized values. */ + hashsetType type = {.instant_rehashing = 1}; + hashset *s = hashsetCreate(&type); + long j; + + /* Populate and check that rehashing is never ongoing. */ + for (j = 0; j < count; j++) { + assert(hashsetAdd(s, (void *)j)); + assert(!hashsetIsRehashing(s)); + } + + /* Delete and check that rehashing is never ongoing. */ + for (j = 0; j < count; j++) { + assert(hashsetDelete(s, (void *)j)); + assert(!hashsetIsRehashing(s)); + } + + hashsetRelease(s); + return 0; +} + + +int test_probing_chain_length(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + + unsigned long count = 1000000; + + /* A set of longs, i.e. pointer-sized integer values. */ + hashsetType type = {0}; + hashset *s = hashsetCreate(&type); + unsigned long j; + for (j = 0; j < count; j++) { + assert(hashsetAdd(s, (void *)j)); + } + /* If it's rehashing, add a few more until rehashing is complete. */ + while (hashsetIsRehashing(s)) { + j++; + assert(hashsetAdd(s, (void *)j)); + } + TEST_ASSERT(j < count * 2); + int max_chainlen_not_rehashing = hashsetLongestProbingChain(s); + TEST_ASSERT(max_chainlen_not_rehashing < 100); + + /* Add more until rehashing starts again. 
*/ + while (!hashsetIsRehashing(s)) { + j++; + assert(hashsetAdd(s, (void *)j)); + } + TEST_ASSERT(j < count * 2); + int max_chainlen_rehashing = hashsetLongestProbingChain(s); + TEST_ASSERT(max_chainlen_rehashing < 100); + + hashsetRelease(s); + return 0; +} + +int test_two_phase_insert_and_pop(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + + int count = (flags & UNIT_TEST_ACCURATE) ? 1000000 : 200; + hashset *s = hashsetCreate(&keyval_type); + int j; + + /* hashsetFindPositionForInsert + hashsetInsertAtPosition */ + for (j = 0; j < count; j++) { + char key[32], val[32]; + snprintf(key, sizeof(key), "%d", j); + snprintf(val, sizeof(val), "%d", count - j + 42); + void *position = hashsetFindPositionForInsert(s, key, NULL); + assert(position != NULL); + keyval *e = create_keyval(key, val); + hashsetInsertAtPosition(s, e, position); + } + + if (count < 1000) { + printf("Bucket fill: "); + hashsetHistogram(s); + } + + /* Check that all elements were inserted. */ + for (j = 0; j < count; j++) { + char key[32], val[32]; + snprintf(key, sizeof(key), "%d", j); + snprintf(val, sizeof(val), "%d", count - j + 42); + keyval *e; + assert(hashsetFind(s, key, (void **)&e)); + assert(!strcmp(val, getval(e))); + } + + /* Test two-phase pop. 
*/ + for (j = 0; j < count; j++) { + char key[32], val[32]; + snprintf(key, sizeof(key), "%d", j); + snprintf(val, sizeof(val), "%d", count - j + 42); + void *position; + size_t size_before_find = hashsetSize(s); + void **ref = hashsetTwoPhasePopFindRef(s, key, &position); + assert(ref != NULL); + keyval *e = *ref; + assert(!strcmp(val, getval(e))); + assert(hashsetSize(s) == size_before_find); + hashsetTwoPhasePopDelete(s, position); + assert(hashsetSize(s) == size_before_find - 1); + } + assert(hashsetSize(s) == 0); + + hashsetRelease(s); + return 0; +} + +typedef struct { + long count; + uint8_t element_seen[]; +} scandata; + +void scanfn(void *privdata, void *element) { + scandata *data = (scandata *)privdata; + unsigned long j = (unsigned long)element; + data->element_seen[j]++; + data->count++; +} + +int test_scan(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + + long num_elements = (flags & UNIT_TEST_LARGE_MEMORY) ? 1000000 : 200000; + int num_rounds = (flags & UNIT_TEST_ACCURATE) ? 20 : 5; + + /* A set of longs, i.e. pointer-sized values. */ + hashsetType type = {0}; + long j; + + for (int round = 0; round < num_rounds; round++) { + /* First round count = num_elements, then some more. */ + long count = num_elements * (1 + 2 * (double)round / num_rounds); + + /* Seed, to make sure each round is different. 
*/ + randomSeed(); + + /* Populate */ + hashset *s = hashsetCreate(&type); + for (j = 0; j < count; j++) { + assert(hashsetAdd(s, (void *)j)); + } + + /* Scan */ + scandata *data = calloc(1, sizeof(scandata) + count); + long max_elements_per_cycle = 0; + unsigned num_cycles = 0; + long scanned_count = 0; + size_t cursor = 0; + do { + data->count = 0; + cursor = hashsetScan(s, cursor, scanfn, data, 0); + if (data->count > max_elements_per_cycle) { + max_elements_per_cycle = data->count; + } + scanned_count += data->count; + data->count = 0; + num_cycles++; + } while (cursor != 0); + + /* Verify every element was returned at least once, but no more than + * twice. Elements can be returned twice due to probing chains wrapping + * around scan cursor zero. */ + TEST_ASSERT(scanned_count >= count); + TEST_ASSERT(scanned_count < count * 2); + for (j = 0; j < count; j++) { + assert(data->element_seen[j] >= 1); + assert(data->element_seen[j] <= 2); + } + + /* Verify some stuff, but just print it for now. */ + printf("Scanned: %ld; ", count); + printf("duplicates emitted: %ld; ", scanned_count - count); + printf("max emitted per call: %ld; ", max_elements_per_cycle); + printf("avg emitted per call: %.2lf\n", (double)count / num_cycles); + + /* Cleanup */ + hashsetRelease(s); + free(data); + } + return 0; +}
*/ + hashsetType type = {0}; + hashset *s = hashsetCreate(&type); + long j; + + /* Populate */ + for (j = 0; j < count; j++) { + assert(hashsetAdd(s, (void *)j)); + } + + /* Iterate */ + uint8_t element_returned[count]; + memset(element_returned, 0, sizeof element_returned); + long num_returned = 0; + hashsetIterator iter; + hashsetInitIterator(&iter, s); + while (hashsetNext(&iter, (void **)&j)) { + num_returned++; + assert(j >= 0 && j < count); + element_returned[j]++; + } + hashsetResetIterator(&iter); + + /* Check that all elements were returned exactly once. */ + TEST_ASSERT(num_returned == count); + for (j = 0; j < count; j++) { + if (element_returned[j] != 1) { + printf("Element %ld returned %d times\n", j, element_returned[j]); + return 0; + } + } + + hashsetRelease(s); + return 0; +} + +int test_safe_iterator(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + + long count = 1000; + + /* A set of longs, i.e. pointer-sized values. */ + hashsetType type = {0}; + hashset *s = hashsetCreate(&type); + long j; + + /* Populate */ + for (j = 0; j < count; j++) { + assert(hashsetAdd(s, (void *)j)); + } + + /* Iterate */ + uint8_t element_returned[count * 2]; + memset(element_returned, 0, sizeof element_returned); + long num_returned = 0; + hashsetIterator iter; + hashsetInitSafeIterator(&iter, s); + while (hashsetNext(&iter, (void **)&j)) { + num_returned++; + if (j < 0 || j >= count * 2) { + printf("Element %ld returned, max == %ld. Num returned: %ld\n", j, count * 2 - 1, num_returned); + printf("Safe %d, table %d, index %lu, pos in bucket %d, rehashing? 
%d\n", iter.safe, iter.table, iter.index, + iter.pos_in_bucket, hashsetIsRehashing(s)); + hashsetHistogram(s); + exit(1); + } + assert(j >= 0 && j < count * 2); + element_returned[j]++; + if (j % 4 == 0) { + assert(hashsetDelete(s, (void *)j)); + } + /* Add elements x if count <= x < count * 2 */ + if (j < count) { + assert(hashsetAdd(s, (void *)(j + count))); + } + } + hashsetResetIterator(&iter); + + /* Check that all elements present during the whole iteration were returned + * exactly once. (Some are deleted after being returned.) */ + TEST_ASSERT(num_returned >= count); + for (j = 0; j < count; j++) { + if (element_returned[j] != 1) { + printf("Element %ld returned %d times\n", j, element_returned[j]); + return 0; + } + } + /* Check that elements inserted during the iteration were returned at most + * once. */ + unsigned long num_optional_returned = 0; + for (j = count; j < count * 2; j++) { + assert(element_returned[j] <= 1); + num_optional_returned += element_returned[j]; + } + printf("Safe iterator returned %lu of the %ld elements inserted while iterating.\n", num_optional_returned, count); + + hashsetRelease(s); + return 0; +} + +int test_random_element(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + randomSeed(); + + long count = (flags & UNIT_TEST_LARGE_MEMORY) ? 7000 : 400; + long num_rounds = (flags & UNIT_TEST_ACCURATE) ? 1000000 : 10000; + + /* A set of longs, i.e. pointer-sized values. */ + hashsetType type = {0}; + hashset *s = hashsetCreate(&type); + + /* Populate */ + for (long j = 0; j < count; j++) { + assert(hashsetAdd(s, (void *)j)); + } + + /* Pick elements, and count how many times each element is picked.
*/ + unsigned times_picked[count]; + memset(times_picked, 0, sizeof(times_picked)); + for (long i = 0; i < num_rounds; i++) { + long element; + assert(hashsetFairRandomElement(s, (void **)&element)); + assert(element >= 0 && element < count); + times_picked[element]++; + } + hashsetRelease(s); + + /* Fairness measurement + * -------------------- + * + * Selecting a single random element: For any element in the hash table, let + * X=1 if we selected the element (success) and X=0 otherwise. With m + * elements, our element is selected with probability p = 1/m, the expected + * value is E(X) = 1/m, E(X^2) = 1/m and the variance: + * + * Var(X) = E(X^2) - (E(X))^2 = 1/m - 1/(m^2) = (1/m) * (1 - 1/m). + * + * Repeating the selection of a random element: Let's repeat the experiment + * n times and let Y be the number of times our element was selected. This + * is a binomial distribution. + * + * Y = X_1 + X_2 + ... + X_n + * E(Y) = n/m + * + * The variance of a sum of independent random variables is the sum of the + * variances, so Y has variance np(1−p). + * + * Var(Y) = npq = np(1 - p) = (n/m) * (1 - 1/m) = n * (m - 1) / (m * m) + */ + double m = (double)count, n = (double)num_rounds; + double expected = n / m; /* E(Y) */ + double variance = n * (m - 1) / (m * m); /* Var(Y) */ + double std_dev = sqrt(variance); + + /* With large n, the distribution approaches a normal distribution and we + * can use p68 = within 1 std dev, p95 = within 2 std dev, p99.7 = within 3 + * std dev.
*/ + long p68 = 0, p95 = 0, p99 = 0, p4dev = 0, p5dev = 0; + for (long j = 0; j < count; j++) { + double dev = expected - times_picked[j]; + p68 += (dev >= -std_dev && dev <= std_dev); + p95 += (dev >= -std_dev * 2 && dev <= std_dev * 2); + p99 += (dev >= -std_dev * 3 && dev <= std_dev * 3); + p4dev += (dev >= -std_dev * 4 && dev <= std_dev * 4); + p5dev += (dev >= -std_dev * 5 && dev <= std_dev * 5); + } + printf("Random element fairness test\n"); + printf(" Pick one of %ld elements, %ld times.\n", count, num_rounds); + printf(" Expecting each element to be picked %.2lf times, std dev %.3lf.\n", expected, std_dev); + printf(" Within 1 std dev (p68) = %.2lf%%\n", 100 * p68 / m); + printf(" Within 2 std dev (p95) = %.2lf%%\n", 100 * p95 / m); + printf(" Within 3 std dev (p99) = %.2lf%%\n", 100 * p99 / m); + printf(" Within 4 std dev = %.2lf%%\n", 100 * p4dev / m); + printf(" Within 5 std dev = %.2lf%%\n", 100 * p5dev / m); + + /* Conclusion? The number of trials (n) relative to the probabilities (p and + * 1 − p) must be sufficiently large (n * p ≥ 5 and n * (1 − p) ≥ 5) to + * approximate a binomial distribution with a normal distribution. */ + if (n / m >= 5 && n * (1 - 1 / m) >= 5) { + TEST_ASSERT_MESSAGE("Too unfair randomness", 100 * p99 / m >= 60.0); + } else { + printf("Too uncertain numbers to draw any conclusions about fairness.\n"); + } + return 0; +} + +typedef struct { + size_t capacity; + size_t count; + long elements[]; +} sampledata; + +void sample_scanfn(void *privdata, void *element) { + sampledata *data = (sampledata *)privdata; + if (data->count == data->capacity) return; + long j = (long)element; + data->elements[data->count++] = j; +} + +int test_full_probe(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + randomSeed(); + + long count = 42; /* 75% of 8 buckets (7 elements per bucket). */ + long num_rounds = (flags & UNIT_TEST_ACCURATE) ? 100000 : 1000; + + /* A set of longs, i.e. pointer-sized values.
*/ + hashsetType type = {0}; + hashset *s = hashsetCreate(&type); + + /* Populate */ + for (long j = 0; j < count; j++) { + assert(hashsetAdd(s, (void *)j)); + } + + /* Scan and delete (simulates eviction), then add some more, repeat. */ + size_t cursor = 0; + size_t max_samples = 30; /* at least the size of a bucket */ + sampledata *data = calloc(1, sizeof(sampledata) + sizeof(long) * max_samples); + data->capacity = max_samples; + + for (int r = 0; r < num_rounds; r++) { + size_t probes = hashsetProbeCounter(s, 0); + size_t buckets = hashsetBuckets(s); + assert(probes < buckets); + + /* Empty the next buckets. */ + data->count = 0; + cursor = hashsetScan(s, cursor, sample_scanfn, data, HASHSET_SCAN_SINGLE_STEP); + long n = data->count; + for (long i = 0; i < n; i++) { + int deleted = hashsetDelete(s, (void *)data->elements[i]); + if (!deleted) n--; /* Duplicate returned by scan. */ + } + + /* Add the same number of elements back */ + while (n > 0) { + n -= hashsetAdd(s, (void *)random()); + } + } + free(data); + hashsetRelease(s); + return 0; +}