diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1e5ab30..639c618 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,9 +2,12 @@ name: CI on: push: - branches: [ master ] + branches: [master] pull_request: - branches: [ master ] + branches: [master] + +env: + BUILD_DIR: ${{ github.workspace }}/build jobs: build: @@ -14,35 +17,29 @@ jobs: matrix: os: [ubuntu-latest] compiler: [gcc, clang] - build_type: [debug, release] + build_type: [Debug, Release] + steps: - - uses: actions/checkout@v4 - - name: install-clang - if: matrix.compiler == 'clang' - run: | - sudo apt-get install -y clang - sudo update-alternatives --remove-all cc - sudo update-alternatives --install /usr/bin/cc cc /usr/bin/clang 14 - - name: configure-debug - if: matrix.build_type == 'debug' - run: | - mkdir build - cd build - cmake -DCMAKE_BUILD_TYPE=Debug -DHASHMAP_BUILD_TESTS=ON -DHASHMAP_BUILD_EXAMPLES=ON ${GITHUB_WORKSPACE} - - name: configure-release - if: matrix.build_type == 'release' - run: | - mkdir build - cd build - cmake -DCMAKE_BUILD_TYPE=Release -DHASHMAP_BUILD_TESTS=ON -DHASHMAP_BUILD_EXAMPLES=ON ${GITHUB_WORKSPACE} - - name: make - run: | - make VERBOSE=1 -C build - - name: install - run: | - cd build - sudo make install - - name: test - run: | - cd build - ctest --output-on-failure + - uses: actions/checkout@v4 + + - name: Create build directory + run: mkdir -p ${{ env.BUILD_DIR }} + + - name: Install Clang + if: matrix.compiler == 'clang' + run: | + sudo apt-get install -y clang + sudo update-alternatives --remove-all cc + sudo update-alternatives --install /usr/bin/cc cc /usr/bin/clang 15 + + - name: Configure CMake + run: cmake -B ${{ env.BUILD_DIR }} -S ${{ github.workspace }} -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DHASHMAP_BUILD_TESTS=ON -DHASHMAP_BUILD_EXAMPLES=ON + + - name: Build + run: cmake --build ${{ env.BUILD_DIR }} --config ${{ matrix.build_type }} + + - name: Install + run: sudo cmake --install ${{ env.BUILD_DIR }} 
--config ${{ matrix.build_type }} + + - name: Test + run: ctest --output-on-failure --test-dir ${{ env.BUILD_DIR }} diff --git a/CMakeLists.txt b/CMakeLists.txt index 3d98965..3a8579a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.5) project(hashmap VERSION 2.0.0 LANGUAGES C) -set(CMAKE_C_STANDARD 99) +set(CMAKE_C_STANDARD 11) ############################################## # Build options @@ -108,7 +108,7 @@ export(PACKAGE HashMap) if(HASHMAP_BUILD_TESTS) enable_testing() - add_subdirectory(test) + add_subdirectory(tests) endif() ############################################## diff --git a/README.md b/README.md index f8bb87c..67c0e38 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,19 @@ # hashmap + [![ci](https://github.com/DavidLeeds/hashmap/workflows/CI/badge.svg)](https://github.com/DavidLeeds/hashmap/actions/workflows/ci.yml) Templated type-safe hashmap implementation in C using open addressing and linear probing for collision resolution. ## Summary -This project came into existence because there are a notable lack of flexible and easy to use data structures available in C. C data structures with efficient, type-safe interfaces are virtually non-existent. Higher level languages have built-in libraries and templated classes, but plenty of embedded projects or higher level libraries are implemented in C. When it is undesireable to depend on a bulky library like Glib or grapple with a restrictive license agreement, this is the library for you. +This project came into existence because there are a notable lack of flexible and easy to use data structures available in C. C data structures with efficient, type-safe interfaces are virtually non-existent. Higher level languages have built-in libraries and templated classes, but plenty of embedded projects or higher level libraries are implemented in C. 
When it is undesireable to depend on a bulky library like Glib or grapple with a restrictive license agreement, this is the library for you. ## Goals + * **To scale gracefully to the full capacity of the numeric primitives in use.** We should be able to load enough entries to consume all memory on the system without hitting any bugs relating to integer overflows. Lookups on a hashtable with a hundreds of millions of entries should be performed in close to constant time, no different than lookups in a hashtable with 20 entries. Automatic rehashing occurs and maintains a load factor of 0.75 or less. * **To provide a clean and easy-to-use interface.** C data structures often struggle to strike a balance between flexibility and ease of use. To this end, I wrapped a generic C backend implementation with light-weight pre-processor macros to create a templated interface that enables the compiler to type-check all function arguments and return values. All required type information is encoded in the hashmap declaration using the`HASHMAP()` macro. Unlike with header-only macro libraries, there is no code duplication or performance disadvantage over a traditional library with a non-type-safe `void *` interface. * **To enable easy iteration and safe entry removal during iteration.** Applications often need these features, and the data structure should not hold them back. Easy to use `hashmap_foreach()` macros and a more flexible iterator interface are provided. This hashmap also uses an open addressing scheme, which has superior iteration performance to a similar hashmap implemented using separate chaining (buckets with linked lists). This is because fewer instructions are needed per iteration, and array traversal has superior cache performance than linked list traversal. -* **To use a very unrestrictive software license.** Using no license was an option, but I wanted to allow the code to be tracked, simply for my own edification. 
I chose the MIT license because it is the most common open source license in use, and it grants full rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell the code. Basically, take this code and do what you want with it. Just be nice and leave the license comment and my name at top of the file. Feel free to add your name if you are modifying and redistributing. +* **To use an unrestrictive software license.** I chose the MIT license because it is the most common open source license in use, and it grants full rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell the code. Basically, take this code and do what you want with it. Just be nice and leave the license comment and my name at top of the file. Feel free to add your name as a contributor if you are significantly modifying and redistributing. ## API Examples @@ -28,13 +30,15 @@ HASHMAP(uint64_t, struct my_value) map2; ``` The structure defined by the `HASHMAP()` macro may be used directly, or named using `typedef`. For example: + ```C typedef HASHMAP(char, struct my_value) value_map_t; ``` ### Initialization and cleanup -Maps must be initialized with a key hash function and a key comparator. +Maps must be initialized with a key hash function and a key comparator. 
+ ```C /* Initialize the map structure */ hashmap_init(&map, my_key_hash, my_key_compare); @@ -46,6 +50,7 @@ hashmap_cleanup(&map); ``` This library provides some hash functions, so you may not have to write your own: + * [hashmap_hash_string()](https://github.com/DavidLeeds/hashmap/blob/137d60b3818c22c79d2be5560150eb2eff981a68/include/hashmap_base.h#L54) - Case sensitive string hash * [hashmap_hash_string_i()](https://github.com/DavidLeeds/hashmap/blob/137d60b3818c22c79d2be5560150eb2eff981a68/include/hashmap_base.h#L55) - Case insensitive string hash * [hashmap_hash_default()](https://github.com/DavidLeeds/hashmap/blob/137d60b3818c22c79d2be5560150eb2eff981a68/include/hashmap_base.h#L53) - Hash function for arbitrary bytes that can be used by a user-defined hash function @@ -59,15 +64,32 @@ hashmap_init(&map, hashmap_hash_string, strcmp); Note that memory associated with map keys and values is not managed by the map, so you may need to free this before calling `hashmap_cleanup()`. Keys are often stored in the same structure as the value, but it is possible to have the map manage key memory allocation internally, by calling `hashmap_set_key_alloc_funcs()`. - -### Value insertion and access +### Value insertion ```C -/* Insert a my_value (fails and returns -EEXIST if the key already exists) */ -int result = hashmap_put(&map, "KeyABC", val); +struct my_value *val = /* ... 
*/; + +/* Add a my_value (fails and returns -EEXIST if the key already exists) */ +int result1 = hashmap_put(&map, "KeyABC", val); +/* Add or update a my_value (assigns previous value to old_val if the key already exists) */ +struct my_value *old_val; +int result2 = hashmap_insert(&map, "KeyABC", val, &old_val); +``` + +### Value access + +```C /* Access the value with a given key */ -struct my_value *val = hashmap_get(&map, "KeyABC"); +struct my_value *val1 = hashmap_get(&map, "KeyABC"); + +/* Access the key or value with an iterator */ +HASHMAP_ITER(map) iter = hashmap_iter_find(&map, "KeyABC"); +const char *key = hashmap_iter_get_key(&iter); +struct my_value *val2 = hashmap_iter_get_data(&iter); + +/* Check if an entry with the given key exists */ +bool present = hashmap_contains(&map, "KeyABC"); ``` ### Value removal @@ -76,10 +98,14 @@ struct my_value *val = hashmap_get(&map, "KeyABC"); /* Erase the entry with the given key */ struct my_value *val = hashmap_remove(&map, "KeyABC"); +/* Erase the entry with an iterator */ +HASHMAP_ITER(map) iter = hashmap_iter_find(&map, "KeyABC"); +hashmap_iter_remove(&iter); + /* Erase all entries */ hashmap_clear(&map); -/* Erase all entries and reset the hash table to its initial size */ +/* Erase all entries and reset the hash table heap allocation to its initial size */ hashmap_reset(&map); ``` @@ -108,36 +134,50 @@ hashmap_foreach_data(val, &map) { ``` The above iteration macros are only safe for read-only access. To safely remove the current element during iteration, use the macros with a `_safe` suffix. These require an additional pointer parameter. For example: + ```C const char *key; struct my_value *val; -void *temp; +void *pos; /* Okay */ -hashmap_foreach_key_safe(key, &map, temp) { +hashmap_foreach_key_safe(key, &map, pos) { hashmap_remove(&map, key); } ``` Iteration using the iterator interface. 
+ ```C HASHMAP_ITER(map) it; -for (it = hashmap_iter(&map); hashmap_iter_valid(&it); hashmap_iter_next(&it) { - /* - * Access entry using: - * hashmap_iter_get_key() - * hashmap_iter_get_data() - * hashmap_iter_set_data() - */ +for (it = hashmap_iter(&map); hashmap_iter_valid(&it); hashmap_iter_next(&it)) { + /* + * Access entry using: + * hashmap_iter_get_key() + * hashmap_iter_get_data() + * hashmap_iter_set_data() + */ } ``` ### Additional examples -Are located in the `examples` directory in the source tree. + +Additional examples are located in the [examples](https://github.com/DavidLeeds/hashmap/tree/master/examples) directory in the source tree. ## How to Build and Install -This project uses CMake to orchestrate the build and installallation process. To build and install on your host system, follow these easy steps: + +This project uses CMake to orchestrate the build and installation process. + +### CMake Options + +* `HASHMAP_BUILD_TESTS` - Set to `ON` to generate unit tests. +* `HASHMAP_BUILD_EXAMPLES` - Set to `ON` to build example code. + +### How to build from source + +To build and install on your host system, follow these easy steps: + 1. `git clone https://github.com/DavidLeeds/hashmap.git` - download the source 2. `mkdir build-hashmap && cd build-hashmap` - create a build directory outside the source tree 3. `cmake ../hashmap` - run CMake to setup the build @@ -145,11 +185,28 @@ This project uses CMake to orchestrate the build and installallation process. To 5. `make test` - run the unit tests (if enabled) 6. `sudo make install` - _OPTIONAL_ install the library on this system -##### CMake Options +### How to integrate with an existing CMake project -* `HASHMAP_BUILD_TESTS` - Set to `ON` to generate unit tests. -* `HASHMAP_BUILD_EXAMPLES` - Set to `ON` to build example code. 
+Clone and build this repository: + +```cmake +include(FetchContent) + +FetchContent_Declare( + hashmap + GIT_REPOSITORY https://github.com/DavidLeeds/hashmap.git + GIT_SHALLOW ON +) +FetchContent_MakeAvailable(hashmap) +``` + +Add `HashMap::HashMap` as a dependency, e.g.: + +```cmake +add_executable(my_app main.c) +target_link_libraries(my_app PRIVATE HashMap::HashMap) +``` ## Contibutions and Questions -I welcome all questions and contributions. Feel free to e-mail me, or put up a pull request. The core algorithm is stable, but I'm happy to consider CMake improvements, compiler compatibility fixes, or API additions. +I welcome all questions and contributions. Feel free to e-mail me, or put up a pull request. The core algorithm is stable, but I'm happy to consider CMake improvements, compiler compatibility fixes, or API additions. diff --git a/clib.json b/clib.json index 45beea3..ccbb653 100644 --- a/clib.json +++ b/clib.json @@ -1,8 +1,8 @@ { "name": "templated-hashmap", - "version": "v2.0.3", + "version": "v2.1.0", "repo": "DavidLeeds/hashmap", - "description": " Templated type-safe hashmap implementation in C using open addressing and linear probing for collision resolution. 
", + "description": " Templated type-safe hashmap implementation in C using open addressing and linear probing for collision resolution.", "keywords": ["hashmap", "dictionary", "templated"], "license": "MIT", "src": ["src/hashmap.c", "include/hashmap.h", "include/hashmap_base.h"] diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 3cd2e1d..7057006 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 3.5) # Hashmap example add_executable(hashmap_example hashmap_example.c) -target_compile_options(hashmap_example PRIVATE $<$:-Wall -Werror>) +target_compile_options(hashmap_example PRIVATE -Wall -Werror) target_link_libraries(hashmap_example PRIVATE HashMap::HashMap) diff --git a/include/hashmap.h b/include/hashmap.h index 760042a..d87785e 100644 --- a/include/hashmap.h +++ b/include/hashmap.h @@ -33,30 +33,30 @@ extern "C" { ((key) = hashmap_iter_get_key(&__HASHMAP_UNIQUE(x, it))) && \ ((data) = hashmap_iter_get_data(&__HASHMAP_UNIQUE(x, it))); \ hashmap_iter_next(&__HASHMAP_UNIQUE(x, it))) -#define __HASHMAP_FOREACH_SAFE(x, key, data, h, temp_ptr) \ +#define __HASHMAP_FOREACH_SAFE(x, key, data, h, pos) \ for (HASHMAP_ITER(*(h)) __HASHMAP_UNIQUE(x, it) = hashmap_iter(h); \ - ((temp_ptr) = (void *)((key) = hashmap_iter_get_key(&__HASHMAP_UNIQUE(x, it)))) && \ + ((pos) = (void *)((key) = hashmap_iter_get_key(&__HASHMAP_UNIQUE(x, it)))) && \ ((data) = hashmap_iter_get_data(&__HASHMAP_UNIQUE(x, it))); \ - ((temp_ptr) == (void *)hashmap_iter_get_key(&__HASHMAP_UNIQUE(x, it))) ? \ + ((pos) == (void *)hashmap_iter_get_key(&__HASHMAP_UNIQUE(x, it))) ? 
\ hashmap_iter_next(&__HASHMAP_UNIQUE(x, it)) : __HASHMAP_ITER_RESET(&__HASHMAP_UNIQUE(x, it))) #define __HASHMAP_FOREACH_KEY(x, key, h) \ for (HASHMAP_ITER(*(h)) __HASHMAP_UNIQUE(x, it) = hashmap_iter(h); \ (key = hashmap_iter_get_key(&__HASHMAP_UNIQUE(x, it))); \ hashmap_iter_next(&__HASHMAP_UNIQUE(x, it))) -#define __HASHMAP_FOREACH_KEY_SAFE(x, key, h, temp_ptr) \ +#define __HASHMAP_FOREACH_KEY_SAFE(x, key, h, pos) \ for (HASHMAP_ITER(*(h)) __HASHMAP_UNIQUE(x, it) = hashmap_iter(h); \ - ((temp_ptr) = (void *)((key) = hashmap_iter_get_key(&__HASHMAP_UNIQUE(x, it)))); \ - ((temp_ptr) == (void *)hashmap_iter_get_key(&__HASHMAP_UNIQUE(x, it))) ? \ + ((pos) = (void *)((key) = hashmap_iter_get_key(&__HASHMAP_UNIQUE(x, it)))); \ + ((pos) == (void *)hashmap_iter_get_key(&__HASHMAP_UNIQUE(x, it))) ? \ hashmap_iter_next(&__HASHMAP_UNIQUE(x, it)) : __HASHMAP_ITER_RESET(&__HASHMAP_UNIQUE(x, it))) #define __HASHMAP_FOREACH_DATA(x, data, h) \ for (HASHMAP_ITER(*(h)) __HASHMAP_UNIQUE(x, it) = hashmap_iter(h); \ (data = hashmap_iter_get_data(&__HASHMAP_UNIQUE(x, it))); \ hashmap_iter_next(&__HASHMAP_UNIQUE(x, it))) -#define __HASHMAP_FOREACH_DATA_SAFE(x, data, h, temp_ptr) \ +#define __HASHMAP_FOREACH_DATA_SAFE(x, data, h, pos) \ for (HASHMAP_ITER(*(h)) __HASHMAP_UNIQUE(x, it) = hashmap_iter(h); \ - ((temp_ptr) = (void *)hashmap_iter_get_key(&__HASHMAP_UNIQUE(x, it))) && \ + ((pos) = (void *)hashmap_iter_get_key(&__HASHMAP_UNIQUE(x, it))) && \ ((data) = hashmap_iter_get_data(&__HASHMAP_UNIQUE(x, it))); \ - ((temp_ptr) == (void *)hashmap_iter_get_key(&__HASHMAP_UNIQUE(x, it))) ? \ + ((pos) == (void *)hashmap_iter_get_key(&__HASHMAP_UNIQUE(x, it))) ? \ hashmap_iter_next(&__HASHMAP_UNIQUE(x, it)) : __HASHMAP_ITER_RESET(&__HASHMAP_UNIQUE(x, it))) @@ -153,7 +153,7 @@ extern "C" { } while (0) /* - * Return the number of entries in the hash map. + * Return the number of entries in the hashmap. 
* * Parameters: * const HASHMAP(, ) *h - hashmap pointer @@ -161,6 +161,15 @@ extern "C" { #define hashmap_size(h) \ ((typeof((h)->map_base.size))(h)->map_base.size) +/* + * Return true if the hashmap is empty. + * + * Parameters: + * const HASHMAP(, ) *h - hashmap pointer + */ +#define hashmap_empty(h) \ + (hashmap_size(h) == 0) + /* * Set the hashmap's initial allocation size such that no rehashes are * required to fit the specified number of entries. @@ -175,8 +184,19 @@ extern "C" { hashmap_base_reserve(&(h)->map_base, capacity) /* - * Add a new entry to the hashmap. If an entry with a matching key - * already exists -EEXIST is returned. + * Get the hashmap's present allocation size. + * + * Parameters: + * HASHMAP(, ) *h - hashmap pointer + * + * Returns the number of entry slots currently allocated in the hash table. + */ +#define hashmap_capacity(h) \ + ((typeof((h)->map_base.table_size))(h)->map_base.table_size) + +/* + * Add a new entry to the hashmap. If an entry with a matching key is already + * present, -EEXIST is returned. * * Parameters: * HASHMAP(, ) *h - hashmap pointer @@ -191,6 +211,26 @@ extern "C" { hashmap_base_put(&(h)->map_base, (const void *)__map_key, (void *)__map_data); \ }) +/* + * Add a new entry to the hashmap, or update an existing entry. If an entry + * with a matching key is already present, its data is updated. If old_data + * is non-null, the previous data pointer is assigned to it. + * + * Parameters: + * HASHMAP(, ) *h - hashmap pointer + * *key - pointer to the entry's key + * *data - pointer to the entry's data + * **old_data - optional pointer to assign the previous data to + * + * Returns 1 on add, 0 on update, or -errno on failure. 
+ */ +#define hashmap_insert(h, key, data, old_data) ({ \ + typeof((h)->map_types->t_key) __map_key = (key); \ + typeof((h)->map_types->t_data) __map_data = (data); \ + typeof((h)->map_types->t_data) *__map_old_data = (old_data); \ + hashmap_base_insert(&(h)->map_base, (const void *)__map_key, (void *)__map_data, (void **)__map_old_data); \ +}) + /* * Do a constant-time lookup of a hashmap entry. * @@ -205,6 +245,15 @@ extern "C" { (typeof((h)->map_types->t_data))hashmap_base_get(&(h)->map_base, (const void *)__map_key); \ }) +/* + * Return true if the hashmap contains an entry with the specified key. + * + * Parameters: + * const HASHMAP(, ) *h - hashmap pointer + */ +#define hashmap_contains(h, key) \ + (hashmap_get(h, key) != NULL) + /* * Remove an entry with the specified key from the map. * @@ -271,6 +320,20 @@ extern "C" { #define hashmap_iter_next(iter) \ hashmap_base_iter_next((iter)->iter_map, &(iter)->iter_pos) +/* + * This function behaves like hashmap_get(), but returns an iterator. + * This provides an efficient way to access and remove an entry without + * performing two lookups. + * + * Parameters: + * HASHMAP(, ) *h - hashmap pointer + * *key - pointer to the key to lookup + * + * Returns a valid iterator if the key exists, otherwise an invalid iterator. + */ +#define hashmap_iter_find(h, key) \ + ((HASHMAP_ITER(*(h))){ &(h)->map_base, hashmap_base_iter_find(&(h)->map_base, key) }) + /* * Remove the hashmap entry pointed to by this iterator and advance the * iterator to the next entry. 
@@ -310,7 +373,7 @@ extern "C" { */ #define hashmap_iter_set_data(iter, data) ({ \ - (typeof((iter)->iter_types->t_data)) __map_data = (data); \ - hashmap_base_iter_set_data((iter)->iter_pos), (void *)__map_data); \ + typeof((iter)->iter_types->t_data) __map_data = (data); \ + hashmap_base_iter_set_data((iter)->iter_pos, (void *)__map_data); \ }) /* @@ -336,10 +399,10 @@ extern "C" { * const *key - key pointer assigned on each iteration * *data - data pointer assigned on each iteration * HASHMAP(, ) *h - hashmap pointer - * void *temp_ptr - opaque pointer assigned on each iteration + * void *pos - opaque pointer assigned on each iteration */ -#define hashmap_foreach_safe(key, data, h, temp_ptr) \ - __HASHMAP_FOREACH_SAFE(__HASHMAP_MAKE_UNIQUE(__map), (key), (data), (h), (temp_ptr)) +#define hashmap_foreach_safe(key, data, h, pos) \ + __HASHMAP_FOREACH_SAFE(__HASHMAP_MAKE_UNIQUE(__map), (key), (data), (h), (pos)) /* * Convenience macro to iterate through the keys of a hashmap. @@ -362,10 +425,10 @@ extern "C" { * Parameters: * const *key - key pointer assigned on each iteration * HASHMAP(, ) *h - hashmap pointer - * void *temp_ptr - opaque pointer assigned on each iteration + * void *pos - opaque pointer assigned on each iteration */ -#define hashmap_foreach_key_safe(key, h, temp_ptr) \ - __HASHMAP_FOREACH_KEY_SAFE(__HASHMAP_MAKE_UNIQUE(__map), (key), (h), (temp_ptr)) +#define hashmap_foreach_key_safe(key, h, pos) \ + __HASHMAP_FOREACH_KEY_SAFE(__HASHMAP_MAKE_UNIQUE(__map), (key), (h), (pos)) /* * Convenience macro to iterate through the data of a hashmap. 
@@ -388,10 +451,10 @@ extern "C" { * Parameters: * *data - data pointer assigned on each iteration * HASHMAP(, ) *h - hashmap pointer - * void *temp_ptr - opaque pointer assigned on each iteration + * void *pos - opaque pointer assigned on each iteration */ -#define hashmap_foreach_data_safe(data, h, temp_ptr) \ - __HASHMAP_FOREACH_DATA_SAFE(__HASHMAP_MAKE_UNIQUE(__map), (data), (h), (temp_ptr)) +#define hashmap_foreach_data_safe(data, h, pos) \ + __HASHMAP_FOREACH_DATA_SAFE(__HASHMAP_MAKE_UNIQUE(__map), (data), (h), (pos)) /* * Return the load factor. diff --git a/include/hashmap_base.h b/include/hashmap_base.h index a37725d..744331c 100644 --- a/include/hashmap_base.h +++ b/include/hashmap_base.h @@ -27,11 +27,12 @@ void hashmap_base_init(struct hashmap_base *hb, void hashmap_base_cleanup(struct hashmap_base *hb); void hashmap_base_set_key_alloc_funcs(struct hashmap_base *hb, - void *(*key_dup_func)(const void *), void (*key_free_func)(void *)); + void *(*key_dup_func)(const void *), void (*key_free_func)(void *)); int hashmap_base_reserve(struct hashmap_base *hb, size_t capacity); int hashmap_base_put(struct hashmap_base *hb, const void *key, void *data); +int hashmap_base_insert(struct hashmap_base *hb, const void *key, void *data, void **old_data); void *hashmap_base_get(const struct hashmap_base *hb, const void *key); void *hashmap_base_remove(struct hashmap_base *hb, const void *key); @@ -42,6 +43,7 @@ struct hashmap_entry *hashmap_base_iter(const struct hashmap_base *hb, const struct hashmap_entry *pos); bool hashmap_base_iter_valid(const struct hashmap_base *hb, const struct hashmap_entry *iter); bool hashmap_base_iter_next(const struct hashmap_base *hb, struct hashmap_entry **iter); +struct hashmap_entry *hashmap_base_iter_find(const struct hashmap_base *hb, const void *key); bool hashmap_base_iter_remove(struct hashmap_base *hb, struct hashmap_entry **iter); const void *hashmap_base_iter_get_key(const struct hashmap_entry *iter); void 
*hashmap_base_iter_get_data(const struct hashmap_entry *iter); @@ -55,4 +57,3 @@ double hashmap_base_collisions_variance(const struct hashmap_base *hb); size_t hashmap_hash_default(const void *data, size_t len); size_t hashmap_hash_string(const char *key); size_t hashmap_hash_string_i(const char *key); - diff --git a/src/hashmap.c b/src/hashmap.c index c388d9a..7a0a503 100644 --- a/src/hashmap.c +++ b/src/hashmap.c @@ -77,7 +77,7 @@ static inline size_t hashmap_calc_index(const struct hashmap_base *hb, const voi static struct hashmap_entry *hashmap_entry_get_populated(const struct hashmap_base *hb, const struct hashmap_entry *entry) { - if (hb->size > 0) { + if (hb->size > 0 && entry >= hb->table) { for (; entry < &hb->table[hb->table_size]; ++entry) { if (entry->key) { return (struct hashmap_entry *)entry; @@ -119,7 +119,7 @@ static struct hashmap_entry *hashmap_entry_find(const struct hashmap_base *hb, /* * Removes the specified entry and processes the following entries to - * keep the chain contiguous. This is a required step for hash maps + * keep the chain contiguous. This is a required step for hashmaps * using linear probing. 
*/ static void hashmap_entry_remove(struct hashmap_base *hb, struct hashmap_entry *removed_entry) @@ -134,6 +134,7 @@ static void hashmap_entry_remove(struct hashmap_base *hb, struct hashmap_entry * if (hb->key_free) { hb->key_free(removed_entry->key); } + --hb->size; /* Fill the free slot in the chain */ @@ -183,12 +184,8 @@ static int hashmap_rehash(struct hashmap_base *hb, size_t table_size) hb->table_size = table_size; hb->table = new_table; - if (!old_table) { - return 0; - } - /* Rehash */ - for (entry = old_table; entry < &old_table[old_size]; ++entry) { + for (entry = old_table; entry < old_table + old_size; ++entry) { if (!entry->key) { continue; } @@ -261,6 +258,8 @@ void hashmap_base_set_key_alloc_funcs(struct hashmap_base *hb, void *(*key_dup_func)(const void *), void (*key_free_func)(void *)) { + assert(hb->size == 0); + hb->key_dup = key_dup_func; hb->key_free = key_free_func; } @@ -293,7 +292,7 @@ int hashmap_base_reserve(struct hashmap_base *hb, size_t capacity) /* * Add a new entry to the hashmap. If an entry with a matching key - * already exists -EEXIST is returned. + * is already present, -EEXIST is returned. * Returns 0 on success, or -errno on failure. */ int hashmap_base_put(struct hashmap_base *hb, const void *key, void *data) @@ -345,6 +344,70 @@ int hashmap_base_put(struct hashmap_base *hb, const void *key, void *data) return 0; } +/* + * Add a new entry to the hashmap, or update an existing entry. If an entry + * with a matching key is already present, its data is updated. If old_data + * is non-null, the previous data pointer is assigned to it. + * Returns 1 on add, 0 on update, or -errno on failure. 
+ */ +int hashmap_base_insert(struct hashmap_base *hb, const void *key, void *data, void **old_data) +{ + struct hashmap_entry *entry; + size_t table_size; + int r = 0; + + if (!key || !data) { + return -EINVAL; + } + + /* Preemptively rehash with 2x capacity if load factor is approaching 0.75 */ + table_size = hashmap_calc_table_size(hb, hb->size); + if (table_size > hb->table_size) { + r = hashmap_rehash(hb, table_size); + } + + /* Get the entry for this key */ + entry = hashmap_entry_find(hb, key, true); + if (!entry) { + /* + * Cannot find an empty slot. Either out of memory, + * or hash or compare functions are malfunctioning. + */ + if (r < 0) { + /* Return rehash error, if set */ + return r; + } + return -EADDRNOTAVAIL; + } + + if (!entry->key) { + /* Adding a new entry */ + if (hb->key_dup) { + /* Allocate copy of key to simplify memory management */ + entry->key = hb->key_dup(key); + if (!entry->key) { + return -ENOMEM; + } + } else { + entry->key = (void *)key; + } + ++hb->size; + r = 1; + } + + /* Assign the previous data pointer if data was updated, otherwise NULL */ + if (old_data) { + if (data == entry->data) { + *old_data = NULL; + } else { + *old_data = entry->data; + } + } + + entry->data = data; + return (r > 0) ? 1 : 0; /* 1 on add, 0 on update; a failed preemptive rehash is not an error once the entry is stored */ +} + /* * Return the data pointer, or NULL if no entry exists. 
*/ @@ -436,7 +499,7 @@ struct hashmap_entry *hashmap_base_iter(const struct hashmap_base *hb, */ bool hashmap_base_iter_valid(const struct hashmap_base *hb, const struct hashmap_entry *iter) { - return hb && iter && iter->key && iter >= hb->table && iter < &hb->table[hb->table_size]; + return hb && iter && iter->key && iter >= hb->table && iter < hb->table + hb->table_size; } /* @@ -448,7 +511,20 @@ bool hashmap_base_iter_next(const struct hashmap_base *hb, struct hashmap_entry if (!*iter) { return false; } - return (*iter = hashmap_entry_get_populated(hb, *iter + 1)) != NULL; + *iter = hashmap_entry_get_populated(hb, *iter + 1); + return *iter != NULL; +} + +/* + * Returns an iterator to the hashmap entry with the specified key. + * Returns NULL if there is no matching entry. + */ +struct hashmap_entry *hashmap_base_iter_find(const struct hashmap_base *hb, const void *key) +{ + if (!key) { + return NULL; + } + return hashmap_entry_find(hb, key, false); } /* @@ -465,7 +541,8 @@ bool hashmap_base_iter_remove(struct hashmap_base *hb, struct hashmap_entry **it /* Remove entry if iterator is valid */ hashmap_entry_remove(hb, *iter); } - return (*iter = hashmap_entry_get_populated(hb, *iter)) != NULL; + *iter = hashmap_entry_get_populated(hb, *iter); + return *iter != NULL; } /* diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt deleted file mode 100644 index e8f53bc..0000000 --- a/test/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -cmake_minimum_required(VERSION 3.5) - -# Hashmap unit test -add_executable(hashmap_test hashmap_test.c) -target_compile_options(hashmap_test PRIVATE $<$:-Wall -Werror>) -target_link_libraries(hashmap_test PRIVATE HashMap::HashMap) - -# Register with CTest -add_test(NAME hashmap_test COMMAND hashmap_test) - diff --git a/test/hashmap_test.c b/test/hashmap_test.c deleted file mode 100644 index 73949aa..0000000 --- a/test/hashmap_test.c +++ /dev/null @@ -1,599 +0,0 @@ -/* - * Copyright (c) 2016-2020 David Leeds - * - * Hashmap is free 
software; you can redistribute it and/or modify - * it under the terms of the MIT license. See LICENSE for details. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#define ARRAY_SIZE(array) (sizeof(array) / sizeof(array[0])) - -#define TEST_NUM_KEYS 196607 /* Results in max load factor */ -#define TEST_KEY_STR_LEN 32 - -void **keys_str_random; -void **keys_str_sequential; -void **keys_int_random; -void **keys_int_sequential; - -HASHMAP(char, void) str_map; -HASHMAP(uint64_t, uint64_t) int_map; -typedef HASHMAP(void, void) hashmap_void_t; - -struct test { - const char *name; - const char *description; - bool (*run)(hashmap_void_t *map, void **keys); - bool pre_load; -}; - - - -uint64_t time_mono_us(void) -{ - struct timespec now; - - if (clock_gettime(CLOCK_MONOTONIC, &now)) { - assert(0); - } - return ((uint64_t)now.tv_sec) * 1000000 + (uint64_t)(now.tv_nsec / 1000); -} - -void **test_keys_alloc(size_t num) -{ - void **keys; - - keys = (void **)calloc(num, sizeof(void *)); - if (!keys) { - printf("malloc failed\n"); - exit(1); - } - return keys; -} - -void *test_key_alloc_random_str(void) -{ - size_t i; - unsigned num; - char *key; - - key = (char *)malloc(TEST_KEY_STR_LEN + 1); - if (!key) { - printf("malloc failed\n"); - exit(1); - } - for (i = 0; i < TEST_KEY_STR_LEN; ++i) { - num = random(); - num = (num % 96) + 32; /* ASCII printable only */ - key[i] = (char)num; - } - key[TEST_KEY_STR_LEN] = '\0'; - return key; -} -void *test_key_alloc_random_int(void) -{ - uint64_t *key; - - key = (uint64_t *)malloc(sizeof(*key)); - if (!key) { - printf("malloc failed\n"); - exit(1); - } - /* RAND_MAX is not guaranteed to be more than 32K */ - *key = ((uint64_t)(random() & 0xffff) << 48) | - ((uint64_t)(random() & 0xffff) << 32) | - ((uint64_t)(random() & 0xffff) << 16) | - (uint64_t)(random() & 0xffff); - return key; -} - -void *test_key_alloc_sequential_str(size_t index) -{ - char *key; - - key = (char 
*)malloc(TEST_KEY_STR_LEN + 1); - if (!key) { - printf("malloc failed\n"); - exit(1); - } - snprintf(key, TEST_KEY_STR_LEN + 1, "sequential key! %010zu", index); - return key; -} - -void *test_key_alloc_sequential_int(size_t index) -{ - uint64_t *key; - - key = (uint64_t *)malloc(sizeof(*key)); - if (!key) { - printf("malloc failed\n"); - exit(1); - } - *key = index; - return key; -} - -void test_keys_generate(void) -{ - size_t i; - - srandom(99); /* Use reproducible random sequences */ - - keys_str_random = test_keys_alloc(TEST_NUM_KEYS + 1); - keys_str_sequential = test_keys_alloc(TEST_NUM_KEYS + 1); - keys_int_random = test_keys_alloc(TEST_NUM_KEYS + 1); - keys_int_sequential = test_keys_alloc(TEST_NUM_KEYS + 1); - for (i = 0; i < TEST_NUM_KEYS; ++i) { - keys_str_random[i] = test_key_alloc_random_str(); - keys_str_sequential[i] = test_key_alloc_sequential_str(i); - keys_int_random[i] = test_key_alloc_random_int(); - keys_int_sequential[i] = test_key_alloc_sequential_int(i); - } - keys_str_random[i] = NULL; - keys_str_sequential[i] = NULL; - keys_int_random[i] = NULL; - keys_int_sequential[i] = NULL; -} - -void test_load_keys(hashmap_void_t *map, void **keys) -{ - void **key; - int r; - - for (key = keys; *key; ++key) { - r = hashmap_put(map, *key, *key); - if (r < 0) { - printf("hashmap_put() failed: %s\n", strerror(-r)); - exit(1); - } - } -} - -void test_reset_map(hashmap_void_t *map) -{ - hashmap_reset(map); -} - -void test_print_stats(hashmap_void_t *map, const char *label) -{ - printf("Hashmap stats: %s\n", label); - printf(" # entries: %zu\n", hashmap_size(map)); - printf(" Table size: %zu\n", map->map_base.table_size); - printf(" Load factor: %.4f\n", hashmap_load_factor(map)); - printf(" Collisions mean: %.4f\n", hashmap_collisions_mean(map)); - printf(" Collisions variance: %.4f\n", hashmap_collisions_variance(map)); - -} - -bool test_run(hashmap_void_t *map, void **keys, const struct test *t) -{ - bool success; - uint64_t time_us; - - assert(t != 
NULL); - assert(t->name != NULL); - assert(t->run != NULL); - - if (t->pre_load) { - printf("Pre-loading keys..."); - test_load_keys(map, keys); - printf("done\n"); - } - printf("Running...\n"); - time_us = time_mono_us(); - success = t->run(map, keys); - time_us = time_mono_us() - time_us; - if (success) { - printf("Completed successfully\n"); - } else { - printf("FAILED\n"); - } - printf("Run time: %llu microseconds\n", (long long unsigned)time_us); - test_print_stats(map, t->name); - test_reset_map(map); - return success; -} - -bool test_run_all(hashmap_void_t *map, void **keys, - const struct test *tests, size_t num_tests, const char *env) -{ - const struct test *t; - size_t num_failed = 0; - - printf("\n**************************************************\n"); - printf("Starting test series:\n"); - printf(" %s\n", env); - printf("**************************************************\n\n"); - for (t = tests; t < &tests[num_tests]; ++t) { - printf("\n**************************************************\n"); - printf("Test %02u: %s\n", (unsigned)(t - tests) + 1, t->name); - if (t->description) { - printf(" Description: %s\n", t->description); - } - printf("\n"); - if (!test_run(map, keys, t)) { - ++num_failed; - } - } - printf("\n**************************************************\n"); - printf("Test results:\n"); - printf(" Passed: %zu\n", num_tests - num_failed); - printf(" Failed: %zu\n", num_failed); - printf("**************************************************\n"); - return (num_failed == 0); -} - -/* - * Worst case hash function. - */ -size_t test_hash_uint64_bad1(const uint64_t *key) -{ - return 999; -} - -/* - * Potentially bad hash function. Depending on the linear probing - * implementation, this could cause clustering and long chains when - * consecutive numeric keys are loaded. - */ -size_t test_hash_uint64_bad2(const uint64_t *key) -{ - return *key; -} - -/* - * Potentially bad hash function. 
Depending on the linear probing - * implementation, this could cause clustering and long chains when - * consecutive numeric keys are loaded. - */ -size_t test_hash_uint64_bad3(const uint64_t *key) -{ - return *key + *key; -} - -/* - * Use generic hash algorithm supplied by the hashmap library. - */ -size_t test_hash_uint64(const uint64_t *key) -{ - return hashmap_hash_default(key, sizeof(*key)); -} - -int test_compare_uint64(const uint64_t *a, const uint64_t *b) -{ - return memcmp(a, b, sizeof(uint64_t)); -} - -bool test_put(hashmap_void_t *map, void **keys) -{ - void **key; - int r; - - for (key = keys; *key; ++key) { - r = hashmap_put(map, *key, *key); - if (r < 0) { - printf("hashmap_put failed: %s\n", strerror(-r)); - return false; - } - } - return true; -} - -bool test_put_existing(hashmap_void_t *map, void **keys) -{ - void **key; - int r; - int temp_data = 99; - - for (key = keys; *key; ++key) { - r = hashmap_put(map, *key, &temp_data); - if (r != -EEXIST) { - printf("did not return existing data: %s\n", strerror(-r)); - return false; - } - } - return true; -} - -bool test_get(hashmap_void_t *map, void **keys) -{ - void **key; - void *data; - - for (key = keys; *key; ++key) { - data = hashmap_get(map, *key); - if (!data) { - printf("entry not found\n"); - return false; - } - if (data != *key) { - printf("got wrong entry\n"); - return false; - } - } - return true; -} - -bool test_get_nonexisting(hashmap_void_t *map, void **keys) -{ - void **key; - void *data; - const char *fake_key = "test_get_nonexisting fake key!"; - - for (key = keys; *key; ++key) { - data = hashmap_get(map, fake_key); - if (data) { - printf("unexpected entry found\n"); - return false; - } - } - return true; -} - -bool test_remove(hashmap_void_t *map, void **keys) -{ - void **key; - void *data; - - for (key = keys; *key; ++key) { - data = hashmap_remove(map, *key); - if (!data) { - printf("entry not found\n"); - return false; - } - if (data != *key) { - printf("removed wrong entry\n"); - 
return false; - } - } - return true; -} - -bool test_put_remove(hashmap_void_t *map, void **keys) -{ - size_t i = 0; - void **key; - void *data; - int r; - - if (!test_put(map, keys)) { - return false; - } - for (key = keys; *key; ++key) { - if (i++ >= TEST_NUM_KEYS / 2) { - break; - } - data = hashmap_remove(map, *key); - if (!data) { - printf("key not found\n"); - return false; - } - if (data != *key) { - printf("removed wrong entry\n"); - return false; - } - } - test_print_stats(map, "test_put_remove done"); - i = 0; - for (key = keys; *key; ++key) { - if (i++ >= TEST_NUM_KEYS / 2) { - break; - } - r = hashmap_put(map, *key, *key); - if (r < 0) { - printf("hashmap_put failed: %s\n", strerror(-r)); - return false; - } - } - return true; -} - -bool test_iterate(hashmap_void_t *map, void **keys) -{ - size_t i = 0; - const void *key; - void *data; - - hashmap_foreach(key, data, map) { - ++i; - if (!key) { - printf("key %zu is NULL\n", i); - return false; - } - if (!data) { - printf("data %zu is NULL\n", i); - return false; - } - } - if (i != TEST_NUM_KEYS) { - printf("did not iterate through all entries: " - "observed %zu, expected %u\n", i, TEST_NUM_KEYS); - return false; - } - return true; -} - -bool test_iterate_remove(hashmap_void_t *map, void **keys) -{ - size_t i = 0; - const void *key; - void *data, *temp; - - hashmap_foreach_safe(key, data, map, temp) { - ++i; - if (hashmap_get(map, key) != data) { - printf("invalid iterator on entry #%zu\n", i); - return false; - } - if (hashmap_remove(map, key) != data) { - printf("key/data mismatch %zu: %p != %p\n", i, key, data); - } - } - if (i != TEST_NUM_KEYS) { - printf("did not iterate through all entries: " - "observed %zu, expected %u\n", i, TEST_NUM_KEYS); - return false; - } - return true; -} - -bool test_iterate_remove_odd(hashmap_void_t *map, void **keys) -{ - size_t size = hashmap_size(map); - size_t i = 0; - size_t removed = 0; - const void *key; - void *temp; - - hashmap_foreach_key_safe(key, map, temp) { - 
if (i & 1) { - /* Remove odd indices */ - if (!hashmap_remove(map, key)) { - printf("could not remove expected key\n"); - return false; - } - ++removed; - } - ++i; - } - - if (hashmap_size(map) != size - removed) { - printf("foreach delete did not remove expected # of entries: " - "contains %zu vs. expected %zu\n", hashmap_size(map), - size - removed); - return false; - } - return true; -} - -bool test_clear(hashmap_void_t *map, void **keys) -{ - hashmap_clear(map); - return true; -} - -bool test_reset(hashmap_void_t *map, void **keys) -{ - hashmap_reset(map); - return true; -} - -const struct test tests[] = { - { - .name = "put performance", - .description = "put new hash keys", - .run = test_put - }, - { - .name = "put existing performance", - .description = "attempt to put existing hash keys", - .run = test_put_existing, - .pre_load = true - }, - { - .name = "get existing performance", - .description = "get existing hash keys", - .run = test_get, - .pre_load = true - }, - { - .name = "get non-existing performance", - .description = "get nonexistent hash keys", - .run = test_get_nonexisting, - .pre_load = true - }, - { - .name = "remove performance", - .description = "remove hash keys", - .run = test_remove, - .pre_load = true - }, - { - .name = "mixed put/remove performance", - .description = "put, remove 1/2, then put them back", - .run = test_put_remove - }, - { - .name = "iterate performance", - .description = "iterate through entries", - .run = test_iterate, - .pre_load = true - }, - { - .name = "iterate remove all", - .description = "iterate and remove all entries", - .run = test_iterate_remove, - .pre_load = true - }, - { - .name = "iterate remove odd indices", - .description = "iterate and delete alternate entries", - .run = test_iterate_remove_odd, - .pre_load = true - }, - { - .name = "clear performance", - .description = "clear entries", - .run = test_clear, - .pre_load = true - }, - { - .name = "reset performance", - .description = "reset entries", - 
.run = test_reset, - .pre_load = true - } -}; - -/* - * Main function - */ -int main(int argc, char **argv) -{ - bool success = true; - - /* Initialize */ - printf("Initializing hash maps...\n"); - hashmap_init(&str_map, hashmap_hash_string, strcmp); - -// hashmap_set_key_alloc_funcs(&str_map, strdup, (void(*)(char *))free); - - hashmap_init(&int_map, test_hash_uint64_bad2, test_compare_uint64); - - printf("Generating test %u test keys...", TEST_NUM_KEYS); - test_keys_generate(); - printf("done\n"); - - printf("Running tests\n\n"); - success &= test_run_all((hashmap_void_t *)&str_map, keys_str_random, tests, - ARRAY_SIZE(tests), "Hashmap w/randomized string keys"); - success &= test_run_all((hashmap_void_t *)&str_map, keys_str_sequential, tests, - ARRAY_SIZE(tests), "Hashmap w/sequential string keys"); - - success &= test_run_all((hashmap_void_t *)&int_map, keys_int_random, tests, - ARRAY_SIZE(tests), "Hashmap w/randomized integer keys"); - - success &= test_run_all((hashmap_void_t *)&int_map, keys_int_sequential, tests, - ARRAY_SIZE(tests), "Hashmap w/sequential integer keys"); - - printf("\nTests finished\n"); - - hashmap_cleanup(&str_map); - hashmap_cleanup(&int_map); - - if (!success) { - printf("Tests FAILED\n"); - exit(1); - } - return 0; -} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 0000000..55c2b3f --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,43 @@ +cmake_minimum_required(VERSION 3.19) +project(hashmap_test) + +set(CMAKE_C_STANDARD 11) +set(CMAKE_CXX_STANDARD 20) + +include(FetchContent) + +# Fetch Catch2 unit test framework at configure time +FetchContent_Declare( + Catch2 + GIT_REPOSITORY https://github.com/catchorg/Catch2.git + GIT_TAG v3.8.1 + GIT_SHALLOW ON +) +FetchContent_MakeAvailable(Catch2) +list(APPEND CMAKE_MODULE_PATH ${catch2_SOURCE_DIR}/extras) +include(CTest) +include(Catch) + +# Fetch Sanitizer CMake helpers +FetchContent_Declare( + SanitizersCMake + GIT_REPOSITORY 
https://github.com/arsenm/sanitizers-cmake.git
+    GIT_SHALLOW ON
+)
+FetchContent_GetProperties(SanitizersCMake)
+if(NOT sanitizerscmake_POPULATED)
+    FetchContent_Populate(SanitizersCMake)
+endif()
+list(APPEND CMAKE_MODULE_PATH ${sanitizerscmake_SOURCE_DIR}/cmake)
+
+# Enable ASan and UBSan (set before find_package(), which only probes flags for enabled sanitizers)
+set(SANITIZE_ADDRESS TRUE)
+set(SANITIZE_UNDEFINED TRUE)
+find_package(Sanitizers)
+
+# Hashmap unit test
+add_executable(hashmap_test hashmap_test.cpp)
+target_compile_options(hashmap_test PRIVATE -Wall -Werror)
+target_link_libraries(hashmap_test PRIVATE Catch2::Catch2WithMain HashMap::HashMap)
+add_sanitizers(hashmap_test)
+catch_discover_tests(hashmap_test)
diff --git a/tests/hashmap_test.cpp b/tests/hashmap_test.cpp
new file mode 100644
index 0000000..be80842
--- /dev/null
+++ b/tests/hashmap_test.cpp
@@ -0,0 +1,508 @@
+/*
+ * Copyright (c) 2025 David Leeds
+ *
+ * Hashmap is free software; you can redistribute it and/or modify
+ * it under the terms of the MIT license. See LICENSE for details.
+ */
+
+#include <hashmap.h>
+
+#include <cerrno>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <unordered_map>
+
+#include <catch2/catch_test_macros.hpp>
+
+using namespace std::literals;
+
+/* Build `count` unique "key<i>" -> "value<i>" string pairs */
+static std::unordered_map<std::string, std::string> make_kvs(size_t count)
+{
+    std::unordered_map<std::string, std::string> input;
+
+    for (size_t i = 0; i < count; ++i) {
+        input.emplace("key" + std::to_string(i), "value" + std::to_string(i));
+    }
+
+    return input;
+}
+
+/*
+ * Empty `map`, then insert every pair from `kvs`, requiring each put to succeed.
+ * The map is handed c_str() pointers into `kvs`, so `kvs` must outlive its use.
+ */
+static void fill_map(auto *map, const std::unordered_map<std::string, std::string> &kvs)
+{
+    hashmap_clear(map);
+
+    for (auto &[k, v] : kvs) {
+        CAPTURE(k, v);
+        REQUIRE(hashmap_put(map, k.c_str(), v.c_str()) == 0);
+    }
+}
+
+/*
+ * Catch2 re-runs the TEST_CASE body once per leaf SECTION, so each section
+ * gets a freshly initialized map that is cleaned up at the end of the body.
+ */
+TEST_CASE("hashmap", "[hashmap]") {
+    /* Create a hashmap with string keys and values */
+    HASHMAP(char, const char) map;
+    hashmap_init(&map, hashmap_hash_string, strcmp);
+
+    SECTION("initial state") {
+        REQUIRE(hashmap_empty(&map));
+        REQUIRE(hashmap_size(&map) == 0);
+
+        /* No allocation is performed prior to use */
+        REQUIRE(hashmap_capacity(&map) == 0);
+    }
+
+    SECTION("reserve") {
+        /* Reserve space for at least 1000 elements */
+        constexpr size_t CAPACITY = 1000;
+        REQUIRE(hashmap_reserve(&map, CAPACITY) == 0);
+
+        /* Check that at least the requested capacity was allocated */
+        REQUIRE(hashmap_capacity(&map) >= CAPACITY);
+    }
+
+    SECTION("put and get") {
+        /* Input is large enough to prompt rehashes */
+        auto input = make_kvs(1000);
+
+        for (auto &[k, v] : input) {
+            CAPTURE(k, v);
+            REQUIRE(hashmap_put(&map, k.c_str(), v.c_str()) == 0);
+        }
+
+        REQUIRE_FALSE(hashmap_empty(&map));
+        REQUIRE(hashmap_size(&map) == input.size());
+
+        for (auto &[k, v] : input) {
+            CAPTURE(k, v);
+            REQUIRE(hashmap_get(&map, k.c_str()) == v);
+        }
+    }
+
+    SECTION("insert and get") {
+        /* Input is large enough to prompt rehashes */
+        auto input = make_kvs(1000);
+
+        for (auto &[k, v] : input) {
+            CAPTURE(k, v);
+            REQUIRE(hashmap_insert(&map, k.c_str(), v.c_str(), nullptr) == 1);
+        }
+
+        REQUIRE_FALSE(hashmap_empty(&map));
+        REQUIRE(hashmap_size(&map) == input.size());
+
+        for (auto &[k, v] : input) {
+            CAPTURE(k, v);
+            REQUIRE(hashmap_get(&map, k.c_str()) == v);
+        }
+    }
+
+    SECTION("put duplicate entry") {
+        REQUIRE(hashmap_put(&map, "key1", "value1") == 0);
+        REQUIRE(hashmap_put(&map, "key1", "value2") == -EEXIST);
+        REQUIRE(hashmap_size(&map) == 1);
+    }
+
+    SECTION("insert duplicate entry") {
+        const char *val1 = "value1";
+        const char *val2 = "value2";
+        const char *old_val;
+
+        /* New key */
+        old_val = "invalid";
+        REQUIRE(hashmap_insert(&map, "key1", val1, &old_val) == 1);
+        REQUIRE(old_val == nullptr);
+
+        /* Existing key, same value */
+        old_val = "invalid";
+        REQUIRE(hashmap_insert(&map, "key1", val1, &old_val) == 0);
+        REQUIRE(old_val == nullptr);
+
+        /* Existing key, new value */
+        old_val = "invalid";
+        REQUIRE(hashmap_insert(&map, "key1", val2, &old_val) == 0);
+        REQUIRE(old_val == val1);
+
+        REQUIRE(hashmap_size(&map) == 1);
+    }
+
+    SECTION("get nonexistent entry") {
+        /* Empty map */
+        REQUIRE(hashmap_get(&map, "key1") == nullptr);
+
+        /* Non-empty map */
+        REQUIRE(hashmap_put(&map, "key2", "value2") == 0);
+        REQUIRE(hashmap_get(&map, "key1") == nullptr);
+    }
+
+    SECTION("contains") {
+        REQUIRE(hashmap_put(&map, "key1", "value1") == 0);
+
+        REQUIRE(hashmap_contains(&map, "key1"));
+        REQUIRE_FALSE(hashmap_contains(&map, "key2"));
+    }
+
+    SECTION("remove") {
+        auto input = make_kvs(1000);
+
+        fill_map(&map, input);
+
+        size_t remaining = input.size();
+        for (auto &[k, v] : input) {
+            CAPTURE(k, v);
+
+            REQUIRE(hashmap_size(&map) == remaining);
+            REQUIRE(hashmap_get(&map, k.c_str()) == v);
+
+            REQUIRE(hashmap_remove(&map, k.c_str()) == v);
+            --remaining;
+
+            REQUIRE(hashmap_get(&map, k.c_str()) == nullptr);
+            REQUIRE(hashmap_size(&map) == remaining);
+        }
+    }
+
+    SECTION("clear") {
+        auto input = make_kvs(1000);
+
+        size_t empty_capacity = hashmap_capacity(&map);
+
+        fill_map(&map, input);
+
+        size_t full_capacity = hashmap_capacity(&map);
+
+        hashmap_clear(&map);
+
+        size_t cleared_capacity = hashmap_capacity(&map);
+
+        /* All elements removed */
+        REQUIRE(hashmap_empty(&map));
+
+        /* Should not reduce allocated space */
+        REQUIRE(full_capacity > empty_capacity);
+        REQUIRE(cleared_capacity == full_capacity);
+    }
+
+    SECTION("reset") {
+        auto input = make_kvs(1000);
+
+        size_t empty_capacity = hashmap_capacity(&map);
+
+        fill_map(&map, input);
+
+        size_t full_capacity = hashmap_capacity(&map);
+
+        hashmap_reset(&map);
+
+        size_t cleared_capacity = hashmap_capacity(&map);
+
+        /* All elements removed */
+        REQUIRE(hashmap_empty(&map));
+
+        /* Should reset allocated space to a smaller initial size */
+        REQUIRE(full_capacity > empty_capacity);
+        REQUIRE(cleared_capacity >= empty_capacity);
+        REQUIRE(cleared_capacity < full_capacity);
+    }
+
+    SECTION("iteration with iterator") {
+        auto input = make_kvs(200);
+
+        fill_map(&map, input);
+
+        HASHMAP_ITER(map) iter = hashmap_iter(&map);
+
+        size_t count = 0;
+        do {
+            REQUIRE(hashmap_iter_valid(&iter));
+
+            const char *k = hashmap_iter_get_key(&iter);
+            const char *v = hashmap_iter_get_data(&iter);
+
+            REQUIRE(k != nullptr);
+            REQUIRE(v != nullptr);
+
+            REQUIRE(input.contains(k));
+            REQUIRE(input.at(k) == v);
+
+            ++count;
+        } while (hashmap_iter_next(&iter));
+
+        REQUIRE(count == input.size());
+    }
+
+    SECTION("iteration with iterator and remove all") {
+        auto input = make_kvs(200);
+
+        fill_map(&map, input);
+
+        HASHMAP_ITER(map) iter = hashmap_iter(&map);
+
+        size_t count = 0;
+
+        while (hashmap_iter_valid(&iter)) {
+            const char *k = hashmap_iter_get_key(&iter);
+            const char *v = hashmap_iter_get_data(&iter);
+
+            REQUIRE(k != nullptr);
+            REQUIRE(v != nullptr);
+
+            REQUIRE(input.contains(k));
+            REQUIRE(input.at(k) == v);
+
+            hashmap_iter_remove(&iter);
+
+            ++count;
+        }
+
+        REQUIRE(count == input.size());
+        REQUIRE(hashmap_empty(&map));
+    }
+
+    SECTION("iteration with iterator and remove some") {
+        auto input = make_kvs(200);
+
+        fill_map(&map, input);
+
+        HASHMAP_ITER(map) iter = hashmap_iter(&map);
+
+        size_t count = 0;
+
+        while (hashmap_iter_valid(&iter)) {
+            const char *k = hashmap_iter_get_key(&iter);
+            const char *v = hashmap_iter_get_data(&iter);
+
+            REQUIRE(k != nullptr);
+            REQUIRE(v != nullptr);
+
+            REQUIRE(input.contains(k));
+            REQUIRE(input.at(k) == v);
+
+            /* Remove every other entry */
+            if (count % 2 == 0) {
+                hashmap_iter_remove(&iter);
+            } else {
+                hashmap_iter_next(&iter);
+            }
+
+            ++count;
+        }
+
+        REQUIRE(count == input.size());
+        REQUIRE(hashmap_size(&map) == input.size() / 2);
+    }
+
+    SECTION("find with iterator") {
+        HASHMAP_ITER(map) iter;
+
+        REQUIRE(hashmap_put(&map, "key1", "value1") == 0);
+
+        /* Found */
+        iter = hashmap_iter_find(&map, "key1");
+        REQUIRE(hashmap_iter_valid(&iter));
+        REQUIRE(hashmap_iter_get_key(&iter) == "key1"s);
+        REQUIRE(hashmap_iter_get_data(&iter) == "value1"s);
+
+        /* Not found */
+        iter = hashmap_iter_find(&map, "key2");
+        REQUIRE_FALSE(hashmap_iter_valid(&iter));
+    }
+
+    SECTION("iteration with foreach macros") {
+        auto input = make_kvs(200);
+
+        fill_map(&map, input);
+
+        const char *key;
+        const char *value;
+
+        /* foreach */
+        {
+            size_t count = 0;
+            hashmap_foreach(key, value, &map) {
+                REQUIRE(key != nullptr);
+                REQUIRE(value != nullptr);
+
+                REQUIRE(input.contains(key));
+                REQUIRE(input.at(key) == value);
+
+                ++count;
+            }
+            REQUIRE(count == input.size());
+        }
+
+        /* foreach_key */
+        {
+            size_t count = 0;
+            hashmap_foreach_key(key, &map) {
+                REQUIRE(key != nullptr);
+                REQUIRE(input.contains(key));
+                ++count;
+            }
+            REQUIRE(count == input.size());
+        }
+
+        /* foreach_data */
+        {
+            size_t count = 0;
+            hashmap_foreach_data(value, &map) {
+                REQUIRE(value != nullptr);
+                ++count;
+            }
+            REQUIRE(count == input.size());
+        }
+    }
+
+    SECTION("iteration and removal with safe foreach macros") {
+        auto input = make_kvs(200);
+
+        const char *key;
+        const char *value;
+        const void *pos;
+
+        /* safe foreach */
+        {
+            size_t count = 0;
+
+            fill_map(&map, input);
+
+            hashmap_foreach_safe(key, value, &map, pos) {
+                REQUIRE(key != nullptr);
+                REQUIRE(value != nullptr);
+
+                REQUIRE(input.contains(key));
+                REQUIRE(input.at(key) == value);
+
+                /* Remove every other entry */
+                if (count % 2 == 0) {
+                    hashmap_remove(&map, key);
+                }
+
+                ++count;
+            }
+            REQUIRE(count == input.size());
+            REQUIRE(hashmap_size(&map) == input.size() / 2);
+        }
+
+        /* safe foreach_key */
+        {
+            size_t count = 0;
+
+            fill_map(&map, input);
+
+            hashmap_foreach_key_safe(key, &map, pos) {
+                REQUIRE(key != nullptr);
+                REQUIRE(input.contains(key));
+
+                /* Remove every other entry */
+                if (count % 2 == 1) {
+                    hashmap_remove(&map, key);
+                }
+
+                ++count;
+            }
+            REQUIRE(count == input.size());
+            REQUIRE(hashmap_size(&map) == input.size() / 2);
+        }
+
+        /* safe foreach_data */
+        {
+            size_t count = 0;
+
+            fill_map(&map, input);
+
+            hashmap_foreach_data_safe(value, &map, pos) {
+                REQUIRE(value != nullptr);
+                ++count;
+            }
+            REQUIRE(count == input.size());
+            REQUIRE(hashmap_size(&map) == input.size());
+        }
+    }
+
+    SECTION("internal key allocation") {
+        const char *key = "key1";
+        auto strfree = [](char *k) { free(k); };
+
+        hashmap_set_key_alloc_funcs(&map, strdup, strfree);
+
+        REQUIRE(hashmap_put(&map, key, key) == 0);
+
+        auto iter = hashmap_iter_find(&map, key);
+
+        REQUIRE(hashmap_iter_valid(&iter));
+        REQUIRE(hashmap_iter_get_key(&iter) != key);
+
+        hashmap_iter_remove(&iter);
+    }
+
+    SECTION("bad hash functions") {
+        /* Compare key values; subtracting the pointers would only treat
+         * identical addresses as equal and is UB for unrelated objects */
+        auto cmp = [](const int *a, const int *b) -> int { return (*a > *b) - (*a < *b); };
+
+        static std::unordered_map<int, std::string> input;
+
+        for (int i = 0; i < 200; ++i) {
+            input.emplace(i, "value" + std::to_string(i));
+        }
+
+        /* Should be functional (albeit slower) when poor hash functions are used */
+        auto test = [&](size_t (*hash)(const int *)) {
+            HASHMAP(int, const char) int_map;
+            hashmap_init(&int_map, hash, cmp);
+
+            /* Put */
+            for (auto &[k, v] : input) {
+                CAPTURE(k, v);
+                REQUIRE(hashmap_put(&int_map, &k, v.c_str()) == 0);
+            }
+
+            /* Get */
+            for (auto &[k, v] : input) {
+                CAPTURE(k, v);
+                REQUIRE(hashmap_get(&int_map, &k) == v);
+            }
+
+            /* Remove */
+            for (auto &[k, v] : input) {
+                CAPTURE(k);
+                REQUIRE(hashmap_remove(&int_map, &k) == v);
+            }
+
+            /* Release whatever the map still owns before it goes out of scope */
+            hashmap_cleanup(&int_map);
+        };
+
+        SECTION("worst") {
+            /* Hash lookup collides with every entry */
+            auto hash = [](const int *) -> size_t { return 0; };
+
+            test(hash);
+        }
+
+        SECTION("bad 1") {
+            /* Could cause clustering depending on implementation */
+            auto hash = [](const int *k) -> size_t { return *k; };
+
+            test(hash);
+        }
+
+        SECTION("bad 2") {
+            /* Could cause clustering depending on implementation */
+            auto hash = [](const int *k) -> size_t { return *k + *k; };
+
+            test(hash);
+        }
+    }
+
+    hashmap_cleanup(&map);
+}