diff --git a/bench/bench_performance.py b/bench/bench_performance.py
new file mode 100644
index 0000000..247f0ce
--- /dev/null
+++ b/bench/bench_performance.py
@@ -0,0 +1,403 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# pylint: disable=wrong-import-position, too-many-instance-attributes, line-too-long
+# pylint: disable=too-many-positional-arguments, too-many-arguments, too-many-locals
+'''bench_performance.py
+
+Benchmark for the Generalized Trie implementation.
+
+This script runs a series of tests to measure the performance of the Generalized Trie
+against a set of predefined test cases.
+
+Usage::
+
+```shell
+pytest bench_performance.py
+ pytest bench_performance.py --benchmark-sort=name \\
+ --benchmark-group-by=func \\
+ --benchmark-histogram='histogram/benchmark' \\
+ --benchmark-time-unit=ns
+```
+
+See the documentation for pytest-benchmark at https://pytest-benchmark.readthedocs.io/
+for more information on how to use it.
+'''
+import gc
+import itertools
+import time
+from typing import Any, Optional, Sequence
+
+
+import sys
+from pathlib import Path
+sys.path.insert(0, str(Path('../src').resolve()))
+
+import pytest # noqa: E402
+
+from gentrie import GeneralizedTrie, GeneralizedKey # noqa: E402
+
+# More robust benchmark configuration
+BENCHMARK_CONFIG: dict[str, Any] = {
+ 'warmup': True,
+ 'min_rounds': 100,
+ 'min_time': 1,
+ 'max_time': 10,
+ 'timer': time.perf_counter_ns
+}
+
+# Apply to all benchmarks
+# pytestmark = pytest.mark.benchmark(**BENCHMARK_CONFIG)
+
+SYMBOLS: str = '0123' # Define the symbols for the trie
+
+
+def generate_test_data(depth: int, symbols: str, max_keys: int) -> list[str]:
+ '''Generate test data for the Generalized Trie.
+
+ Args:
+ depth (int): The depth of the keys to generate.
+ symbols (str): The symbols to use in the keys.
+ max_keys (int): The maximum number of keys to generate.'''
+ test_data: list[str] = []
+ for key in itertools.product(symbols, repeat=depth):
+ key_string = ''.join(key)
+ test_data.append(key_string)
+ if len(test_data) >= max_keys:
+ break
+ return test_data
+
+
+TEST_DATA: dict[int, list[str]] = {}
+TEST_DEPTHS: list[int] = [2, 3, 4, 5, 6, 7, 8, 9] # Depths to test - '1' is generally omitted due to low key count
+TEST_MAX_KEYS: int = len(SYMBOLS) ** max(TEST_DEPTHS) # Limit to a manageable number of keys
+for gen_depth in TEST_DEPTHS:
+ max_keys_for_depth = len(SYMBOLS) ** gen_depth # pylint: disable=invalid-name
+ TEST_DATA[gen_depth] = generate_test_data(gen_depth, SYMBOLS, max_keys=max_keys_for_depth)
+
+
+def generate_test_trie(depth: int, symbols: str, max_keys: int, value: Optional[Any] = None) -> GeneralizedTrie:
+ '''Generate a test Generalized Trie for the given depth and symbols.
+
+ Args:
+ depth (int): The depth of the trie.
+ symbols (str): The symbols to use in the trie.
+ max_keys (int): The maximum number of keys to generate.
+ value (Optional[Any]): The value to assign to each key in the trie.
+ '''
+ test_data = generate_test_data(depth, symbols, max_keys)
+ trie = GeneralizedTrie(runtime_validation=False)
+
+ for key in test_data:
+ trie[key] = value
+ return trie
+
+
+def generate_test_trie_from_data(data: Sequence[GeneralizedKey], value: Optional[Any] = None) -> GeneralizedTrie:
+ '''Generate a test Generalized Trie from the passed Sequence of GeneralizedKey.
+
+ Args:
+ data (Sequence[GeneralizedKey]): The sequence of keys to insert into the trie.
+ value (Optional[Any]): The value to assign to each key in the trie.
+ '''
+ trie = GeneralizedTrie(runtime_validation=False)
+ for key in data:
+ trie[key] = value
+ return trie
+
+
+# We generate the TEST_TRIES from the TEST_DATA for synchronization
+TEST_TRIES: dict[int, GeneralizedTrie] = {}
+for gen_depth in TEST_DEPTHS:
+ TEST_TRIES[gen_depth] = generate_test_trie_from_data(TEST_DATA[gen_depth], None)
+
+
+def generate_trie_with_missing_key_from_data(
+ test_data: Sequence[GeneralizedKey], value: Optional[Any] = None) -> tuple[GeneralizedTrie, Any]:
+ """Generate a GeneralizedTrie and a key that is not in the trie.
+
+ The generated trie will contain all keys from the test_data except for the last one.
+
+ Args:
+ test_data: The test data to populate the trie.
+ value: The value to associate with the keys in the trie.
+ """
+ trie = generate_test_trie_from_data(data=test_data, value=value)
+ missing_key = test_data[-1] # Use the last key as the missing key
+ trie.remove(missing_key) # Ensure the key is not actually in the trie
+ return trie, missing_key
+
+
+# We generate the TEST_MISSING_KEY_TRIES from the TEST_DATA for synchronization
+TEST_MISSING_KEY_TRIES: dict[int, tuple[GeneralizedTrie, str]] = {}
+for gen_depth in TEST_DEPTHS:
+ TEST_MISSING_KEY_TRIES[gen_depth] = generate_trie_with_missing_key_from_data(TEST_DATA[gen_depth], None)
+
+
+def generate_fully_populated_trie(max_depth: int, value: Optional[Any] = None) -> GeneralizedTrie:
+ '''Generate a fully populated Generalized Trie for the given max_depth.
+
+ A fully populated trie contains all possible keys up to the specified depth.
+ It uses the pregenerated TEST_DATA as the source of truth for the keys for each depth
+ because it contains all the possible keys for the depth and symbol set.
+
+ Args:
+ max_depth (int): The maximum depth of the trie.
+ value (Optional[Any], default=None): The value to assign to each key in the trie.
+ '''
+ trie = GeneralizedTrie(runtime_validation=False)
+ # Use precomputed TEST_DATA if available for performance
+ for depth, data in TEST_DATA.items():
+ if depth <= max_depth:
+ for key in data:
+ trie[key] = value
+
+ # Generate any requested depths NOT included in TEST_DATA
+ for depth in range(1, max_depth + 1):
+ if depth not in TEST_DATA:
+ # Generate all possible keys for this depth
+ for key in generate_test_data(depth, SYMBOLS, len(SYMBOLS) ** depth):
+ trie[key] = value
+
+ return trie
+
+
+TEST_FULLY_POPULATED_TRIES: dict[int, GeneralizedTrie] = {}
+for gen_depth in TEST_DEPTHS:
+ TEST_FULLY_POPULATED_TRIES[gen_depth] = generate_fully_populated_trie(max_depth=gen_depth)
+
+
+@pytest.mark.benchmark(**BENCHMARK_CONFIG)
+@pytest.mark.parametrize('runtime_validation', [False, True])
+@pytest.mark.parametrize('depth', TEST_DEPTHS)
+def test_build_with_update(
+ benchmark, # pyright: ignore[reportUnknownParameterType, reportMissingParameterType]
+ runtime_validation: bool,
+ depth: int):
+ '''Benchmark the adding of keys to the trie using update()
+
+ This test checks the performance of adding keys to the trie using update().
+ '''
+ benchmark_trie: GeneralizedTrie = GeneralizedTrie(runtime_validation=runtime_validation)
+ key_iter = iter(TEST_DATA[depth])
+
+ def setup():
+ return (), {'key': next(key_iter)} # Will crash when exhausted
+ rounds = len(TEST_DATA[depth]) # Rounds limited to prevent exhaustion
+
+ gc.collect()
+ benchmark.pedantic(benchmark_trie.update, # pyright: ignore[reportUnknownMemberType]
+ setup=setup,
+ rounds=rounds,
+ iterations=1)
+
+
+@pytest.mark.benchmark(**BENCHMARK_CONFIG)
+@pytest.mark.parametrize('runtime_validation', [False, True])
+@pytest.mark.parametrize('depth', TEST_DEPTHS)
+def test_build_with_add(
+ benchmark, # pyright: ignore[reportUnknownParameterType, reportMissingParameterType]
+ runtime_validation: bool,
+ depth: int):
+ '''Benchmark the adding of keys to the trie using add()
+
+ This test checks the performance of adding keys to the trie using the add() method.
+ '''
+ benchmark_trie: GeneralizedTrie = GeneralizedTrie(runtime_validation=runtime_validation)
+ key_iter = iter(TEST_DATA[depth])
+
+ def setup():
+ return (), {'key': next(key_iter)} # Will crash when exhausted
+ rounds = len(TEST_DATA[depth])
+
+ gc.collect()
+ benchmark.pedantic(benchmark_trie.add, # pyright: ignore[reportUnknownMemberType]
+ setup=setup,
+ rounds=rounds,
+ iterations=1)
+
+
+@pytest.mark.benchmark(**BENCHMARK_CONFIG)
+@pytest.mark.parametrize('runtime_validation', [False, True])
+@pytest.mark.parametrize('depth', TEST_DEPTHS)
+def test_updating_trie(
+ benchmark, # pyright: ignore[reportUnknownParameterType, reportMissingParameterType]
+ runtime_validation: bool,
+ depth: int):
+ '''Benchmark the update value for a key operation on a populated trie.
+
+ This test checks the performance of updating keys in the trie.
+ '''
+ benchmark_trie = TEST_TRIES[depth]
+ benchmark_trie.runtime_validation = runtime_validation
+ benchmark_key: str = TEST_DATA[depth][0] # Use the first key for benchmarking
+ # for idempotency we reuse the original value for the updated value
+ benchmark_value: Any = benchmark_trie[benchmark_key]
+
+ gc.collect()
+ benchmark(benchmark_trie.update, benchmark_key, benchmark_value)
+
+
+@pytest.mark.benchmark(**BENCHMARK_CONFIG)
+@pytest.mark.parametrize('runtime_validation', [False, True])
+@pytest.mark.parametrize('depth', TEST_DEPTHS)
+def test_key_in_trie(benchmark, # pyright: ignore[reportUnknownParameterType, reportMissingParameterType]
+ runtime_validation: bool,
+ depth: int) -> None:
+ '''Benchmark using keys with the in operator for GeneralizedTrie.
+
+ This test checks the performance of key lookups in the trie using the in operator.
+ '''
+ benchmark_trie: GeneralizedTrie = TEST_TRIES[depth]
+ benchmark_key: str = TEST_DATA[depth][-1] # Use the last key for benchmarking
+ gc.collect()
+ benchmark_trie.runtime_validation = runtime_validation
+ benchmark(benchmark_trie.__contains__, benchmark_key)
+
+
+@pytest.mark.benchmark(**BENCHMARK_CONFIG)
+@pytest.mark.parametrize('runtime_validation', [False, True])
+@pytest.mark.parametrize('depth', TEST_DEPTHS)
+def test_key_not_in_trie(benchmark, # pyright: ignore[reportUnknownParameterType, reportMissingParameterType]
+ runtime_validation: bool,
+ depth: int) -> None:
+ '''Benchmark missing keys with the in operator for GeneralizedTrie.
+
+ This test checks the performance of missing key lookups in the trie using the in operator.
+ '''
+ benchmark_trie, missing_key = TEST_MISSING_KEY_TRIES[depth]
+ benchmark_trie.runtime_validation = runtime_validation
+
+ gc.collect()
+ benchmark(benchmark_trie.__contains__, missing_key) # pyright: ignore[reportUnknownMemberType]
+
+
+@pytest.mark.benchmark(**BENCHMARK_CONFIG)
+@pytest.mark.parametrize('runtime_validation', [False, True])
+@pytest.mark.parametrize('depth', TEST_DEPTHS)
+def test_remove_key_from_trie(benchmark, # pyright: ignore[reportUnknownParameterType, reportMissingParameterType]
+ runtime_validation: bool,
+ depth: int) -> None:
+ '''Benchmark remove() method for GeneralizedTrie.
+
+ This test checks the performance of the remove() method.
+ '''
+ # Generate a NEW GeneralizedTrie from the test data to keep from corrupting the
+ # pre-built test tries with the deletions.
+ test_data = TEST_DATA[depth]
+ benchmark_trie: GeneralizedTrie = generate_test_trie_from_data(data=test_data, value=None)
+ benchmark_trie.runtime_validation = runtime_validation
+ key_iter = iter(test_data)
+
+ def setup():
+ return (), {'key': next(key_iter)} # Will crash when exhausted
+ rounds = len(test_data) # Rounds limited to prevent exhaustion
+
+ gc.collect()
+ benchmark.pedantic(benchmark_trie.remove, # pyright: ignore[reportUnknownMemberType]
+ setup=setup,
+ rounds=rounds)
+
+
+@pytest.mark.benchmark(**BENCHMARK_CONFIG)
+@pytest.mark.parametrize('runtime_validation', [False, True])
+@pytest.mark.parametrize('depth', TEST_DEPTHS)
+def test_get(benchmark, # pyright: ignore[reportUnknownParameterType, reportMissingParameterType]
+ runtime_validation: bool,
+ depth: int) -> None:
+ '''Benchmark get() method for GeneralizedTrie.
+
+ This test checks the performance of the get() method.
+ '''
+ test_data = TEST_DATA[depth]
+ benchmark_trie: GeneralizedTrie = TEST_TRIES[depth]
+ benchmark_trie.runtime_validation = runtime_validation
+ key_iter = iter(test_data)
+
+ def setup():
+ return (), {'key': next(key_iter)} # Will crash when exhausted
+ rounds = len(test_data) # Rounds limited to prevent exhaustion
+
+ gc.collect()
+ benchmark.pedantic(benchmark_trie.get, # pyright: ignore[reportUnknownMemberType]
+ setup=setup,
+ rounds=rounds)
+
+
+@pytest.mark.benchmark(**BENCHMARK_CONFIG)
+@pytest.mark.parametrize('runtime_validation', [False, True])
+@pytest.mark.parametrize('depth', [3, 4, 5, 6, 7, 8, 9])
+def test_prefixes(benchmark, # pyright: ignore[reportMissingParameterType, reportUnknownParameterType]
+ runtime_validation: bool,
+ depth: int):
+ """Benchmark trie prefixes() method.
+
+ This test checks the performance of the prefixes() method on fully populated tries.
+ Because the potential number of matching keys in the trie increases exponentially with depth
+ and the full runtime of a prefix search is dominated by the sheer number of keys found,
+ this test aims to measure the impact of this growth on the performance of the prefixes() method.
+
+ Args:
+ runtime_validation (bool): Whether to enable runtime validation.
+ depth (int): The depth of the trie to test.
+ """
+ trie = TEST_FULLY_POPULATED_TRIES[depth]
+ trie.runtime_validation = runtime_validation
+ search_key = TEST_DATA[depth][0]
+
+ def helper_prefixes(trie: GeneralizedTrie, search_key: GeneralizedKey) -> list[GeneralizedKey]:
+ return list(trie.prefixes(search_key))
+
+ gc.collect()
+ # The helper function is needed because prefixes() is a generator
+ # which requires iteration to access all results.
+ # This results in a list of all matched keys being created
+ # with additional overhead vs the generator approach.
+ benchmark.extra_info[ # type: ignore
+ f'prefixes, key="{search_key}", number_of_matches, keys_in_trie'] = (
+ len(benchmark(helper_prefixes, trie, search_key)), # pyright: ignore[reportUnknownArgumentType]
+ len(trie))
+
+
+@pytest.mark.benchmark(**BENCHMARK_CONFIG)
+@pytest.mark.parametrize('runtime_validation', [False, True])
+@pytest.mark.parametrize('trie_depth', [7])
+@pytest.mark.parametrize('key_depth', [2, 3, 4]) # Focus on manageable depths
+@pytest.mark.parametrize('search_depth', [1, 2, 3]) # Focus on manageable depths
+def test_prefixed_by(benchmark, # pyright: ignore[reportMissingParameterType, reportUnknownParameterType]
+ runtime_validation: bool,
+ trie_depth: int,
+ key_depth: int,
+ search_depth: int):
+ """Benchmark trie prefixed_by() method.
+
+ This test checks the performance of the prefixed_by() method on fully populated tries.
+
+ prefixed_by() finds all keys in the trie that are prefixed by a given key up to a specified search depth.
+
+ Args:
+ runtime_validation (bool): Whether to enable runtime validation.
+ trie_depth (int): The depth of the trie to test.
+ key_depth (int): The depth of the key to test.
+ search_depth (int): The depth to search for prefixed keys starting from key_depth.
+ """
+ trie = TEST_FULLY_POPULATED_TRIES[trie_depth]
+ trie.runtime_validation = runtime_validation
+
+ # Use a prefix that matches multiple keys
+ search_key = TEST_DATA[key_depth][-1] # last key of the key_depth
+
+ def helper_prefixed_by(trie: GeneralizedTrie,
+ search_key: GeneralizedKey,
+ search_depth: int) -> list[GeneralizedKey]:
+ return list(trie.prefixed_by(search_key, search_depth))
+
+ gc.collect()
+ # The helper function is needed because prefixed_by() is a generator
+ # which requires iteration to access all results.
+ # This results in a list of all matched keys being created
+ # with additional overhead vs the generator approach.
+ benchmark.extra_info[ # type: ignore
+ f'prefixed_by, key="{search_key}", depth={search_depth}, number_of_matches, keys_in_trie'] = (
+ len(benchmark(helper_prefixed_by, # pyright: ignore[reportUnknownArgumentType]
+ trie, search_key, search_depth)),
+ len(trie))
diff --git a/docs/html/_images/build_trie_with_add.png b/docs/html/_images/build_trie_with_add.png
new file mode 100644
index 0000000..652e021
Binary files /dev/null and b/docs/html/_images/build_trie_with_add.png differ
diff --git a/docs/html/_images/build_trie_with_update.png b/docs/html/_images/build_trie_with_update.png
new file mode 100644
index 0000000..61bc48c
Binary files /dev/null and b/docs/html/_images/build_trie_with_update.png differ
diff --git a/docs/html/_images/get_kops_per_second_by_key_depth_and_runtime_validation.png b/docs/html/_images/get_kops_per_second_by_key_depth_and_runtime_validation.png
new file mode 100644
index 0000000..ed0d88d
Binary files /dev/null and b/docs/html/_images/get_kops_per_second_by_key_depth_and_runtime_validation.png differ
diff --git a/docs/html/_images/key_in_trie.png b/docs/html/_images/key_in_trie.png
new file mode 100644
index 0000000..319ce91
Binary files /dev/null and b/docs/html/_images/key_in_trie.png differ
diff --git a/docs/html/_images/key_not_in_trie.png b/docs/html/_images/key_not_in_trie.png
new file mode 100644
index 0000000..cb6da69
Binary files /dev/null and b/docs/html/_images/key_not_in_trie.png differ
diff --git a/docs/html/_images/prefixed_by_average_kops_per_second_by_runtime_validation.png b/docs/html/_images/prefixed_by_average_kops_per_second_by_runtime_validation.png
new file mode 100644
index 0000000..b1d8afa
Binary files /dev/null and b/docs/html/_images/prefixed_by_average_kops_per_second_by_runtime_validation.png differ
diff --git a/docs/html/_images/prefixed_by_search_depth_vs_kops_per_second.png b/docs/html/_images/prefixed_by_search_depth_vs_kops_per_second.png
new file mode 100644
index 0000000..c48a358
Binary files /dev/null and b/docs/html/_images/prefixed_by_search_depth_vs_kops_per_second.png differ
diff --git a/docs/html/_images/prefixed_by_search_depth_vs_matched_keys_per_second.png b/docs/html/_images/prefixed_by_search_depth_vs_matched_keys_per_second.png
new file mode 100644
index 0000000..6cc0677
Binary files /dev/null and b/docs/html/_images/prefixed_by_search_depth_vs_matched_keys_per_second.png differ
diff --git a/docs/html/_images/prefixes_kops_per_second_by_key_depth_and_runtime_validation.png b/docs/html/_images/prefixes_kops_per_second_by_key_depth_and_runtime_validation.png
new file mode 100644
index 0000000..1e48232
Binary files /dev/null and b/docs/html/_images/prefixes_kops_per_second_by_key_depth_and_runtime_validation.png differ
diff --git a/docs/html/_images/remove_kops_per_second_vs_key_depth_and_runtime_validation.png b/docs/html/_images/remove_kops_per_second_vs_key_depth_and_runtime_validation.png
new file mode 100644
index 0000000..93e98b8
Binary files /dev/null and b/docs/html/_images/remove_kops_per_second_vs_key_depth_and_runtime_validation.png differ
diff --git a/docs/html/_images/update_kops_per_second_by_key_depth_and_runtime_validation.png b/docs/html/_images/update_kops_per_second_by_key_depth_and_runtime_validation.png
new file mode 100644
index 0000000..b2b098b
Binary files /dev/null and b/docs/html/_images/update_kops_per_second_by_key_depth_and_runtime_validation.png differ
diff --git a/docs/html/_sources/benchmarks.rst.txt b/docs/html/_sources/benchmarks.rst.txt
new file mode 100644
index 0000000..9da997e
--- /dev/null
+++ b/docs/html/_sources/benchmarks.rst.txt
@@ -0,0 +1,457 @@
+====================
+Benchmarks
+====================
+
+.. toctree::
+
+
+These are benchmark results for the `gen-tries` library, focusing on
+various operations such as building tries, checking key existence,
+prefix matching, and updating tries.
+
+.. index::
+
+--------
+Platform
+--------
+
+Hardware
+--------
+
+Mac Studio, Apple M2 Ultra, 24-Core CPU (approx 3.5 GHz), 128GB RAM
+
+Python Version
+--------------
+
+Python 3.13.7
+
+Benchmark Configuration
+-----------------------
+
+- **gen-tries Version**: 0.9.4
+- **Single Threaded**
+- **Symbol Set**: '0123'
+- **Key Generation**: All possible combinations of symbols up to a certain length, starting with all '0's and incrementing the last symbol until all combinations were generated.
+- **Trie Depths**: 2 to 9
+- **Runtime Validation**: Enabled and Disabled
+- **Benchmarking Tool**: pytest-benchmark
+- **Benchmarking Code**: bench/bench_performance.py
+
+
+Running the Benchmarks
+----------------------
+
+To run all benchmarks with pytest-benchmark:
+
+.. code-block:: shell
+
+ cd bench
+ pytest bench_performance.py \
+ --benchmark-sort=name \
+ --benchmark-group-by=func \
+ --benchmark-histogram=histogram/benchmark \
+ --benchmark-time-unit=ns
+
+pytest-benchmark Settings
+-------------------------
+
+Benchmarks are configured with:
+
+- **min_rounds**: 100
+- **min_time**: 1s
+- **max_time**: 10s
+- **timer**: perf_counter_ns
+
+See the source for details.
+
+What is Measured
+----------------
+
+- **Add**: Time to add keys to the trie
+- **Update**: Time to update existing keys
+- **Lookup**: Time to check key presence (hit/miss)
+- **Remove**: Time to remove keys
+- **Traversal**: Time for `prefixes()` and `prefixed_by()`
+
+Interpreting Results
+--------------------
+
+- **ops**: Operations per second (higher is better)
+- **Scaling**: As trie depth increases, expect some slowdown; large jumps may indicate inefficiency.
+
+
+Advanced Usage
+--------------
+
+- Compare runs: ``pytest ... --benchmark-compare=previous.json``
+- Export results: ``pytest ... --benchmark-json=results.json``
+- View histograms: ``pytest ... --benchmark-histogram=histogram/benchmark``
+
+Discussion
+----------
+
+The benchmarks are designed to evaluate performance across different trie
+depths and configurations.
+
+Comparison of runtime validation settings reveals that disabling runtime validation
+can lead to small performance improvements in some scenarios, particularly for deeper
+tries. However, the performance gains are often minimal and may not justify the trade-offs
+in terms of safety and correctness. Runtime validation helps catch errors and ensures
+correctness by validating the structure and content of the trie during operations.
+
+Turning it off may be appropriate in extremely performance-critical applications where the data is
+well-defined and unlikely to change, allowing for optimizations that bypass some of the safety checks.
+
+**HOWEVER**, it is crucial to thoroughly test and validate the trie implementation in such
+scenarios to avoid potential issues. Absolutely NO assumptions should be made about
+the data or its structure. The behavior of the trie with invalid data is undefined and may lead to
+catastrophic failures.
+
+In Summary: If you choose to disable runtime validation, do so with extreme caution and only after
+thorough testing. The performance increase is relatively small, and the risks may outweigh the benefits.
+
+For the tests, a symbol set of '0123' was used and keys were generated by creating
+all possible combinations of these symbols up to a certain length starting
+with all '0's and incrementing the last symbol until all combinations were generated.
+
+So for a trie depth of 3, the keys would be '000', '001', '002', ..., '333'. Thus
+covering all possible keys of length 3. This approach ensures a comprehensive
+evaluation of the trie performance across different key patterns and depths and has a
+branching factor (4) that allows for efficient exploration of the search space to
+a substantial depth (9).
+
+Depth 1 is generally omitted from the benchmarks due to its limited key space and minimal impact on performance.
+
+Building trie using add()
+--------------------------------
+Table shows thousands of operations per second (Kops/sec) for different trie depths and
+runtime key validation settings while building the trie using the add() method
+using strings as keys. Trie Depth is equivalent to the number of symbols in each key
+being added to the trie.
+
+The operations being measured are the individual add() method calls adding keys to the trie.
+
+============= ====================== ==========
+Trie Depth Runtime Validation Kops / sec
+============= ====================== ==========
+2 False 266.7
+2 True 266.7
+3 False 609.5
+3 True 587.2
+4 False 612.4
+4 True 587.2
+5 False 644.8
+5 True 612.4
+6 False 634.4
+6 True 601.8
+7 False 618.5
+7 True 591.6
+8 False 622.9
+8 True 600.3
+9 False 602.6
+9 True 584.5
+============= ====================== ==========
+
+.. image:: _static/benchmarks/build_trie_with_add.png
+ :align: center
+
+Building trie using update()
+-----------------------------------
+
+This is the same as above but using the update() method to build the trie instead of add()
+
+============= ====================== ==========
+Trie Depth Runtime Validation Kops / sec
+============= ====================== ==========
+2 False 400.0
+2 True 444.4
+3 False 547.0
+3 True 561.4
+4 False 651.4
+4 True 635.2
+5 False 592.6
+5 True 635.2
+6 False 599.2
+6 True 600.5
+7 False 602.3
+7 True 585.9
+8 False 608.8
+8 True 588.8
+9 False 598.1
+9 True 573.8
+============= ====================== ==========
+
+.. image:: _static/benchmarks/build_trie_with_update.png
+ :align: center
+
+key in trie
+-----------
+
+============= ====================== ==========
+Key Depth Runtime Validation Kops / sec
+============= ====================== ==========
+2 False 4327.1
+2 True 3388.7
+3 False 3617.9
+3 True 2959.5
+4 False 3167.6
+4 True 2657.5
+5 False 2834.5
+5 True 2399.2
+6 False 2555.6
+6 True 2193.9
+7 False 2344.7
+7 True 2045.4
+8 False 2156.1
+8 True 1736.1
+9 False 1946.3
+9 True 1690.6
+============= ====================== ==========
+
+
+.. image:: _static/benchmarks/key_in_trie.png
+ :align: center
+
+key not in trie
+---------------
+
+============= ====================== ==========
+Key Depth Runtime Validation Kops / sec
+============= ====================== ==========
+2 False 5243.8
+2 True 3636.4
+3 False 4024.1
+3 True 3200.0
+4 False 3403.7
+4 True 2849.0
+5 False 3027.6
+5 True 2548.4
+6 False 2642.7
+6 True 2325.0
+7 False 2403.3
+7 True 2143.2
+8 False 2268.6
+8 True 1992.0
+9 False 2072.1
+9 True 1806.7
+============= ====================== ==========
+
+.. image:: _static/benchmarks/key_not_in_trie.png
+ :align: center
+
+get()
+-----
+
+The get() operation retrieves the value (the TrieEntry) associated with a key in the trie.
+
+This benchmark measures the performance of the get() operation for existing keys
+at various depths in the trie, with and without runtime validation enabled.
+
+============= ====================== ==========
+Key Depth Runtime Validation Kops / sec
+============= ====================== ==========
+2 False 1460.3
+2 True 1460.3
+3 False 2103.8
+3 True 1852.3
+4 False 2299.3
+4 True 1992.7
+5 False 2151.9
+5 True 1929.5
+6 False 1987.7
+6 True 1778.5
+7 False 1881.2
+7 True 1667.6
+8 False 1698.2
+8 True 1552.3
+9 False 1640.2
+9 True 1495.8
+============= ====================== ==========
+
+.. image:: _static/benchmarks/get_kops_per_second_by_key_depth_and_runtime_validation.png
+ :align: center
+
+update()
+--------
+
+This benchmark measures the performance of updating values for existing keys in the trie.
+
+The results show the number of values updated per second (Kops) for different key depths
+and runtime validation settings.
+
+============= ====================== ==========
+Key Depth Runtime Validation Kops / sec
+============= ====================== ==========
+2 False 2106.6
+2 True 1845.0
+3 False 1862.2
+3 True 1683.5
+4 False 1703.6
+4 True 1565.4
+5 False 1633.9
+5 True 1463.0
+6 False 1512.5
+6 True 1396.9
+7 False 1385.5
+7 True 1319.3
+8 False 1371.1
+8 True 1271.2
+9 False 1306.6
+9 True 1205.8
+============= ====================== ==========
+
+.. image:: _static/benchmarks/update_kops_per_second_by_key_depth_and_runtime_validation.png
+ :align: center
+
+remove()
+--------
+
+This benchmark is designed as a 'semi-worst-case' scenario for the remove() operation.
+
+By generating only keys at the maximum depth and then removing them,
+we can observe the performance impact on the trie structure as intermediate
+nodes are frequently removed all the way to the root node (requiring the most work).
+
+A true worst case scenario for the remove() operation would involve a degenerate trie,
+where all keys share the same prefix, effectively behaving like a linked list. While this
+is not the primary focus of the benchmark, it is worth noting that such a structure would
+exhibit significantly different performance characteristics than typical trie use cases.
+
+In practice, the performance impact of the remove() operation is usually mitigated by the
+structure of the trie and the distribution of keys within it. A degenerate trie is an edge case and
+is not representative of typical usage patterns (and probably indicative of a poorly chosen data structure
+for the task at hand).
+
+============= ====================== ==========
+Key Depth Runtime Validation Kops / sec
+============= ====================== ==========
+2 False 533.3
+2 True 457.1
+3 False 820.5
+3 True 633.7
+4 False 870.7
+4 True 795.0
+5 False 819.2
+5 True 782.3
+6 False 836.8
+6 True 751.1
+7 False 807.7
+7 True 727.4
+8 False 780.4
+8 True 709.3
+9 False 749.9
+9 True 685.8
+============= ====================== ==========
+
+.. image:: _static/benchmarks/remove_kops_per_second_vs_key_depth_and_runtime_validation.png
+ :align: center
+
+prefixed_by()
+-------------
+
+Performance of the `prefixed_by()` method is more complex due to the nature of prefix matching.
+
+Rather than a simple hit/miss scenario, `prefixed_by()` can return multiple, potentially
+thousands or more (!), matches depending on the prefix length, the search depth, and the
+number of keys present in the trie.
+
+For this test, a completely filled trie 7 levels deep is used using the same keys as before. Matches are done
+starting from key depths of 2 to 4 symbols to a search depth of 1 to 3 symbols from that starting point.
+
+A search for the prefix "01" (a key depth of 2) with a search depth of 1 would match the keys
+`"01"`, `"010"`, `"011"`, `"012"` and `"013"` (5 keys).
+
+The number of matches increases exponentially with the search depth in the completely filled trie.
+In this case, a search depth of 2 would match 21 keys (the key itself, 4 at the first level below it,
+and 16 more at the second level). A search depth of 3 would match 85 keys (1 + 4 + 16 + 64).
+
+At lower levels of a trie, the number of matches can be smaller due to there not being as many keys
+because of reaching the maximum depth of the trie, so the performance impact of returning multiple
+matches is less pronounced (it effectively limits the search depth). In this benchmark, we made sure
+that the trie is fully populated at all levels up to the maximum depth being searched (4 + 3) to avoid
+that issue.
+
+As can be seen, while the performance impact of looking up keys is slightly more pronounced at higher
+key depths, the performance impact of returning multiple matches is *far* more significant as the search
+depth increases, as the number of potential matches grows exponentially.
+
+Turning off runtime validation has a noticeable positive effect on performance, especially at higher search
+depths, but is not as significant as the impact of returning multiple matches.
+
+Note that the performance numbers here are for returning all matches as a list,
+rather than stopping at the first match found. This is done to provide a more comprehensive
+view of the performance characteristics of the `prefixed_by()` method.
+
+Note that although, for example, a search depth of 3 from a key depth of 4 in a trie of depth 7
+has a Kops/sec of 77.8 (Runtime Validation == False) or 85.5 (Runtime Validation == True),
+it is returning 85 matches each time and so the number of prefixed keys matched per second
+is actually 85 times higher (6.6 Mkeys/sec or 7.3 Mkeys/sec respectively).
+
+Real world performance will vary based on the specific use case, data distribution, and other factors.
+
+============ ========= ========== ================ ================== ==========
+Search Depth Key Depth Trie Depth Returned Matches Runtime Validation Kops / sec
+============ ========= ========== ================ ================== ==========
+1 2 7 5 False 902.5
+1 2 7 5 True 825.8
+1 3 7 5 False 883.4
+1 3 7 5 True 824.4
+1 4 7 5 False 873.4
+1 4 7 5 True 834.0
+2 2 7 21 False 308.5
+2 2 7 21 True 303.1
+2 3 7 21 False 316.8
+2 3 7 21 True 304.2
+2 4 7 21 False 307.3
+2 4 7 21 True 300.6
+3 2 7 85 False 86.1
+3 2 7 85 True 84.9
+3 3 7 85 False 85.8
+3 3 7 85 True 83.4
+3 4 7 85 False 77.8
+3 4 7 85 True 85.5
+============ ========= ========== ================ ================== ==========
+
+.. image:: _static/benchmarks/prefixed_by_average_kops_per_second_by_runtime_validation.png
+ :align: center
+
+.. image:: _static/benchmarks/prefixed_by_search_depth_vs_matched_keys_per_second.png
+ :align: center
+
+prefixes()
+----------
+
+prefixes() performance is generally better than prefixed_by() as it stops searching as soon as it finds a match
+and only returns the match and its parent keys. As the number of matches is limited to a maximum of the key depth,
+and key depth generally grows logarithmically (for non-degenerate tries), the performance impact is *far*
+less pronounced at greater key depths.
+
+A fully populated trie with a fanout of 4 and a depth of 9 contains 349524 nodes - but prefixes() will never
+return more than 9 matches for it. With a fanout of 32, the number of nodes in a fully populated 9 level trie
+increases exponentially (to more than 36 trillion keys), but the maximum number of prefixes() matches
+returned remains unchanged at 9.
+
+The exception here is degenerate tries (tries where all keys share the same prefix). In these cases, the number of
+returned prefixes() matches can be much larger, as the trie may effectively behave like a linked list.
+
+========= ================ ================== ==========
+Key Depth Returned Matches Runtime Validation Kops / sec
+========= ================ ================== ==========
+3 3 False 1927.9
+3 3 True 1693.5
+4 4 False 1722.7
+4 4 True 1545.5
+5 5 False 1503.1
+5 5 True 1379.9
+6 6 False 1248.9
+6 6 True 1226.5
+7 7 False 1231.5
+7 7 True 1135.4
+8 8 False 1098.0
+8 8 True 993.4
+9 9 False 966.9
+9 9 True 947.1
+========= ================ ================== ==========
+
+.. image:: _static/benchmarks/prefixes_kops_per_second_by_key_depth_and_runtime_validation.png
+ :align: center
diff --git a/docs/html/_sources/index.rst.txt b/docs/html/_sources/index.rst.txt
index b1fee47..34cc61c 100644
--- a/docs/html/_sources/index.rst.txt
+++ b/docs/html/_sources/index.rst.txt
@@ -22,4 +22,5 @@ Contents
usage
reference
+ benchmarks
gentrie
diff --git a/docs/html/_sources/reference.rst.txt b/docs/html/_sources/reference.rst.txt
index 383df59..acfac31 100644
--- a/docs/html/_sources/reference.rst.txt
+++ b/docs/html/_sources/reference.rst.txt
@@ -5,6 +5,8 @@ This section provides detailed information about the API of **gen-trie**.
It includes descriptions of the main classes, methods, and attributes
available in the library.
+.. toctree::
+
.. index::
single: gentrie; GeneralizedTrie
single: gentrie; API Reference
diff --git a/docs/html/_sources/usage.rst.txt b/docs/html/_sources/usage.rst.txt
index b2bbe73..3c3f51b 100644
--- a/docs/html/_sources/usage.rst.txt
+++ b/docs/html/_sources/usage.rst.txt
@@ -5,6 +5,9 @@ Using gen-trie
.. _gentrie-installation:
+.. toctree::
+ :maxdepth: 4
+
.. index::
------------
diff --git a/docs/html/_static/benchmarks/build_trie_with_add.png b/docs/html/_static/benchmarks/build_trie_with_add.png
new file mode 100644
index 0000000..652e021
Binary files /dev/null and b/docs/html/_static/benchmarks/build_trie_with_add.png differ
diff --git a/docs/html/_static/benchmarks/build_trie_with_update.png b/docs/html/_static/benchmarks/build_trie_with_update.png
new file mode 100644
index 0000000..61bc48c
Binary files /dev/null and b/docs/html/_static/benchmarks/build_trie_with_update.png differ
diff --git a/docs/html/_static/benchmarks/get_kops_per_second_by_key_depth_and_runtime_validation.png b/docs/html/_static/benchmarks/get_kops_per_second_by_key_depth_and_runtime_validation.png
new file mode 100644
index 0000000..ed0d88d
Binary files /dev/null and b/docs/html/_static/benchmarks/get_kops_per_second_by_key_depth_and_runtime_validation.png differ
diff --git a/docs/html/_static/benchmarks/key_in_trie.png b/docs/html/_static/benchmarks/key_in_trie.png
new file mode 100644
index 0000000..319ce91
Binary files /dev/null and b/docs/html/_static/benchmarks/key_in_trie.png differ
diff --git a/docs/html/_static/benchmarks/key_not_in_trie.png b/docs/html/_static/benchmarks/key_not_in_trie.png
new file mode 100644
index 0000000..cb6da69
Binary files /dev/null and b/docs/html/_static/benchmarks/key_not_in_trie.png differ
diff --git a/docs/html/_static/benchmarks/prefixed_by_average_kops_per_second_by_runtime_validation.png b/docs/html/_static/benchmarks/prefixed_by_average_kops_per_second_by_runtime_validation.png
new file mode 100644
index 0000000..b1d8afa
Binary files /dev/null and b/docs/html/_static/benchmarks/prefixed_by_average_kops_per_second_by_runtime_validation.png differ
diff --git a/docs/html/_static/benchmarks/prefixed_by_search_depth_vs_kops_per_second.png b/docs/html/_static/benchmarks/prefixed_by_search_depth_vs_kops_per_second.png
new file mode 100644
index 0000000..c48a358
Binary files /dev/null and b/docs/html/_static/benchmarks/prefixed_by_search_depth_vs_kops_per_second.png differ
diff --git a/docs/html/_static/benchmarks/prefixed_by_search_depth_vs_matched_keys_per_second.png b/docs/html/_static/benchmarks/prefixed_by_search_depth_vs_matched_keys_per_second.png
new file mode 100644
index 0000000..6cc0677
Binary files /dev/null and b/docs/html/_static/benchmarks/prefixed_by_search_depth_vs_matched_keys_per_second.png differ
diff --git a/docs/html/_static/benchmarks/prefixes_kops_per_second_by_key_depth_and_runtime_validation.png b/docs/html/_static/benchmarks/prefixes_kops_per_second_by_key_depth_and_runtime_validation.png
new file mode 100644
index 0000000..1e48232
Binary files /dev/null and b/docs/html/_static/benchmarks/prefixes_kops_per_second_by_key_depth_and_runtime_validation.png differ
diff --git a/docs/html/_static/benchmarks/remove_kops_per_second_vs_key_depth_and_runtime_validation.png b/docs/html/_static/benchmarks/remove_kops_per_second_vs_key_depth_and_runtime_validation.png
new file mode 100644
index 0000000..93e98b8
Binary files /dev/null and b/docs/html/_static/benchmarks/remove_kops_per_second_vs_key_depth_and_runtime_validation.png differ
diff --git a/docs/html/_static/benchmarks/update_kops_per_second_by_key_depth_and_runtime_validation.png b/docs/html/_static/benchmarks/update_kops_per_second_by_key_depth_and_runtime_validation.png
new file mode 100644
index 0000000..b2b098b
Binary files /dev/null and b/docs/html/_static/benchmarks/update_kops_per_second_by_key_depth_and_runtime_validation.png differ
diff --git a/docs/html/benchmarks.html b/docs/html/benchmarks.html
new file mode 100644
index 0000000..1cdbc2a
--- /dev/null
+++ b/docs/html/benchmarks.html
@@ -0,0 +1,1019 @@
+
+
+
+
These are benchmark results for the gen-trie library, focusing on
+various operations such as building tries, checking key existence,
+prefix matching, and updating tries.
Key Generation: All possible combinations of symbols up to a certain length, starting with all ‘0’s and incrementing the last symbol until all combinations were generated.
The benchmarks are designed to evaluate performance across different trie
+depths and configurations.
+
Comparison of runtime validation settings reveals that disabling runtime validation
+can lead to small performance improvements in some scenarios, particularly for deeper
+tries. However, the performance gains are often minimal and may not justify the trade-offs
+in terms of safety and correctness. Runtime validation helps catch errors and ensures
+correctness by validating the structure and content of the trie during operations.
+
Turning it off may be appropriate in extremely performance-critical applications where the data is
+well-defined and unlikely to change, allowing for optimizations that bypass some of the safety checks.
+
HOWEVER, it is crucial to thoroughly test and validate the trie implementation in such
+scenarios to avoid potential issues. Absolutely NO assumptions should be made about
+the data or its structure. The behavior of the trie with invalid data is undefined and may lead to
+catastrophic failures.
+
In Summary: If you choose to disable runtime validation, do so with extreme caution and only after
+thorough testing. The performance increase is relatively small, and the risks may outweigh the benefits.
+
For the tests, a symbol set of ‘0123’ was used and keys were generated by creating
+all possible combinations of these symbols up to a certain length starting
+with all ‘0’s and incrementing the last symbol until all combinations were generated.
+
So for a trie depth of 3, the keys would be ‘000’, ‘001’, ‘002’, …, ‘333’. Thus
+covering all possible keys of length 3. This approach ensures a comprehensive
+evaluation of the trie performance across different key patterns and depths and has a
+branching factor (4) that allows for efficient exploration of the search space to
+a substantial depth (9).
+
Depth 1 is generally omitted from the benchmarks due to its limited key space and minimal impact on performance.
The table shows thousands of operations per second (Kops/sec) for different trie depths and
+runtime key validation settings while building the trie with the add() method,
+using strings as keys. Trie Depth is equivalent to the number of symbols in each key
+being added to the trie.
+
The operations being measured are the individual add() method calls adding keys to the trie.
The get() operation retrieves the value (the TrieEntry) associated with a key in the trie.
+
This benchmark measures the performance of the get() operation for existing keys
+at various depths in the trie, with and without runtime validation enabled.
This benchmark is designed as a ‘semi-worst-case’ scenario for the remove() operation.
+
By generating only keys at the maximum depth and then removing them,
+we can observe the performance impact on the trie structure as intermediate
+nodes are frequently removed all the way to the root node (requiring the most work).
+
A true worst case scenario for the remove() operation would involve a degenerate trie,
+where all keys share the same prefix, effectively behaving like a linked list. While this
+is not the primary focus of the benchmark, it is worth noting that such a structure would
+exhibit significantly different performance characteristics than typical trie use cases.
+
In practice, the performance impact of the remove() operation is usually mitigated by the
+structure of the trie and the distribution of keys within it. A degenerate trie is an edge case and
+is not representative of typical usage patterns (and probably indicative of a poorly chosen data structure
+for the task at hand).
Performance of the prefixed_by() method is more complex due to the nature of prefix matching.
+
Rather than a simple hit/miss scenario, prefixed_by() can return multiple, potentially
+thousands or more (!), matches depending on the prefix length, the search depth, and the
+number of keys present in the trie.
+
For this test, a completely filled trie 7 levels deep was built using the same keys as before. Matches are done
+starting from key depths of 2 to 4 symbols to a search depth of 1 to 3 symbols from that starting point.
+
A search for the prefix “01” (a key depth of 2) with a search depth of 1 would match the keys
+“01”, “010”, “011”, “012” and “013” (5 keys).
+
The number of matches increases exponentially with the search depth in the completely filled trie.
+In this case, a search depth of 2 would match 21 keys (the key itself, 4 at the first level below it,
+and 16 more at the second level). A search depth of 3 would match 85 keys (1 + 4 + 16 + 64).
+
At lower levels of a trie, the number of matches can be smaller due to there not being as many keys
+because of reaching the maximum depth of the trie, so the performance impact of returning multiple
+matches is less pronounced (it effectively limits the search depth). In this benchmark, we made sure
+that the trie is fully populated at all levels up to the maximum depth being searched (4 + 3) to avoid
+that issue.
+
As can be seen, while the performance impact of looking up keys is slightly more pronounced at higher
+key depths, the performance impact of returning multiple matches is far more significant as the search
+depth increases, as the number of potential matches grows exponentially.
+
Turning off runtime validation has a noticeable positive effect on performance, especially at higher search
+depths, but is not as significant as the impact of returning multiple matches.
+
Note that the performance numbers here are for returning all matches as a list,
+rather than stopping at the first match found. This is done to provide a more comprehensive
+view of the performance characteristics of the prefixed_by() method.
+
Note that although, for example, a search depth of 3 from a key depth of 4 in a trie of depth 7
+has a Kops/sec of 77.8 (Runtime Validation == False) or 85.5 (Runtime Validation == True),
+it is returning 85 matches each time and so the number of prefixed keys matched per second
+is actually 85 times higher (6.6 Mkeys/sec or 7.3 Mkeys/sec respectively).
+
Real world performance will vary based on the specific use case, data distribution, and other factors.
prefixes() performance is generally better than prefixed_by() as it stops searching as soon as it finds a match
+and only returns the match and its parent keys. As the number of matches is limited to a maximum of the key depth,
+and key depth generally grows logarithmically with the number of keys (for non-degenerate tries), the performance
+impact is far less pronounced at greater key depths.
+
A fully populated trie with a fanout of 4 and a depth of 9 contains 349524 nodes - but prefixes() will never
+return more than 9 matches for it. With a fanout of 32, the number of nodes in a fully populated 9-level trie
+increases exponentially (to more than 36 trillion nodes), but the maximum number of prefixes() matches
+returned remains unchanged at 9.
+
The exception here is degenerate tries (tries where all keys share the same prefix). In these cases, the number of
+returned prefixes() matches can be much larger, as the trie may effectively behave like a linked list.
+
+
+
Key Depth
+
Returned Matches
+
Runtime Validation
+
Kops / sec
+
+
+
+
3
+
3
+
False
+
1927.9
+
+
3
+
3
+
True
+
1693.5
+
+
4
+
4
+
False
+
1722.7
+
+
4
+
4
+
True
+
1545.5
+
+
5
+
5
+
False
+
1503.1
+
+
5
+
5
+
True
+
1379.9
+
+
6
+
6
+
False
+
1248.9
+
+
6
+
6
+
True
+
1226.5
+
+
7
+
7
+
False
+
1231.5
+
+
7
+
7
+
True
+
1135.4
+
+
8
+
8
+
False
+
1098.0
+
+
8
+
8
+
True
+
993.4
+
+
9
+
9
+
False
+
966.9
+
+
9
+
9
+
True
+
947.1
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/docs/html/genindex.html b/docs/html/genindex.html
index 3b50b2c..1560682 100644
--- a/docs/html/genindex.html
+++ b/docs/html/genindex.html
@@ -100,8 +100,15 @@