diff --git a/CHANGELOG.md b/CHANGELOG.md index 97213f8..4790e86 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,3 +27,4 @@ * 0.9.3 - Addition of coverage support. Tests for traversal.py. pytest support. Test orchestration using pytest-order and pytest-dependency. Fixed TrieValueError export. * 0.9.4 - Addition of tests for is_triekeytoke(), is_hashable(), get() methods and for TrieEntry(). Fixed bugs in TrieEntry __eq__ and __hash__ dunder methods. Rewrote __getitem__ and __contains__ dunder tests, added __delitem__ dunder tests. Excluded test_play.py and testspec.py from coverage measurements. Changed Nodes class to use __slots__ for attributes. Added tuplization of keys when creating TrieEntrys' to aid in immutability preservation. * 0.9.5 - Benchmarking code, addition of py.typed for type support +* 0.9.6 - mypy types cleanup, addition of sphinxawesome-theme to \[dependency-groups.dev\] diff --git a/bench/bench_performance.py b/bench/bench_performance.py index 452948b..3184dd0 100755 --- a/bench/bench_performance.py +++ b/bench/bench_performance.py @@ -3,9 +3,6 @@ # pylint: disable=wrong-import-position, too-many-instance-attributes, line-too-long # pylint: disable=too-many-positional-arguments, too-many-arguments, too-many-locals # pyright: reportUnnecessaryTypeIgnoreComment=warning - -# Note: pytest-benchmark does not expose proper typing hence the many type -# ignores on lines related to benchmark and benchmark.info '''bench_performance.py Benchmark for the Generalized Trie implementation. @@ -27,7 +24,7 @@ import pytest -from gentrie import GeneralizedTrie, GeneralizedKey +from gentrie import GeneralizedTrie, GeneralizedKey, TrieEntry # More robust benchmark configuration BENCHMARK_CONFIG: dict[str, Any] = { @@ -176,7 +173,7 @@ def english_words(): @pytest.mark.benchmark(group="Build trie from English wordset using update()", **BENCHMARK_CONFIG) @pytest.mark.parametrize('runtime_validation', [False, True]) def test_organic_build_with_update_from_english_words_list( - benchmark, # pyright: ignore[reportUnknownParameterType, reportMissingParameterType] + benchmark, runtime_validation: bool): '''Benchmark the adding of a list of english words to the trie using update() @@ -189,9 +186,9 @@ def helper_create_dictionary(words: Sequence[str], runtime_validation: bool) -> return trie gc.collect() - benchmark.extra_info['number_of_words'] = len(TEST_ENGLISH_WORDS) # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['runtime_validation'] = runtime_validation # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['average_word_length'] = ( # pyright: ignore[reportUnknownMemberType] + benchmark.extra_info['number_of_words'] = len(TEST_ENGLISH_WORDS) + benchmark.extra_info['runtime_validation'] = runtime_validation + benchmark.extra_info['average_word_length'] = ( sum(len(word) for word in TEST_ENGLISH_WORDS) / len(TEST_ENGLISH_WORDS) if TEST_ENGLISH_WORDS else 0) benchmark(helper_create_dictionary, words=TEST_ENGLISH_WORDS, @@ -201,7 +198,7 @@ def helper_create_dictionary(words: Sequence[str], runtime_validation: bool) -> @pytest.mark.benchmark(group="Microbenchmark update() building trie from English words", **BENCHMARK_CONFIG) @pytest.mark.parametrize('runtime_validation', [False, True]) def test_microbenchmarking_update_for_build_from_english_words_list( - benchmark, # pyright: ignore[reportUnknownParameterType, reportMissingParameterType] + benchmark, runtime_validation: bool): '''Benchmark the adding of keys to the trie using update() @@ -218,12 +215,12 @@ def setup(): rounds = len(TEST_ENGLISH_WORDS) # Rounds limited to prevent exhaustion gc.collect() - benchmark.extra_info['number_of_words'] = len(TEST_ENGLISH_WORDS) # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['runtime_validation'] = runtime_validation # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['average_word_length'] = ( # pyright: ignore[reportUnknownMemberType] + benchmark.extra_info['number_of_words'] = len(TEST_ENGLISH_WORDS) + benchmark.extra_info['runtime_validation'] = runtime_validation + benchmark.extra_info['average_word_length'] = ( sum(len(word) for word in TEST_ENGLISH_WORDS) / len(TEST_ENGLISH_WORDS) if TEST_ENGLISH_WORDS else 0) - benchmark.pedantic(benchmark_trie.update, # pyright: ignore[reportUnknownMemberType] + benchmark.pedantic(benchmark_trie.update, setup=setup, rounds=rounds, iterations=1) @@ -234,7 +231,7 @@ def setup(): @pytest.mark.parametrize('runtime_validation', [False, True]) @pytest.mark.parametrize('depth', TEST_DEPTHS) def test_build_with_update( - benchmark, # pyright: ignore[reportUnknownParameterType, reportMissingParameterType] + benchmark, runtime_validation: bool, depth: int): '''Benchmark the adding of keys to the trie using update() @@ -249,12 +246,12 @@ def setup(): rounds = len(TEST_DATA[depth]) # Rounds limited to prevent exhaustion gc.collect() - benchmark.extra_info['depth'] = depth # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['number_of_words'] = len(TEST_DATA[depth]) # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['runtime_validation'] = runtime_validation # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['average_word_length'] = ( # pyright: ignore[reportUnknownMemberType] + benchmark.extra_info['depth'] = depth + benchmark.extra_info['number_of_words'] = len(TEST_DATA[depth]) + benchmark.extra_info['runtime_validation'] = runtime_validation + benchmark.extra_info['average_word_length'] = ( sum(len(word) for word in TEST_DATA[depth]) / len(TEST_DATA[depth]) if TEST_DATA[depth] else 0) - benchmark.pedantic(benchmark_trie.update, # pyright: ignore[reportUnknownMemberType] + benchmark.pedantic(benchmark_trie.update, setup=setup, rounds=rounds, iterations=1) @@ -265,7 +262,7 @@ def setup(): @pytest.mark.parametrize('runtime_validation', [False, True]) @pytest.mark.parametrize('depth', TEST_DEPTHS) def test_build_with_add( - benchmark, # pyright: ignore[reportUnknownParameterType, reportMissingParameterType] + benchmark, runtime_validation: bool, depth: int): '''Benchmark the adding of keys to the trie using add() @@ -280,12 +277,12 @@ def setup(): rounds = len(TEST_DATA[depth]) gc.collect() - benchmark.extra_info['depth'] = depth # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['number_of_words'] = len(TEST_DATA[depth]) # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['runtime_validation'] = runtime_validation # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['average_word_length'] = ( # pyright: ignore[reportUnknownMemberType] + benchmark.extra_info['depth'] = depth + benchmark.extra_info['number_of_words'] = len(TEST_DATA[depth]) + benchmark.extra_info['runtime_validation'] = runtime_validation + benchmark.extra_info['average_word_length'] = ( sum(len(word) for word in TEST_DATA[depth]) / len(TEST_DATA[depth]) if TEST_DATA[depth] else 0) - benchmark.pedantic(benchmark_trie.add, # pyright: ignore[reportUnknownMemberType] + benchmark.pedantic(benchmark_trie.add, setup=setup, rounds=rounds, iterations=1) @@ -296,7 +293,7 @@ def setup(): @pytest.mark.parametrize('runtime_validation', [False, True]) @pytest.mark.parametrize('depth', TEST_DEPTHS) def test_updating_trie( - benchmark, # pyright: ignore[reportUnknownParameterType, reportMissingParameterType] + benchmark, runtime_validation: bool, depth: int): '''Benchmark the update value for a key operation on a populated trie. @@ -317,7 +314,7 @@ def test_updating_trie( **BENCHMARK_CONFIG) @pytest.mark.parametrize('runtime_validation', [False, True]) @pytest.mark.parametrize('depth', TEST_DEPTHS) -def test_key_in_trie(benchmark, # pyright: ignore[reportUnknownParameterType, reportMissingParameterType] +def test_key_in_trie(benchmark, runtime_validation: bool, depth: int) -> None: '''Benchmark using keys with the in operator for GeneralizedTrie. @@ -328,10 +325,10 @@ def test_key_in_trie(benchmark, # pyright: ignore[reportUnknownParameterType, r benchmark_key: str = TEST_DATA[depth][-1] # Use the last key for benchmarking gc.collect() benchmark_trie.runtime_validation = runtime_validation - benchmark.extra_info['depth'] = depth # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['number_of_words'] = len(TEST_DATA[depth]) # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['runtime_validation'] = runtime_validation # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['average_word_length'] = ( # pyright: ignore[reportUnknownMemberType] + benchmark.extra_info['depth'] = depth + benchmark.extra_info['number_of_words'] = len(TEST_DATA[depth]) + benchmark.extra_info['runtime_validation'] = runtime_validation + benchmark.extra_info['average_word_length'] = ( sum(len(word) for word in TEST_DATA[depth]) / len(TEST_DATA[depth]) if TEST_DATA[depth] else 0) benchmark(benchmark_trie.__contains__, benchmark_key) @@ -339,7 +336,7 @@ def test_key_in_trie(benchmark, # pyright: ignore[reportUnknownParameterType, r @pytest.mark.benchmark(group="Microbenchmark __contains__() for missing keys using synthetic data", **BENCHMARK_CONFIG) @pytest.mark.parametrize('runtime_validation', [False, True]) @pytest.mark.parametrize('depth', TEST_DEPTHS) -def test_key_not_in_trie(benchmark, # pyright: ignore[reportUnknownParameterType, reportMissingParameterType] +def test_key_not_in_trie(benchmark, runtime_validation: bool, depth: int) -> None: '''Benchmark missing keys with the in operator for GeneralizedTrie. @@ -350,10 +347,10 @@ def test_key_not_in_trie(benchmark, # pyright: ignore[reportUnknownParameterTyp benchmark_trie.runtime_validation = runtime_validation gc.collect() - benchmark.extra_info['depth'] = depth # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['number_of_words'] = len(TEST_DATA[depth]) # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['runtime_validation'] = runtime_validation # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['average_word_length'] = ( # pyright: ignore[reportUnknownMemberType] + benchmark.extra_info['depth'] = depth + benchmark.extra_info['number_of_words'] = len(TEST_DATA[depth]) + benchmark.extra_info['runtime_validation'] = runtime_validation + benchmark.extra_info['average_word_length'] = ( sum(len(word) for word in TEST_DATA[depth]) / len(TEST_DATA[depth]) if TEST_DATA[depth] else 0) benchmark(benchmark_trie.__contains__, missing_key) @@ -361,7 +358,7 @@ def test_key_not_in_trie(benchmark, # pyright: ignore[reportUnknownParameterTyp @pytest.mark.benchmark(group="Microbenchmark remove() method using synthetic data", **BENCHMARK_CONFIG) @pytest.mark.parametrize('runtime_validation', [False, True]) @pytest.mark.parametrize('depth', TEST_DEPTHS) -def test_remove_key_from_trie(benchmark, # pyright: ignore[reportUnknownParameterType, reportMissingParameterType] +def test_remove_key_from_trie(benchmark, runtime_validation: bool, depth: int) -> None: '''Benchmark remove() method for GeneralizedTrie. @@ -380,12 +377,12 @@ def setup(): rounds = len(test_data) # Rounds limited to prevent exhaustion gc.collect() - benchmark.extra_info['depth'] = depth # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['number_of_words'] = len(TEST_DATA[depth]) # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['runtime_validation'] = runtime_validation # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['average_word_length'] = ( # pyright: ignore[reportUnknownMemberType] + benchmark.extra_info['depth'] = depth + benchmark.extra_info['number_of_words'] = len(TEST_DATA[depth]) + benchmark.extra_info['runtime_validation'] = runtime_validation + benchmark.extra_info['average_word_length'] = ( sum(len(word) for word in test_data) / len(test_data) if test_data else 0) - benchmark.pedantic(benchmark_trie.remove, # pyright: ignore[reportUnknownMemberType] + benchmark.pedantic(benchmark_trie.remove, setup=setup, rounds=rounds) @@ -394,7 +391,7 @@ def setup(): **BENCHMARK_CONFIG) @pytest.mark.parametrize('runtime_validation', [False, True]) @pytest.mark.parametrize('depth', TEST_DEPTHS) -def test_get(benchmark, # pyright: ignore[reportUnknownParameterType, reportMissingParameterType] +def test_get(benchmark, runtime_validation: bool, depth: int) -> None: '''Benchmark get() method for GeneralizedTrie. @@ -411,12 +408,12 @@ def setup(): rounds = len(test_data) # Rounds limited to prevent exhaustion gc.collect() - benchmark.extra_info['depth'] = depth # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['number_of_words'] = len(TEST_DATA[depth]) # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['runtime_validation'] = runtime_validation # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['average_word_length'] = ( # pyright: ignore[reportUnknownMemberType] + benchmark.extra_info['depth'] = depth + benchmark.extra_info['number_of_words'] = len(TEST_DATA[depth]) + benchmark.extra_info['runtime_validation'] = runtime_validation + benchmark.extra_info['average_word_length'] = ( sum(len(word) for word in test_data) / len(test_data) if test_data else 0) - benchmark.pedantic(benchmark_trie.get, # pyright: ignore[reportUnknownMemberType] + benchmark.pedantic(benchmark_trie.get, setup=setup, rounds=rounds) @@ -424,7 +421,7 @@ def setup(): @pytest.mark.benchmark(group="Microbenchmark prefixes() using synthetic data", **BENCHMARK_CONFIG) @pytest.mark.parametrize('runtime_validation', [False, True]) @pytest.mark.parametrize('depth', [3, 4, 5, 6, 7, 8, 9]) -def test_prefixes(benchmark, # pyright: ignore[reportMissingParameterType, reportUnknownParameterType] +def test_prefixes(benchmark, runtime_validation: bool, depth: int): """Benchmark trie prefixes() method. @@ -443,7 +440,7 @@ def test_prefixes(benchmark, # pyright: ignore[reportMissingParameterType, repo trie.runtime_validation = runtime_validation search_key = TEST_DATA[depth][0] - def helper_prefixes(trie: GeneralizedTrie, search_key: GeneralizedKey) -> list[GeneralizedKey]: + def helper_prefixes(trie: GeneralizedTrie, search_key: GeneralizedKey) -> list[TrieEntry]: return list(trie.prefixes(search_key)) gc.collect() @@ -453,10 +450,10 @@ def helper_prefixes(trie: GeneralizedTrie, search_key: GeneralizedKey) -> list[G # with additional overhead vs the generator approach. results: list[GeneralizedKey] = cast(list[GeneralizedKey], benchmark(helper_prefixes, trie, search_key)) - benchmark.extra_info['number_of_matches_per_query'] = len(results) # pyright: ignore - benchmark.extra_info['keys_in_trie'] = len(trie) # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['depth'] = depth # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['runtime_validation'] = runtime_validation # pyright: ignore[reportUnknownMemberType] + benchmark.extra_info['number_of_matches_per_query'] = len(results) + benchmark.extra_info['keys_in_trie'] = len(trie) + benchmark.extra_info['depth'] = depth + benchmark.extra_info['runtime_validation'] = runtime_validation @pytest.mark.benchmark(group="Microbenchmark prefixed_by() using synthetic data", @@ -465,7 +462,7 @@ def helper_prefixes(trie: GeneralizedTrie, search_key: GeneralizedKey) -> list[G @pytest.mark.parametrize('trie_depth', [7]) @pytest.mark.parametrize('key_depth', [2, 3, 4]) # Focus on manageable depths @pytest.mark.parametrize('search_depth', [1, 2, 3]) # Focus on manageable depths -def test_prefixed_by(benchmark, # pyright: ignore[reportMissingParameterType, reportUnknownParameterType] +def test_prefixed_by(benchmark, runtime_validation: bool, trie_depth: int, key_depth: int, @@ -490,7 +487,7 @@ def test_prefixed_by(benchmark, # pyright: ignore[reportMissingParameterType, r def helper_prefixed_by(trie: GeneralizedTrie, search_key: GeneralizedKey, - search_depth: int) -> list[GeneralizedKey]: + search_depth: int) -> list[TrieEntry]: return list(trie.prefixed_by(search_key, search_depth)) gc.collect() @@ -500,12 +497,12 @@ def helper_prefixed_by(trie: GeneralizedTrie, # with additional overhead vs the generator approach. results: list[GeneralizedKey] = cast(list[GeneralizedKey], benchmark(helper_prefixed_by, trie, search_key, search_depth)) - benchmark.extra_info['number_of_matches_per_query'] = len(results) # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['keys_in_trie'] = len(trie) # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['trie_depth'] = trie_depth # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['search_depth'] = search_depth # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['key_depth'] = key_depth # pyright: ignore[reportUnknownMemberType] - benchmark.extra_info['runtime_validation'] = runtime_validation # pyright: ignore[reportUnknownMemberType] + benchmark.extra_info['number_of_matches_per_query'] = len(results) + benchmark.extra_info['keys_in_trie'] = len(trie) + benchmark.extra_info['trie_depth'] = trie_depth + benchmark.extra_info['search_depth'] = search_depth + benchmark.extra_info['key_depth'] = key_depth + benchmark.extra_info['runtime_validation'] = runtime_validation if __name__ == "__main__": diff --git a/bench/benchmark_gentri.py b/bench/benchmark_gentri.py index 50815f8..221147a 100755 --- a/bench/benchmark_gentri.py +++ b/bench/benchmark_gentri.py @@ -4,6 +4,14 @@ Benchmark for the Generalized Trie implementation. This script runs a series of tests to measure the performance of the Generalized Trie against a set of predefined test cases. + +The Generalized Trie is a data structure that allows for efficient storage and retrieval +of keys that can be composed of various types of tokens, not just strings. + + +The benchmark includes tests for insertion, search, and deletion operations, +as well as tests for different key depths and symbol sets. + ''' # pylint: disable=wrong-import-position, too-many-instance-attributes # pylint: disable=too-many-positional-arguments, too-many-arguments, too-many-locals @@ -636,7 +644,7 @@ class BenchCase: graph_y_starts_at_zero: bool = True graph_x_labels_rotation: float = 0.0 - def __post_init__(self): + def __post_init__(self) -> None: self.results: list[BenchResults] = [] @property @@ -650,7 +658,7 @@ def expanded_kwargs_variations(self) -> list[dict[str, Any]]: values = [self.kwargs_variations[key] for key in keys] return [dict(zip(keys, v)) for v in itertools.product(*values)] - def run(self): + def run(self) -> None: """Run the benchmark tests. This method will execute the benchmark for each combination of @@ -1042,7 +1050,7 @@ def default_runner( description=f'[green] Benchmarking {group} (iteration {iteration_pass:<6d}; ' f'time {0.00:<3.2f}s)') PROGRESS.start_task(TASKS[tasks_name]) - total_elapsed: float = 0 + total_elapsed: int = 0 iterations_list: list[BenchIteration] = [] while ((iteration_pass <= iterations_min or wall_time < min_stop_at) and wall_time < max_stop_at): @@ -1050,7 +1058,7 @@ def default_runner( iteration_result = BenchIteration() iteration_result.elapsed = 0 - if isinstance(setup, Callable): + if callable(setup): setup() # Timer for benchmarked code @@ -1058,7 +1066,7 @@ def default_runner( action() timer_end: int = DEFAULT_TIMER() - if isinstance(teardown, Callable): + if callable(teardown): teardown() if iteration_pass == 1: @@ -1724,13 +1732,13 @@ def run_benchmarks(args: Namespace): if case.results: if args.json or args.json_data: data_export.append(case.as_dict(args=args)) - + graph_file: Path if args.graph: if args.ops: - graph_file: Path = benchmark_run_dir.joinpath(f'benchmark_graph_ops_{case.group[:60]}.svg') + graph_file = benchmark_run_dir.joinpath(f'benchmark_graph_ops_{case.group[:60]}.svg') case.plot_ops_results(graph_file) if args.timing: - graph_file: Path = benchmark_run_dir.joinpath(f'benchmark_graph_timing_{case.group[:60]}.svg') + graph_file = benchmark_run_dir.joinpath(f'benchmark_graph_timing_{case.group[:60]}.svg') case.plot_timing_results(graph_file) if args.csv: @@ -1782,7 +1790,7 @@ def run_benchmarks(args: Namespace): PROGRESS.remove_task(task) -def main(): +def main() -> None: """Main entry point for running benchmarks.""" parser = ArgumentParser(description='Run GeneralizedTrie benchmarks.') parser.add_argument('--verbose', action='store_true', help='Enable verbose output') diff --git a/docs/html/.buildinfo.bak b/docs/html/.buildinfo.bak index 88091ad..906b29b 100644 --- a/docs/html/.buildinfo.bak +++ b/docs/html/.buildinfo.bak @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file records the configuration used when building these files. When it is not found, a full rebuild will be done. -config: e4d6d212882c2343c907aa4e56dbd288 +config: 7ccba592787a2b5a3e11d3594a57a1d6 tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/docs/html/_static/documentation_options.js b/docs/html/_static/documentation_options.js index bd9a7f8..df9d7e5 100644 --- a/docs/html/_static/documentation_options.js +++ b/docs/html/_static/documentation_options.js @@ -1,5 +1,5 @@ const DOCUMENTATION_OPTIONS = { - VERSION: '0.9.4', + VERSION: '0.9.6', LANGUAGE: 'en', COLLAPSE_INDEX: false, BUILDER: 'html', diff --git a/docs/html/benchmarks.html b/docs/html/benchmarks.html index 9d4fa91..5d36a4d 100644 --- a/docs/html/benchmarks.html +++ b/docs/html/benchmarks.html @@ -7,9 +7,9 @@ -