diff --git a/.github/workflows/performance-checks.yml b/.github/workflows/performance-checks.yml new file mode 100644 index 0000000000..364c119d45 --- /dev/null +++ b/.github/workflows/performance-checks.yml @@ -0,0 +1,122 @@ +name: Performance Checks + +# Runs performance benchmark scripts from other_resources/performance-checks/ +# manually on demand. Use the `scripts` input to choose which scripts to run. + +on: + push: # TODO: consider running on a schedule (e.g. weekly) but not on every push + branches: ["main", "reroll"] + workflow_dispatch: + inputs: + scripts: + description: >- + Comma-separated list of script filenames to run from + other_resources/performance-checks/ (e.g. + "fast-channel-random.py" or "fast-channel-random.py,other-script.py"). + Use "all" to run every *.py script found in that directory. + required: false + default: "all" + type: string + python-version: + description: >- + Python version to use (must be available via setup-python and + compatible with the uv.lock in this repo). + required: false + default: "3.11" + type: string + +jobs: + discover: + name: Discover scripts + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.build-matrix.outputs.matrix }} + steps: + - uses: actions/checkout@v6 + + - name: Build script matrix + id: build-matrix + shell: python + run: | + import json, os, pathlib, sys + + scripts_dir = pathlib.Path("other_resources/performance-checks") + requested = "${{ inputs.scripts }}".strip() + + all_scripts = sorted(p.name for p in scripts_dir.glob("*.py")) + + if not all_scripts: + print("::error::No *.py scripts found in other_resources/performance-checks/") + sys.exit(1) + + # Empty string occurs on push/schedule triggers where inputs are not set; + # treat it the same as the explicit "all" keyword. + if not requested or requested.lower() == "all": + selected = all_scripts + else: + candidates = [s.strip() for s in requested.split(",") if s.strip()] + missing = [c for c in candidates if c not in all_scripts] + if missing: + print(f"::error::The following requested scripts were not found: {missing}") + print(f"::error::Available scripts: {all_scripts}") + sys.exit(1) + selected = candidates + + matrix = {"script": selected} + print(f"Selected scripts: {selected}") + with open(os.environ["GITHUB_OUTPUT"], "a") as fh: + fh.write(f"matrix={json.dumps(matrix)}\n") + + run-script: + name: "Run ${{ matrix.script }} (${{ matrix.os }})" + needs: discover + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest] + script: ${{ fromJson(needs.discover.outputs.matrix).script }} + + # Force UTF-8 I/O on Windows (default console encoding is cp1252 which + # cannot represent box-drawing characters used in benchmark output). + env: + PYTHONUTF8: "1" + + defaults: + run: + # bash is available on all GitHub-hosted runners (Git Bash on Windows) + # and gives us consistent behaviour for tee, path separators, etc. + shell: bash + + steps: + - uses: actions/checkout@v6 + + - name: Install uv + uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 + with: + enable-cache: true + cache-dependency-glob: "uv.lock" + + - name: Set up Python ${{ inputs.python-version || '3.11' }} + uses: actions/setup-python@v6 + with: + python-version: ${{ inputs.python-version || '3.11' }} + + - name: Install activitysim + run: | + uv sync --locked + + - name: Run ${{ matrix.script }} + run: | + set -o pipefail + uv run python other_resources/performance-checks/${{ matrix.script }} \ + 2>&1 | tee perf-output-${{ matrix.os }}-${{ matrix.script }}.txt + + - name: Upload output + if: always() + uses: actions/upload-artifact@v7 + with: + # artifact names must be unique across the matrix, so include the OS + name: perf-output-${{ matrix.os }}-${{ matrix.script }} + path: perf-output-${{ matrix.os }}-${{ matrix.script }}.txt + retention-days: 90 diff --git a/activitysim/abm/test/test_pipeline/output/.gitignore b/activitysim/abm/test/test_pipeline/output/.gitignore deleted file mode 100644 index 9cf6da1afa..0000000000 --- a/activitysim/abm/test/test_pipeline/output/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -*.csv -*.txt -*.h5 -*.yaml -*.omx -*.mmap diff --git a/activitysim/abm/test/test_pipeline/output/cache/.gitignore b/activitysim/abm/test/test_pipeline/output/cache/.gitignore deleted file mode 100644 index a55e67af68..0000000000 --- a/activitysim/abm/test/test_pipeline/output/cache/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.mmap diff --git a/activitysim/abm/test/test_pipeline/output/trace/.gitignore b/activitysim/abm/test/test_pipeline/output/trace/.gitignore deleted file mode 100644 index afed0735dc..0000000000 --- a/activitysim/abm/test/test_pipeline/output/trace/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.csv diff --git a/activitysim/abm/test/test_pipeline/test_pipeline.py b/activitysim/abm/test/test_pipeline/test_pipeline.py index 2d837258a5..0931ca7cd3 100644 --- a/activitysim/abm/test/test_pipeline/test_pipeline.py +++ b/activitysim/abm/test/test_pipeline/test_pipeline.py @@ -37,7 +37,21 @@ def example_path(dirname): return str(importlib.resources.files("activitysim").joinpath(resource)) -def setup_dirs(ancillary_configs_dir=None, data_dir=None): +# Run every parametrized test against both the legacy SimpleChannel and the +# new FastChannel implementations. Tests use ``channel_type`` to (a) inform +# the workflow which channel implementation to use, (b) isolate per-type +# output directories so checkpoint stores written by one channel type cannot +# be read by the other, and (c) look up the appropriate regression-expected +# values from per-channel-type tables defined below. +CHANNEL_TYPES = ("simple", "fast", "faster") + + +@pytest.fixture(params=CHANNEL_TYPES) +def channel_type(request): + return request.param + + +def setup_dirs(ancillary_configs_dir=None, data_dir=None, channel_type="fast"): # ancillary_configs_dir is used by run_mp to test multiprocess test_pipeline_configs_dir = os.path.join(os.path.dirname(__file__), "configs") @@ -47,7 +61,13 @@ def setup_dirs(ancillary_configs_dir=None, data_dir=None): if ancillary_configs_dir is not None: configs_dir = [ancillary_configs_dir] + configs_dir - output_dir = os.path.join(os.path.dirname(__file__), "output") + # Use a per-channel-type output subdirectory so checkpoint stores and skim + # caches written under one channel implementation do not interfere with + # those written under the other. + output_dir = os.path.join( + os.path.dirname(__file__), "output", f"channel_{channel_type}" + ) + os.makedirs(output_dir, exist_ok=True) if not data_dir: data_dir = example_path("data") @@ -58,6 +78,9 @@ def setup_dirs(ancillary_configs_dir=None, data_dir=None): data_dir=data_dir, ) + # Select the channel implementation requested by the test parameter. + state.settings.rng_channel_type = channel_type + state.logging.config_logger() state.tracing.delete_output_files("csv") @@ -77,8 +100,8 @@ def close_handlers(): logger.setLevel(logging.NOTSET) -def test_rng_access(): - state = setup_dirs() +def test_rng_access(channel_type): + state = setup_dirs(channel_type=channel_type) state.settings.rng_base_seed = 0 state.checkpoint.restore() @@ -86,15 +109,40 @@ def test_rng_access(): rng = state.get_rn_generator() assert isinstance(rng, random.Random) + assert rng.channel_type == channel_type state.checkpoint.close_store() -def regress_mini_auto(state: workflow.State): +# Per-channel-type regression-expected values for the mini-pipeline tests. +# The "simple" values are the long-standing reference values; the "fast" +# values were captured from a clean run with the FastChannel implementation +# and are checked here to guard against unintended drift in the new +# vectorised PCG64 streams. +_MINI_AUTO_HH_IDS = [1099626, 1173905, 1196298, 1286259] +_MINI_AUTO_EXPECTED = { + "simple": [1, 1, 0, 0], + "fast": [0, 0, 1, 0], + "faster": [2, 0, 0, 1], +} + +_MINI_MTF_PER_IDS = { + "simple": [2566701, 2566702, 3061895], + "fast": [3188482, 3188483, 3188484], + "faster": [2566701, 2566702, 3188482], +} +_MINI_MTF_EXPECTED = { + "simple": ["school1", "school1", "work1"], + "fast": ["work1", "work1", "work_and_school"], + "faster": ["school1", "school1", "work1"], +} + + +def regress_mini_auto(state: workflow.State, channel_type: str = "simple"): # regression test: these are among the middle households in households table # should be the same results as in run_mp (multiprocessing) test case - hh_ids = [1099626, 1173905, 1196298, 1286259] - choices = [1, 1, 0, 0] + hh_ids = _MINI_AUTO_HH_IDS + choices = _MINI_AUTO_EXPECTED[channel_type] expected_choice = pd.Series( choices, index=pd.Index(hh_ids, name="household_id"), name="auto_ownership" ) @@ -106,12 +154,15 @@ def regress_mini_auto(state: workflow.State): offset = ( HOUSEHOLDS_SAMPLE_SIZE // 2 ) # choose something midway as hh_id ordered by hh size - print("auto_choice\n%s" % auto_choice.head(offset).tail(4)) + print( + "auto_choice (channel=%s)\n%s" + % (channel_type, auto_choice.head(offset).tail(4)) + ) auto_choice = auto_choice.reindex(hh_ids) """ - auto_choice + auto_choice (simple channel) household_id 1099626 1 1173905 1 @@ -122,14 +173,14 @@ def regress_mini_auto(state: workflow.State): pdt.assert_series_equal(auto_choice, expected_choice, check_dtype=False) -def regress_mini_mtf(state: workflow.State): +def regress_mini_mtf(state: workflow.State, channel_type: str = "simple"): mtf_choice = ( state.checkpoint.load_dataframe("persons").sort_index().mandatory_tour_frequency ) # these choices are for pure regression - their appropriateness has not been checked - per_ids = [2566701, 2566702, 3061895] - choices = ["school1", "school1", "work1"] + per_ids = _MINI_MTF_PER_IDS[channel_type] + choices = _MINI_MTF_EXPECTED[channel_type] expected_choice = pd.Series( choices, index=pd.Index(per_ids, name="person_id"), @@ -139,10 +190,16 @@ def regress_mini_mtf(state: workflow.State): mtf_choice = mtf_choice[mtf_choice != ""] # drop null (empty string) choices offset = len(mtf_choice) // 2 # choose something midway as hh_id ordered by hh size - print("mtf_choice\n%s" % mtf_choice.head(offset).tail(3)) + print( + "mtf_choice (channel=%s)\n%s" % (channel_type, mtf_choice.head(offset).tail(3)) + ) + print( + "mtf_choice for regression per_ids (channel=%s):\n%s" + % (channel_type, mtf_choice.astype(str).reindex(per_ids)) + ) """ - mtf_choice + mtf_choice (simple channel) person_id 2566701 school1 2566702 school1 @@ -165,10 +222,10 @@ def regress_mini_location_choice_logsums(state: workflow.State): assert "workplace_location_logsum" not in persons -def test_mini_pipeline_run(): +def test_mini_pipeline_run(channel_type): from activitysim.abm.tables.skims import network_los_preload - state = setup_dirs() + state = setup_dirs(channel_type=channel_type) state.get(network_los_preload) state.settings.households_sample_size = HOUSEHOLDS_SAMPLE_SIZE @@ -185,12 +242,12 @@ def test_mini_pipeline_run(): state.run(models=_MODELS, resume_after=None) - regress_mini_auto(state) + regress_mini_auto(state, channel_type=channel_type) state.run.by_name("cdap_simulate") state.run.by_name("mandatory_tour_frequency") - regress_mini_mtf(state) + regress_mini_mtf(state, channel_type=channel_type) regress_mini_location_choice_logsums(state) # try to get a non-existant table @@ -213,12 +270,12 @@ def test_mini_pipeline_run(): close_handlers() -def test_mini_pipeline_run2(): +def test_mini_pipeline_run2(channel_type): # the important thing here is that we should get # exactly the same results as for test_mini_pipeline_run # when we restart pipeline - state = setup_dirs() + state = setup_dirs(channel_type=channel_type) from activitysim.abm.tables.skims import network_los_preload state.get(network_los_preload) @@ -236,7 +293,7 @@ def test_mini_pipeline_run2(): state.checkpoint.restore("auto_ownership_simulate") - regress_mini_auto(state) + regress_mini_auto(state, channel_type=channel_type) # try to run a model already in pipeline with pytest.raises(DuplicateWorkflowNameError) as excinfo: @@ -247,7 +304,7 @@ def test_mini_pipeline_run2(): state.run.by_name("cdap_simulate") state.run.by_name("mandatory_tour_frequency") - regress_mini_mtf(state) + regress_mini_mtf(state, channel_type=channel_type) # should be able to get this before pipeline is closed (from existing open store) checkpoints_df = state.checkpoint.get_inventory() @@ -265,10 +322,10 @@ def test_mini_pipeline_run2(): close_handlers() -def test_mini_pipeline_run3(): +def test_mini_pipeline_run3(channel_type): # test that hh_ids setting overrides household sampling - state = setup_dirs() + state = setup_dirs(channel_type=channel_type) state.settings.hh_ids = "override_hh_ids.csv" households = state.get_dataframe("households") @@ -294,8 +351,9 @@ def full_run( trace_hh_id=None, trace_od=None, check_for_variability=False, + channel_type="fast", ): - state = setup_dirs() + state = setup_dirs(channel_type=channel_type) state.settings.households_sample_size = households_sample_size state.settings.chunk_size = chunk_size @@ -318,10 +376,36 @@ def full_run( return state, tour_count -EXPECT_TOUR_COUNT = 121 +EXPECT_TOUR_COUNT = { + "simple": 121, + "fast": 108, + "faster": 102, +} + + +# Per-channel-type expected per-tour values for HH_ID, sorted by +# (person_id, tour_category, tour_num). These were captured from clean +# reference runs and are checked here to guard against unintended drift. +_EXPECT_PERSON_IDS = { + "simple": [325051, 325051, 325051, 325052, 325052, 325052], + "fast": [325051, 325051, 325051, 325052], + "faster": [325051, 325052], +} +_EXPECT_TOUR_TYPES = { + "simple": ["othdiscr", "work", "work", "business", "work", "othmaint"], + "fast": ["work", "escort", "eatout", "work"], + "faster": ["shopping", "work"], +} -def regress_tour_modes(tours_df): +_EXPECT_MODES = { + "simple": ["WALK", "WALK", "SHARED3FREE", "WALK", "WALK_LOC", "WALK"], + "fast": ["WALK", "TNC_SHARED", "WALK", "WALK"], + "faster": ["WALK", "WALK_LOC"], +} + + +def regress_tour_modes(tours_df, channel_type: str = "simple"): mode_cols = ["tour_mode", "person_id", "tour_type", "tour_num", "tour_category"] tours_df = tours_df[tours_df.household_id == HH_ID] @@ -329,9 +413,10 @@ def regress_tour_modes(tours_df): tours_df.tour_category = tours_df.tour_category.astype(str) tours_df = tours_df.sort_values(by=["person_id", "tour_category", "tour_num"]) - print("mode_df\n%s" % tours_df[mode_cols]) + print("mode_df (channel=%s)\n%s" % (channel_type, tours_df[mode_cols])) """ + simple channel: tour_mode person_id tour_type tour_num tour_category tour_id 13327106 WALK 325051 othdiscr 1 joint @@ -340,27 +425,19 @@ def regress_tour_modes(tours_df): 13327132 WALK 325052 business 1 atwork 13327171 WALK_LOC 325052 work 1 mandatory 13327160 WALK 325052 othmaint 1 non_mandatory - """ - EXPECT_PERSON_IDS = [ - 325051, - 325051, - 325051, - 325052, - 325052, - 325052, - ] - - EXPECT_TOUR_TYPES = ["othdiscr", "work", "work", "business", "work", "othmaint"] + fast channel: + tour_mode person_id tour_type tour_num tour_category + tour_id + 13327130 WALK 325051 work 1 mandatory + 13327100 TNC_SHARED 325051 escort 1 non_mandatory + 13327097 WALK 325051 eatout 2 non_mandatory + 13327171 WALK 325052 work 1 mandatory + """ - EXPECT_MODES = [ - "WALK", - "WALK", - "SHARED3FREE", - "WALK", - "WALK_LOC", - "WALK", - ] + EXPECT_PERSON_IDS = _EXPECT_PERSON_IDS[channel_type] + EXPECT_TOUR_TYPES = _EXPECT_TOUR_TYPES[channel_type] + EXPECT_MODES = _EXPECT_MODES[channel_type] assert len(tours_df) == len(EXPECT_PERSON_IDS) assert (tours_df.person_id.values == EXPECT_PERSON_IDS).all() @@ -368,7 +445,7 @@ def regress_tour_modes(tours_df): assert (tours_df.tour_mode.astype(str).values == EXPECT_MODES).all() -def regress(state: workflow.State): +def regress(state: workflow.State, channel_type: str = "simple"): persons_df = state.checkpoint.load_dataframe("persons") persons_df = persons_df[persons_df.household_id == HH_ID] print("persons_df\n%s" % persons_df[["value_of_time", "distance_to_work"]]) @@ -383,27 +460,31 @@ def regress(state: workflow.State): tours_df = state.checkpoint.load_dataframe("tours") - regress_tour_modes(tours_df) + regress_tour_modes(tours_df, channel_type=channel_type) assert tours_df.shape[0] > 0 assert not tours_df.tour_mode.isnull().any() - # optional logsum column was added to all tours except mandatory - assert "destination_logsum" in tours_df - if ( - tours_df.destination_logsum.isnull() != (tours_df.tour_category == "mandatory") - ).any(): - print( - tours_df[ - ( - tours_df.destination_logsum.isnull() - != (tours_df.tour_category == "mandatory") - ) - ] - ) - assert ( - tours_df.destination_logsum.isnull() == (tours_df.tour_category == "mandatory") - ).all() + if "destination_logsum" in tours_df: + # optional logsum column was added to all tours except mandatory + # since there are now multiple different random generators, there is no + # guarantee that there are any non-mandatory tours (e.g. in the singleton test) + if ( + tours_df.destination_logsum.isnull() + != (tours_df.tour_category == "mandatory") + ).any(): + print( + tours_df[ + ( + tours_df.destination_logsum.isnull() + != (tours_df.tour_category == "mandatory") + ) + ] + ) + assert ( + tours_df.destination_logsum.isnull() + == (tours_df.tour_category == "mandatory") + ).all() # mode choice logsum calculated for all tours assert "mode_choice_logsum" in tours_df @@ -434,7 +515,15 @@ def regress(state: workflow.State): trip_matrices.close() -def test_full_run1(): +def _check_tour_count(channel_type, tour_count): + expect = EXPECT_TOUR_COUNT[channel_type] + assert tour_count == expect, "EXPECT_TOUR_COUNT %s but got tour_count %s" % ( + expect, + tour_count, + ) + + +def test_full_run1(channel_type): if SKIP_FULL_RUN: return @@ -442,39 +531,38 @@ def test_full_run1(): trace_hh_id=HH_ID, check_for_variability=True, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE, + channel_type=channel_type, ) print("tour_count", tour_count) - assert ( - tour_count == EXPECT_TOUR_COUNT - ), "EXPECT_TOUR_COUNT %s but got tour_count %s" % (EXPECT_TOUR_COUNT, tour_count) + _check_tour_count(channel_type, tour_count) - regress(state) + regress(state, channel_type=channel_type) state.checkpoint.close_store() -def test_full_run2(): +def test_full_run2(channel_type): # resume_after should successfully load tours table and replicate results if SKIP_FULL_RUN: return state, tour_count = full_run( - resume_after="non_mandatory_tour_scheduling", trace_hh_id=HH_ID + resume_after="non_mandatory_tour_scheduling", + trace_hh_id=HH_ID, + channel_type=channel_type, ) - assert ( - tour_count == EXPECT_TOUR_COUNT - ), "EXPECT_TOUR_COUNT %s but got tour_count %s" % (EXPECT_TOUR_COUNT, tour_count) + _check_tour_count(channel_type, tour_count) - regress(state) + regress(state, channel_type=channel_type) state.checkpoint.close_store() -def test_full_run3_with_chunks(): +def test_full_run3_with_chunks(channel_type): # should get the same result with different chunk size if SKIP_FULL_RUN: @@ -484,33 +572,34 @@ def test_full_run3_with_chunks(): trace_hh_id=HH_ID, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE, chunk_size=500000, + channel_type=channel_type, ) - assert ( - tour_count == EXPECT_TOUR_COUNT - ), "EXPECT_TOUR_COUNT %s but got tour_count %s" % (EXPECT_TOUR_COUNT, tour_count) + _check_tour_count(channel_type, tour_count) - regress(state) + regress(state, channel_type=channel_type) state.checkpoint.close_store() -def test_full_run4_stability(): +def test_full_run4_stability(channel_type): # hh should get the same result with different sample size if SKIP_FULL_RUN: return state, tour_count = full_run( - trace_hh_id=HH_ID, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE - 10 + trace_hh_id=HH_ID, + households_sample_size=HOUSEHOLDS_SAMPLE_SIZE - 10, + channel_type=channel_type, ) - regress(state) + regress(state, channel_type=channel_type) state.checkpoint.close_store() -def test_full_run5_singleton(): +def test_full_run5_singleton(channel_type): # should work with only one hh # run with minimum chunk size to drive potential chunking errors in models # where choosers has multiple rows that all have to be included in the same chunk @@ -519,15 +608,18 @@ def test_full_run5_singleton(): return state, tour_count = full_run( - trace_hh_id=HH_ID, households_sample_size=1, chunk_size=1 + trace_hh_id=HH_ID, + households_sample_size=1, + chunk_size=1, + channel_type=channel_type, ) - regress(state) + regress(state, channel_type=channel_type) state.checkpoint.close_store() if __name__ == "__main__": print("running test_full_run1") - test_full_run1() + test_full_run1("simple") # teardown_function(None) diff --git a/activitysim/core/configuration/top.py b/activitysim/core/configuration/top.py index 825283126c..5d3dd5e6f8 100644 --- a/activitysim/core/configuration/top.py +++ b/activitysim/core/configuration/top.py @@ -1,9 +1,9 @@ from __future__ import annotations -from pathlib import Path -from typing import Any, Literal import struct import time +from pathlib import Path +from typing import Any, Literal from pydantic import model_validator, validator @@ -743,6 +743,28 @@ def _check_store_skims_in_shm(self): rng_base_seed: Union[int, None] = 0 """Base seed for pseudo-random number generator.""" + rng_channel_type: Literal["simple", "fast", "faster"] = "simple" + """ + Which random-channel implementation to use for per-row RNG streams. + + * ``"fast"`` (default) + Use :class:`activitysim.core.fast_random.FastChannel`, with a + vectorised PCG64-based implementation. + * ``"faster"`` (default) + Use :class:`activitysim.core.fast_random.FastChannel`, with a + vectorised SFC64-based implementation and hash-based reseeding. + * ``"simple"`` + Use the legacy :class:`activitysim.core.random.SimpleChannel`, + which seeds a per-row :class:`numpy.random.RandomState` on the fly. + Slower but provided for backward compatibility / reproducibility + with results generated by older ActivitySim versions. + + Note the default is `simple` not because simple is preferred, but because + many integration tests are built looking for the results derived from the + original `simple` channel. New models should generally prefer the `fast` + channel type, as it is faster. + """ + duplicate_step_execution: Literal["error", "allow"] = "error" """ How activitysim should handle attempts to re-run a step with the same name. @@ -780,7 +802,7 @@ def _check_store_skims_in_shm(self): """ run checks to validate that YAML settings files are loadable and spec and coefficient csv can be resolved. - should catch many common errors early, including missing required configurations or specified coefficient labels without defined values. + should catch many common errors early, including missing required configurations or specified coefficient labels without defined values. """ skip_failed_choices: bool = True diff --git a/activitysim/core/fast_random/__init__.py b/activitysim/core/fast_random/__init__.py new file mode 100644 index 0000000000..0eecb22bed --- /dev/null +++ b/activitysim/core/fast_random/__init__.py @@ -0,0 +1,8 @@ +from __future__ import annotations + +from ._fast_channel import FastChannel # noqa: F403 + +__all__ = ( # noqa: F405 + # TODO: Add all public symbols here. + "FastChannel", +) diff --git a/activitysim/core/fast_random/_entropy.py b/activitysim/core/fast_random/_entropy.py new file mode 100644 index 0000000000..ba03052035 --- /dev/null +++ b/activitysim/core/fast_random/_entropy.py @@ -0,0 +1,115 @@ +from __future__ import annotations + +from typing import Literal + +import numba as nb +import numpy as np +import pandas as pd + +from ._fast_random import FastGenerator + + +@nb.njit +def _fold_in_256(key_arr: np.ndarray, data: np.uint64) -> np.ndarray: + """ + Numba kernel: mixes a uint64 `data` value into each of the four 64-bit + words of `key_arr` using a MurmurHash3-style finalizer, then applies a + four-step XOR diffusion pass so that every input bit influences all four + output words. + """ + GOLDEN = np.uint64(0x9E3779B97F4A7C15) + C1 = np.uint64(0xFF51AFD7ED558CCD) + C2 = np.uint64(0xC4CEB9FE1A85EC53) + SHIFT = np.uint64(33) + + result = np.empty(4, dtype=np.uint64) + for i in range(4): + # Salt each word position uniquely before mixing + salt = data + np.uint64(i) * GOLDEN + h = key_arr[i] ^ salt + # MurmurHash3 64-bit finalizer + h ^= h >> SHIFT + h *= C1 + h ^= h >> SHIFT + h *= C2 + h ^= h >> SHIFT + result[i] = h + + # Cross-word diffusion: ensure all 256 input bits affect all 256 output bits. + # Four XOR steps form a simple diffusion network at negligible cost. + result[1] ^= result[0] + result[2] ^= result[1] + result[3] ^= result[2] + result[0] ^= result[3] + + return result + + +@nb.njit +def _fold_in_256_batch(key_arr: np.ndarray, data_arr: np.ndarray) -> np.ndarray: + """ + Batch seeding. + + Generates `len(data_arr)` independent 256-bit child states from a single + parent `key_arr` state in one sweep. + + Parameters + ---------- + key_arr : np.ndarray + A 4-element uint64 array representing the parent 256-bit key. + data_arr : np.ndarray + A 1D array of uint64 values to fold into the key. Each value + will produce one child key. + + Returns + ------- + results : np.ndarray + A (N, 4) uint64 array representing the child keys. + """ + n = data_arr.shape[0] + results = np.empty((n, 4), dtype=np.uint64) + for j in range(n): + results[j] = _fold_in_256(key_arr, data_arr[j]) + return results + + +def _fast_entropy_raw(base_seeds: int | list[int], index_keys: pd.Index) -> np.ndarray: + + if isinstance(base_seeds, int): + base_seeds = [base_seeds] + base_seed_sequence = np.random.SeedSequence(base_seeds) + base_state = base_seed_sequence.generate_state(4, dtype=np.uint64) + return _fold_in_256_batch(base_state, np.asarray(index_keys).astype(np.uint64)) + + +_FG_PCG64 = FastGenerator(42, "PCG64") +_FG_SFC64 = FastGenerator(42, "SFC64") + + +def fast_entropy_PCG64(base_seeds: int | list[int], index_keys: pd.Index) -> np.ndarray: + generated_states = _fast_entropy_raw(base_seeds, index_keys) + generated_states[:, -1] |= 1 # Ensure the last word is odd for PCG64 + # make a couple draws to properly mix the state and avoid any initial correlation with the input keys + _FG_PCG64.vector_random_standard_uniform(generated_states, shape=2) + return generated_states + + +def fast_entropy_SFC64(base_seeds: int | list[int], index_keys: pd.Index) -> np.ndarray: + generated_states = _fast_entropy_raw(base_seeds, index_keys) + generated_states[:, -1] = 1 # the last word is a counter, start it at 1 + # SFC initialization typically makes a dozen draws to properly mix the state + _FG_SFC64.vector_random_standard_uniform(generated_states, shape=12) + return generated_states + + +def fast_entropy( + base_seeds: int | list[int], + index_keys: pd.Index, + bit_gen: Literal["PCG64", "SFC64"], +) -> np.ndarray: + if bit_gen == "PCG64": + return fast_entropy_PCG64(base_seeds, index_keys) + elif bit_gen == "SFC64": + return fast_entropy_SFC64(base_seeds, index_keys) + else: + raise ValueError("Unsupported bit gen %s" % bit_gen) diff --git a/activitysim/core/fast_random/_fast_channel.py b/activitysim/core/fast_random/_fast_channel.py new file mode 100644 index 0000000000..5f339c6285 --- /dev/null +++ b/activitysim/core/fast_random/_fast_channel.py @@ -0,0 +1,507 @@ +from __future__ import annotations + +import hashlib +from typing import Literal + +import numpy as np +import pandas as pd +from cffi import FFI + +from ._entropy import fast_entropy +from ._fast_random import FastGenerator, quick_entropy + +# one more than 0xFFFFFFFF so we can wrap using: int64 % _MAX_SEED +_MAX_SEED = 1 << 32 +_SEED_MASK = 0xFFFFFFFF + +_FFI = FFI() + + +def hash32(s): + """ + + Parameters + ---------- + s: str + + Returns + ------- + 32 bit unsigned hash + """ + s = s.encode("utf8") + h = hashlib.md5(s).hexdigest() + return int(h, base=16) & _SEED_MASK + + +class FastChannel: + def __init__( + self, + channel_name: str, + base_seed: int, + domain_df: pd.DataFrame, + step_name: str = "", + bit_generator: Literal["PCG64", "SFC64"] = "PCG64", + entropy_type: Literal["robust", "quick"] = "quick", + ) -> None: + """ + Create a new FastChannel for vectorised PCG64-based random number generation. + + Each row in *domain_df* gets its own independent PCG64 bit-generator whose + initial state is derived from the combination of *base_seed*, + *channel_name*, the current step name, and the row's index value. This + guarantees reproducibility across runs while keeping every row's stream + independent. + + Parameters + ---------- + channel_name : str + Unique name for this channel (e.g. ``"households"``, ``"persons"``). + Hashed into the per-row seed so that different channels produce + distinct streams even when their domain index values overlap. + base_seed : int + Global base seed added to every per-row seed sequence. Change this + value to shift all random streams without breaking their relative + independence. + domain_df : pandas.DataFrame + DataFrame whose index defines the set of agents (rows) managed by + this channel. The index is copied and stored; the DataFrame columns + are ignored. + step_name : str, optional + If non-empty, ``begin_step(step_name)`` is called immediately after + construction so the channel is ready to generate numbers straight + away. Defaults to ``""`` (no step started). + bit_generator : {"SFC64", "PCG64"}, default: "SFC64" + Which bit generator to use for the per-row streams. Defaults to + SFC64, which supports using quick-hash random entropy for maximum + speed at runtime. + entropy_type : {"robust", "quick"}, default: "quick" + The type of entropy used to reseed the bit generators. + Robust entropy uses the numpy SeedSequence tools to create entropy + with strong statistical properties, but is slower to generate. Quick + entropy uses a custom hash-based method that is much faster to generate + but may have a slightly greater risk of non-independent streams. Since + ActivitySim reseeds quite frequently, the practical risk of problems + is low. + + """ + self.base_seed = base_seed + self.channel_name = channel_name + self.channel_seed = hash32(self.channel_name) + self.domain_index = domain_df.index[:0].copy() + self.step_name = None + self.step_seed = None + + # If entropy_type is not given, choose a default appropriate for the bit generator + if entropy_type is None: + if bit_generator == "PCG64": + entropy_type = "robust" + elif bit_generator == "SFC64": + entropy_type = "quick" + else: + raise ValueError(f"unsupported bit generator class: {bit_generator}") + + if entropy_type not in {"robust", "quick"}: + raise ValueError("entropy_type must be 'robust' or 'quick'") + self._entropy_type = entropy_type + self._fast_generator = FastGenerator(bit_gen=bit_generator) + self._state_array = None + self.extend_domain(domain_df) + if step_name: + self.begin_step(step_name) + + def _batch_init_states(self, base_seeds, index_seeds): + if self._entropy_type == "quick": + return fast_entropy( + base_seeds, index_seeds, self._fast_generator._bit_gen_class + ) + elif self._entropy_type == "robust": + new_state = np.empty(shape=[len(index_seeds), 4], dtype=np.uint64) + for n, i in enumerate(index_seeds): + new_state[n, :] = self._fast_generator.get_state_array( + np.random.SeedSequence([*base_seeds, i]) + ) + return new_state + else: + raise ValueError("entropy_type must be 'robust' or 'quick'") + + def extend_domain(self, domain_df: pd.DataFrame) -> None: + """ + Extend the channel's domain by adding new rows from *domain_df*. + + If a step is currently active, the per-row PCG64 state for the new rows + is initialised immediately (using the current ``step_seed``) and + appended to ``self._state_array`` so that random draws can be made for + the extended rows within the same step. + + The index values of *domain_df* must be disjoint from the channel's + existing ``domain_index`` so there is no ambiguity / collision between + rows. + + Parameters + ---------- + domain_df : pandas.DataFrame + DataFrame whose index defines the new agents (rows) to add to the + channel. Columns are ignored. + + Raises + ------ + AssertionError + If any index value in *domain_df* already exists in the channel's + domain. + """ + new_index = domain_df.index + + if new_index.empty: + return + + # new rows must be disjoint from existing domain + assert ( + len(self.domain_index.intersection(new_index)) == 0 + ), "extend_domain: new domain_df index overlaps existing domain" + + if self._state_array is not None: + # we already have state for some rows, so we also need to + # generate state for the new rows and append to existing state array + new_state = self._batch_init_states( + [self.base_seed, self.channel_seed, self.step_seed], new_index + ) + self._state_array = np.concatenate([self._state_array, new_state], axis=0) + + if len(self.domain_index) == 0: + self.domain_index = new_index.copy() + else: + self.domain_index = self.domain_index.append(new_index) + + def _reseed_step(self, force: bool = False) -> None: + """ + Initialise (or re-initialise) the per-row PCG64 states for a new step. + + Must be called before any random-number methods are used within a step. + The method seeds every row's bit-generator from the four-integer sequence + ``[base_seed, channel_seed, step_seed, row_index]`` via + :class:`numpy.random.SeedSequence`, ensuring that: + + * the same step always produces the same stream (reproducibility), and + * different steps produce independent streams (no cross-step correlation). + + Parameters + ---------- + step_name : str + Name of the pipeline step being started (e.g. ``"auto_ownership"``). + Hashed into the seed so that different steps yield distinct streams. + + Raises + ------ + AssertionError + If a step is already active (``end_step`` was not called first). + """ + + if self._state_array is None or force: + # Seed the bit generators, extracting state along the way + self._state_array = self._batch_init_states( + [self.base_seed, self.channel_seed, self.step_seed], self.domain_index + ) + + def begin_step(self, step_name: str) -> None: + """ + Initialise (or re-initialise) the per-row PCG64 states for a new step. + + Must be called before any random-number methods are used within a step. + The method seeds every row's bit-generator from the four-integer sequence + ``[base_seed, channel_seed, step_seed, row_index]`` via + :class:`numpy.random.SeedSequence`, ensuring that: + + * the same step always produces the same stream (reproducibility), and + * different steps produce independent streams (no cross-step correlation). + + Parameters + ---------- + step_name : str + Name of the pipeline step being started (e.g. ``"auto_ownership"``). + Hashed into the seed so that different steps yield distinct streams. + + Raises + ------ + AssertionError + If a step is already active (``end_step`` was not called first). + """ + + assert self.step_name is None + + self.step_name = step_name + self.step_seed = hash32(self.step_name) + + # do NOT reseed immediately, defer until the first call to generate + # any random numbers using this channel. There may not be any such + # calls (most ActivitySim steps only use one of many channels), and + # we want to avoid the overhead of seeding every channel in every + # step when many channels are unused. + + def end_step(self, step_name: str = "") -> None: + """ + Tear down the per-row PCG64 states at the end of a step. + + Clears ``step_name``, ``step_seed``, ``_bitgenerator``, and + ``_state_array`` so that accidental use of stale state after a step + boundary raises an error rather than silently producing wrong numbers. + + Parameters + ---------- + step_name : str, optional + When provided, asserts that the currently active step matches + *step_name* as a consistency check. Pass an empty string (the + default) to skip the check. + + Raises + ------ + AssertionError + If *step_name* is provided and does not match the active step name. + """ + if step_name: + assert self.step_name == step_name + self.step_name = None + self.step_seed = None + self._state_array = None + + def _check_valid_df(self, df: pd.DataFrame) -> np.ndarray: + """ + Validate *df* against the channel's domain and return row positions. + + Performs three checks: + + 1. *df* has no duplicate index values. + 2. Every index value in *df* exists in the channel's domain index. + 3. A step is currently active (``_state_array`` is not ``None``). + + Parameters + ---------- + df : pandas.DataFrame + DataFrame whose index is to be validated. Columns are ignored. + + Returns + ------- + selected_positions : numpy.ndarray + 1-D integer array of shape ``(len(df),)`` containing the positional + indices into ``self.domain_index`` (and therefore into + ``self._state_array``) that correspond to each row of *df*. + + Raises + ------ + ValueError + If *df* contains duplicate index values, if any index value is + absent from the domain, or if no step is currently active. + """ + # check that df.index has no duplicates + if len(df.index.unique()) != len(df.index): + raise ValueError("DataFrame must have unique index") + + selected_positions = self.domain_index.get_indexer(df.index) + + # check that all df.index values were found in self.domain_index + # (skip the check for empty input – min() on a zero-size array errors) + if selected_positions.size and selected_positions.min() < 0: + raise ValueError("DataFrame has index values not found in the domain") + + if self.step_name is None: + raise ValueError("outside of a defined step") + + return selected_positions + + def normal_for_df( + self, + df: pd.DataFrame, + step_name: str, + mu: float | np.ndarray = 0, + sigma: float | np.ndarray = 1, + lognormal: bool = False, + size: int | tuple[int, ...] = 1, + ) -> np.ndarray: + """ + Draw normal (or lognormal) random variates for each row in *df*. + + Uses the vectorised PCG64 state array to generate standard-normal + samples, then affinely transforms them with *mu* and *sigma*. + Successive calls within the same step advance each row's stream + independently. + + Parameters + ---------- + df : pandas.DataFrame + DataFrame whose index selects which agents receive draws. Columns + are ignored. + step_name : str + Name of the currently active step; checked for consistency. + mu : float or array-like, optional + Mean of the normal distribution. A scalar is broadcast across all + rows; an array must have one element per row in *df*. Defaults to + ``0``. + sigma : float or array-like, optional + Standard deviation of the normal distribution. Same broadcasting + rules as *mu*. Defaults to ``1``. + lognormal : bool, optional + When ``True``, return ``exp(normal_sample)`` so that the result + follows a lognormal distribution with the given underlying-normal + parameters. Defaults to ``False``. + size : int or tuple of int, optional + Number of draws per agent. A plain ``int`` *k* yields *k* draws + per row; a tuple gives the per-row shape. Defaults to ``1``. + + Returns + ------- + result : numpy.ndarray + Array of shape ``(len(df), *size)`` containing the random variates. + + Raises + ------ + AssertionError + If *step_name* is ``None`` or does not match the active step. + ValueError + If *df* fails the domain validation performed by + :meth:`_check_valid_df`. + """ + assert step_name is not None + assert step_name == self.step_name + selected_positions = self._check_valid_df(df) + self._reseed_step() + if size is None: + size = 1 + + mu = np.asarray(mu) + sigma = np.asarray(sigma) + result = self._fast_generator.vector_random_standard_normal( + self._state_array, selected_positions=selected_positions, shape=size + ) + result = result * sigma + mu + if lognormal: + result = np.exp(result) + return result + + def random_for_df( + self, + df: pd.DataFrame, + step_name: str, + n: int | tuple[int, ...] = 1, + ) -> np.ndarray: + """ + Draw standard-uniform random variates for each row in *df*. + + Uses the vectorised PCG64 state array to generate draws from U[0, 1). + Successive calls within the same step advance each row's stream + independently. + + Parameters + ---------- + df : pandas.DataFrame + DataFrame whose index selects which agents receive draws. Columns + are ignored. + step_name : str + Name of the currently active step; checked for consistency. + n : int or tuple of int, optional + Number of draws per agent. A plain ``int`` *k* yields *k* draws + per row; a tuple gives the per-row shape. Defaults to ``1``. + + Returns + ------- + rands : numpy.ndarray + Array of shape ``(len(df), *n)`` with values in U[0, 1). + + Raises + ------ + AssertionError + If *step_name* is ``None`` or does not match the active step. + ValueError + If *df* fails the domain validation performed by + :meth:`_check_valid_df`. + """ + assert step_name is not None + assert step_name == self.step_name + selected_positions = self._check_valid_df(df) + self._reseed_step() + return self._fast_generator.vector_random_standard_uniform( + self._state_array, selected_positions=selected_positions, shape=n + ) + + def choice_for_df( + self, + df: pd.DataFrame, + step_name: str, + a: int | np.ndarray, + size: int | tuple[int, ...] = 1, + replace: bool = False, + ) -> np.ndarray: + """ + Apply numpy.random.choice once for each row in df + using the appropriate random channel for each row. + + Concatenate the the choice arrays for every row into a single 1-D ndarray + The resulting array will be of length: size * len(df.index) + This method is designed to support creation of a interaction_dataset + + The columns in df are ignored; the index name and values are used to determine + which random number sequence to to use. + + Parameters + ---------- + df : pandas.DataFrame + df with index name and values corresponding to a registered channel + step_name : str + current step name so we can update row_states seed info + a : 1-D array-like or int + If an ndarray, a random sample is generated from its elements. + If an int, the random sample is generated as if a was np.arange(a). + size : int or tuple of ints + Output shape (per df row). + replace : bool + Whether the sample is with or without replacement. + + Returns + ------- + choices : 1-D ndarray of length: prod(size) * len(df.index) + The generated random samples for each row concatenated into a + single (flat) array. + """ + assert step_name is not None + assert step_name == self.step_name + selected_positions = self._check_valid_df(df) + self._reseed_step() + + # total number of draws required per row + if isinstance(size, (int, np.integer)): + total = int(size) + else: + total = int(np.prod(size)) + + # population to sample from + if isinstance(a, (int, np.integer)): + a_arr = np.arange(int(a)) + else: + a_arr = np.asarray(a) + n_pop = len(a_arr) + + if replace: + # draw `total` uniforms per selected row and map to indices in a + rands = self._fast_generator.vector_random_standard_uniform( + self._state_array, + selected_positions=selected_positions, + shape=total, + ) + idx = (rands * n_pop).astype(np.int64) + # guard against the (vanishingly rare) case rands == 1.0 - epsilon edge + np.minimum(idx, n_pop - 1, out=idx) + sample = a_arr[idx].reshape(-1) + else: + if total > n_pop: + raise ValueError( + "Cannot take a larger sample than population when 'replace=False'" + ) + # draw n_pop uniforms per selected row; argsort produces a random + # permutation of [0, n_pop), and we take the first `total` entries. + rands = self._fast_generator.vector_random_standard_uniform( + self._state_array, + selected_positions=selected_positions, + shape=n_pop, + ) + order = np.argsort(rands, axis=1)[:, :total] + sample = a_arr[order].reshape(-1) + + return sample diff --git a/activitysim/core/fast_random/_fast_random.py b/activitysim/core/fast_random/_fast_random.py new file mode 100644 index 0000000000..91490a8ca6 --- /dev/null +++ b/activitysim/core/fast_random/_fast_random.py @@ -0,0 +1,1934 @@ +# The following C code was lovingly borrowed from numpy: +# https://github.com/numpy/numpy/blob/main/numpy/random/src/distributions/ziggurat_constants.h +# https://github.com/numpy/numpy/blob/main/numpy/random/src/distributions/distributions.c +# +# Copyright (c) 2005-2025, NumPy Developers. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided +# with the distribution. +# +# * Neither the name of the NumPy Developers nor the names of any +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# +# +# static const uint64_t ki_double[] = { +# 0x000EF33D8025EF6AULL, 0x0000000000000000ULL, 0x000C08BE98FBC6A8ULL, +# 0x000DA354FABD8142ULL, 0x000E51F67EC1EEEAULL, 0x000EB255E9D3F77EULL, +# 0x000EEF4B817ECAB9ULL, 0x000F19470AFA44AAULL, 0x000F37ED61FFCB18ULL, +# 0x000F4F469561255CULL, 0x000F61A5E41BA396ULL, 0x000F707A755396A4ULL, +# 0x000F7CB2EC28449AULL, 0x000F86F10C6357D3ULL, 0x000F8FA6578325DEULL, +# 0x000F9724C74DD0DAULL, 0x000F9DA907DBF509ULL, 0x000FA360F581FA74ULL, +# 0x000FA86FDE5B4BF8ULL, 0x000FACF160D354DCULL, 0x000FB0FB6718B90FULL, +# 0x000FB49F8D5374C6ULL, 0x000FB7EC2366FE77ULL, 0x000FBAECE9A1E50EULL, +# 0x000FBDAB9D040BEDULL, 0x000FC03060FF6C57ULL, 0x000FC2821037A248ULL, +# 0x000FC4A67AE25BD1ULL, 0x000FC6A2977AEE31ULL, 0x000FC87AA92896A4ULL, +# 0x000FCA325E4BDE85ULL, 0x000FCBCCE902231AULL, 0x000FCD4D12F839C4ULL, +# 0x000FCEB54D8FEC99ULL, 0x000FD007BF1DC930ULL, 0x000FD1464DD6C4E6ULL, +# 0x000FD272A8E2F450ULL, 0x000FD38E4FF0C91EULL, 0x000FD49A9990B478ULL, +# 0x000FD598B8920F53ULL, 0x000FD689C08E99ECULL, 0x000FD76EA9C8E832ULL, +# 0x000FD848547B08E8ULL, 0x000FD9178BAD2C8CULL, 0x000FD9DD07A7ADD2ULL, +# 0x000FDA9970105E8CULL, 0x000FDB4D5DC02E20ULL, 0x000FDBF95C5BFCD0ULL, +# 0x000FDC9DEBB99A7DULL, 0x000FDD3B8118729DULL, 0x000FDDD288342F90ULL, +# 0x000FDE6364369F64ULL, 0x000FDEEE708D514EULL, 0x000FDF7401A6B42EULL, +# 0x000FDFF46599ED40ULL, 0x000FE06FE4BC24F2ULL, 0x000FE0E6C225A258ULL, +# 0x000FE1593C28B84CULL, 0x000FE1C78CBC3F99ULL, 0x000FE231E9DB1CAAULL, +# 0x000FE29885DA1B91ULL, 0x000FE2FB8FB54186ULL, 0x000FE35B33558D4AULL, +# 0x000FE3B799D0002AULL, 0x000FE410E99EAD7FULL, 0x000FE46746D47734ULL, +# 0x000FE4BAD34C095CULL, 0x000FE50BAED29524ULL, 0x000FE559F74EBC78ULL, +# 0x000FE5A5C8E41212ULL, 0x000FE5EF3E138689ULL, 0x000FE6366FD91078ULL, +# 0x000FE67B75C6D578ULL, 0x000FE6BE661E11AAULL, 0x000FE6FF55E5F4F2ULL, +# 0x000FE73E5900A702ULL, 0x000FE77B823E9E39ULL, 0x000FE7B6E37070A2ULL, +# 0x000FE7F08D774243ULL, 0x000FE8289053F08CULL, 0x000FE85EFB35173AULL, +# 0x000FE893DC840864ULL, 0x000FE8C741F0CEBCULL, 0x000FE8F9387D4EF6ULL, +# 0x000FE929CC879B1DULL, 0x000FE95909D388EAULL, 0x000FE986FB939AA2ULL, +# 0x000FE9B3AC714866ULL, 0x000FE9DF2694B6D5ULL, 0x000FEA0973ABE67CULL, +# 0x000FEA329CF166A4ULL, 0x000FEA5AAB32952CULL, 0x000FEA81A6D5741AULL, +# 0x000FEAA797DE1CF0ULL, 0x000FEACC85F3D920ULL, 0x000FEAF07865E63CULL, +# 0x000FEB13762FEC13ULL, 0x000FEB3585FE2A4AULL, 0x000FEB56AE3162B4ULL, +# 0x000FEB76F4E284FAULL, 0x000FEB965FE62014ULL, 0x000FEBB4F4CF9D7CULL, +# 0x000FEBD2B8F449D0ULL, 0x000FEBEFB16E2E3EULL, 0x000FEC0BE31EBDE8ULL, +# 0x000FEC2752B15A15ULL, 0x000FEC42049DAFD3ULL, 0x000FEC5BFD29F196ULL, +# 0x000FEC75406CEEF4ULL, 0x000FEC8DD2500CB4ULL, 0x000FECA5B6911F12ULL, +# 0x000FECBCF0C427FEULL, 0x000FECD38454FB15ULL, 0x000FECE97488C8B3ULL, +# 0x000FECFEC47F91B7ULL, 0x000FED1377358528ULL, 0x000FED278F844903ULL, +# 0x000FED3B10242F4CULL, 0x000FED4DFBAD586EULL, 0x000FED605498C3DDULL, +# 0x000FED721D414FE8ULL, 0x000FED8357E4A982ULL, 0x000FED9406A42CC8ULL, +# 0x000FEDA42B85B704ULL, 0x000FEDB3C8746AB4ULL, 0x000FEDC2DF416652ULL, +# 0x000FEDD171A46E52ULL, 0x000FEDDF813C8AD3ULL, 0x000FEDED0F909980ULL, +# 0x000FEDFA1E0FD414ULL, 0x000FEE06AE124BC4ULL, 0x000FEE12C0D95A06ULL, +# 0x000FEE1E579006E0ULL, 0x000FEE29734B6524ULL, 0x000FEE34150AE4BCULL, +# 0x000FEE3E3DB89B3CULL, 0x000FEE47EE2982F4ULL, 0x000FEE51271DB086ULL, +# 0x000FEE59E9407F41ULL, 0x000FEE623528B42EULL, 0x000FEE6A0B5897F1ULL, +# 0x000FEE716C3E077AULL, 0x000FEE7858327B82ULL, 0x000FEE7ECF7B06BAULL, +# 0x000FEE84D2484AB2ULL, 0x000FEE8A60B66343ULL, 0x000FEE8F7ACCC851ULL, +# 0x000FEE94207E25DAULL, 0x000FEE9851A829EAULL, 0x000FEE9C0E13485CULL, +# 0x000FEE9F557273F4ULL, 0x000FEEA22762CCAEULL, 0x000FEEA4836B42ACULL, +# 0x000FEEA668FC2D71ULL, 0x000FEEA7D76ED6FAULL, 0x000FEEA8CE04FA0AULL, +# 0x000FEEA94BE8333BULL, 0x000FEEA950296410ULL, 0x000FEEA8D9C0075EULL, +# 0x000FEEA7E7897654ULL, 0x000FEEA678481D24ULL, 0x000FEEA48AA29E83ULL, +# 0x000FEEA21D22E4DAULL, 0x000FEE9F2E352024ULL, 0x000FEE9BBC26AF2EULL, +# 0x000FEE97C524F2E4ULL, 0x000FEE93473C0A3AULL, 0x000FEE8E40557516ULL, +# 0x000FEE88AE369C7AULL, 0x000FEE828E7F3DFDULL, 0x000FEE7BDEA7B888ULL, +# 0x000FEE749BFF37FFULL, 0x000FEE6CC3A9BD5EULL, 0x000FEE64529E007EULL, +# 0x000FEE5B45A32888ULL, 0x000FEE51994E57B6ULL, 0x000FEE474A0006CFULL, +# 0x000FEE3C53E12C50ULL, 0x000FEE30B2E02AD8ULL, 0x000FEE2462AD8205ULL, +# 0x000FEE175EB83C5AULL, 0x000FEE09A22A1447ULL, 0x000FEDFB27E349CCULL, +# 0x000FEDEBEA76216CULL, 0x000FEDDBE422047EULL, 0x000FEDCB0ECE39D3ULL, +# 0x000FEDB964042CF4ULL, 0x000FEDA6DCE938C9ULL, 0x000FED937237E98DULL, +# 0x000FED7F1C38A836ULL, 0x000FED69D2B9C02BULL, 0x000FED538D06AE00ULL, +# 0x000FED3C41DEA422ULL, 0x000FED23E76A2FD8ULL, 0x000FED0A732FE644ULL, +# 0x000FECEFDA07FE34ULL, 0x000FECD4100EB7B8ULL, 0x000FECB708956EB4ULL, +# 0x000FEC98B61230C1ULL, 0x000FEC790A0DA978ULL, 0x000FEC57F50F31FEULL, +# 0x000FEC356686C962ULL, 0x000FEC114CB4B335ULL, 0x000FEBEB948E6FD0ULL, +# 0x000FEBC429A0B692ULL, 0x000FEB9AF5EE0CDCULL, 0x000FEB6FE1C98542ULL, +# 0x000FEB42D3AD1F9EULL, 0x000FEB13B00B2D4BULL, 0x000FEAE2591A02E9ULL, +# 0x000FEAAEAE992257ULL, 0x000FEA788D8EE326ULL, 0x000FEA3FCFFD73E5ULL, +# 0x000FEA044C8DD9F6ULL, 0x000FE9C5D62F563BULL, 0x000FE9843BA947A4ULL, +# 0x000FE93F471D4728ULL, 0x000FE8F6BD76C5D6ULL, 0x000FE8AA5DC4E8E6ULL, +# 0x000FE859E07AB1EAULL, 0x000FE804F690A940ULL, 0x000FE7AB488233C0ULL, +# 0x000FE74C751F6AA5ULL, 0x000FE6E8102AA202ULL, 0x000FE67DA0B6ABD8ULL, +# 0x000FE60C9F38307EULL, 0x000FE5947338F742ULL, 0x000FE51470977280ULL, +# 0x000FE48BD436F458ULL, 0x000FE3F9BFFD1E37ULL, 0x000FE35D35EEB19CULL, +# 0x000FE2B5122FE4FEULL, 0x000FE20003995557ULL, 0x000FE13C82788314ULL, +# 0x000FE068C4EE67B0ULL, 0x000FDF82B02B71AAULL, 0x000FDE87C57EFEAAULL, +# 0x000FDD7509C63BFDULL, 0x000FDC46E529BF13ULL, 0x000FDAF8F82E0282ULL, +# 0x000FD985E1B2BA75ULL, 0x000FD7E6EF48CF04ULL, 0x000FD613ADBD650BULL, +# 0x000FD40149E2F012ULL, 0x000FD1A1A7B4C7ACULL, 0x000FCEE204761F9EULL, +# 0x000FCBA8D85E11B2ULL, 0x000FC7D26ECD2D22ULL, 0x000FC32B2F1E22EDULL, +# 0x000FBD6581C0B83AULL, 0x000FB606C4005434ULL, 0x000FAC40582A2874ULL, +# 0x000F9E971E014598ULL, 0x000F89FA48A41DFCULL, 0x000F66C5F7F0302CULL, +# 0x000F1A5A4B331C4AULL}; +# +# static const double wi_double[] = { +# 8.68362706080130616677e-16, 4.77933017572773682428e-17, +# 6.35435241740526230246e-17, 7.45487048124769627714e-17, +# 8.32936681579309972857e-17, 9.06806040505948228243e-17, +# 9.71486007656776183958e-17, 1.02947503142410192108e-16, +# 1.08234302884476839838e-16, 1.13114701961090307945e-16, +# 1.17663594570229211411e-16, 1.21936172787143633280e-16, +# 1.25974399146370927864e-16, 1.29810998862640315416e-16, +# 1.33472037368241227547e-16, 1.36978648425712032797e-16, +# 1.40348230012423820659e-16, 1.43595294520569430270e-16, +# 1.46732087423644219083e-16, 1.49769046683910367425e-16, +# 1.52715150035961979750e-16, 1.55578181694607639484e-16, +# 1.58364940092908853989e-16, 1.61081401752749279325e-16, +# 1.63732852039698532012e-16, 1.66323990584208352778e-16, +# 1.68859017086765964015e-16, 1.71341701765596607184e-16, +# 1.73775443658648593310e-16, 1.76163319230009959832e-16, +# 1.78508123169767272927e-16, 1.80812402857991522674e-16, +# 1.83078487648267501776e-16, 1.85308513886180189386e-16, +# 1.87504446393738816849e-16, 1.89668097007747596212e-16, +# 1.91801140648386198029e-16, 1.93905129306251037069e-16, +# 1.95981504266288244037e-16, 1.98031606831281739736e-16, +# 2.00056687762733300198e-16, 2.02057915620716538808e-16, +# 2.04036384154802118313e-16, 2.05993118874037063144e-16, +# 2.07929082904140197311e-16, 2.09845182223703516690e-16, +# 2.11742270357603418769e-16, 2.13621152594498681022e-16, +# 2.15482589785814580926e-16, 2.17327301775643674990e-16, +# 2.19155970504272708519e-16, 2.20969242822353175995e-16, +# 2.22767733047895534948e-16, 2.24552025294143552381e-16, +# 2.26322675592856786566e-16, 2.28080213834501706782e-16, +# 2.29825145544246839061e-16, 2.31557953510408037008e-16, +# 2.33279099280043561128e-16, 2.34989024534709550938e-16, +# 2.36688152357916037468e-16, 2.38376888404542434981e-16, +# 2.40055621981350627349e-16, 2.41724727046750252175e-16, +# 2.43384563137110286400e-16, 2.45035476226149539878e-16, +# 2.46677799523270498158e-16, 2.48311854216108767769e-16, +# 2.49937950162045242375e-16, 2.51556386532965786439e-16, +# 2.53167452417135826983e-16, 2.54771427381694417303e-16, +# 2.56368581998939683749e-16, 2.57959178339286723500e-16, +# 2.59543470433517070146e-16, 2.61121704706701939097e-16, +# 2.62694120385972564623e-16, 2.64260949884118951286e-16, +# 2.65822419160830680292e-16, 2.67378748063236329361e-16, +# 2.68930150647261591777e-16, 2.70476835481199518794e-16, +# 2.72019005932773206655e-16, 2.73556860440867908686e-16, +# 2.75090592773016664571e-16, 2.76620392269639032183e-16, +# 2.78146444075954410103e-16, 2.79668929362423005309e-16, +# 2.81188025534502074329e-16, 2.82703906432447923059e-16, +# 2.84216742521840606520e-16, 2.85726701075460149289e-16, +# 2.87233946347097994381e-16, 2.88738639737848191815e-16, +# 2.90240939955384233230e-16, 2.91741003166694553259e-16, +# 2.93238983144718163965e-16, 2.94735031409293489611e-16, +# 2.96229297362806647792e-16, 2.97721928420902891115e-16, +# 2.99213070138601307081e-16, 3.00702866332133102993e-16, +# 3.02191459196806151971e-16, 3.03678989421180184427e-16, +# 3.05165596297821922381e-16, 3.06651417830895451744e-16, +# 3.08136590840829717032e-16, 3.09621251066292253306e-16, +# 3.11105533263689296831e-16, 3.12589571304399892784e-16, +# 3.14073498269944617203e-16, 3.15557446545280064031e-16, +# 3.17041547910402852545e-16, 3.18525933630440648871e-16, +# 3.20010734544401137886e-16, 3.21496081152744704901e-16, +# 3.22982103703941557538e-16, 3.24468932280169778077e-16, +# 3.25956696882307838340e-16, 3.27445527514370671802e-16, +# 3.28935554267536967851e-16, 3.30426907403912838589e-16, +# 3.31919717440175233652e-16, 3.33414115231237245918e-16, +# 3.34910232054077845412e-16, 3.36408199691876507948e-16, +# 3.37908150518594979994e-16, 3.39410217584148914282e-16, +# 3.40914534700312603713e-16, 3.42421236527501816058e-16, +# 3.43930458662583133920e-16, 3.45442337727858401604e-16, +# 3.46957011461378353333e-16, 3.48474618808741370700e-16, +# 3.49995300016538099813e-16, 3.51519196727607440975e-16, +# 3.53046452078274009054e-16, 3.54577210797743572160e-16, +# 3.56111619309838843415e-16, 3.57649825837265051035e-16, +# 3.59191980508602994994e-16, 3.60738235468235137839e-16, +# 3.62288744989419151904e-16, 3.63843665590734438546e-16, +# 3.65403156156136995766e-16, 3.66967378058870090021e-16, +# 3.68536495289491401456e-16, 3.70110674588289834952e-16, +# 3.71690085582382297792e-16, 3.73274900927794352614e-16, +# 3.74865296456848868882e-16, 3.76461451331202869131e-16, +# 3.78063548200896037651e-16, 3.79671773369794425924e-16, +# 3.81286316967837738238e-16, 3.82907373130524317507e-16, +# 3.84535140186095955858e-16, 3.86169820850914927119e-16, +# 3.87811622433558721164e-16, 3.89460757048192620674e-16, +# 3.91117441837820542060e-16, 3.92781899208054153270e-16, +# 3.94454357072087711446e-16, 3.96135049107613542983e-16, +# 3.97824215026468259474e-16, 3.99522100857856502444e-16, +# 4.01228959246062907451e-16, 4.02945049763632792393e-16, +# 4.04670639241074995115e-16, 4.06406002114225038723e-16, +# 4.08151420790493873480e-16, 4.09907186035326643447e-16, +# 4.11673597380302570170e-16, 4.13450963554423599878e-16, +# 4.15239602940268833891e-16, 4.17039844056831587498e-16, +# 4.18852026071011229572e-16, 4.20676499339901510978e-16, +# 4.22513625986204937320e-16, 4.24363780509307796137e-16, +# 4.26227350434779809917e-16, 4.28104737005311666397e-16, +# 4.29996355916383230161e-16, 4.31902638100262944617e-16, +# 4.33824030562279080411e-16, 4.35760997273684900553e-16, +# 4.37714020125858747008e-16, 4.39683599951052137423e-16, +# 4.41670257615420348435e-16, 4.43674535190656726604e-16, +# 4.45696997211204306674e-16, 4.47738232024753387312e-16, +# 4.49798853244554968009e-16, 4.51879501313005876278e-16, +# 4.53980845187003400947e-16, 4.56103584156742206384e-16, +# 4.58248449810956667052e-16, 4.60416208163115281428e-16, +# 4.62607661954784567754e-16, 4.64823653154320737780e-16, +# 4.67065065671263059081e-16, 4.69332828309332890697e-16, +# 4.71627917983835129766e-16, 4.73951363232586715165e-16, +# 4.76304248053313737663e-16, 4.78687716104872284247e-16, +# 4.81102975314741720538e-16, 4.83551302941152515162e-16, +# 4.86034051145081195402e-16, 4.88552653135360343280e-16, +# 4.91108629959526955862e-16, 4.93703598024033454728e-16, +# 4.96339277440398725619e-16, 4.99017501309182245754e-16, +# 5.01740226071808946011e-16, 5.04509543081872748637e-16, +# 5.07327691573354207058e-16, 5.10197073234156184149e-16, +# 5.13120268630678373200e-16, 5.16100055774322824569e-16, +# 5.19139431175769859873e-16, 5.22241633800023428760e-16, +# 5.25410172417759732697e-16, 5.28648856950494511482e-16, +# 5.31961834533840037535e-16, 5.35353631181649688145e-16, +# 5.38829200133405320160e-16, 5.42393978220171234073e-16, +# 5.46053951907478041166e-16, 5.49815735089281410703e-16, +# 5.53686661246787600374e-16, 5.57674893292657647836e-16, +# 5.61789555355541665830e-16, 5.66040892008242216739e-16, +# 5.70440462129138908417e-16, 5.75001376891989523684e-16, +# 5.79738594572459365014e-16, 5.84669289345547900201e-16, +# 5.89813317647789942685e-16, 5.95193814964144415532e-16, +# 6.00837969627190832234e-16, 6.06778040933344851394e-16, +# 6.13052720872528159123e-16, 6.19708989458162555387e-16, +# 6.26804696330128439415e-16, 6.34412240712750598627e-16, +# 6.42623965954805540945e-16, 6.51560331734499356881e-16, +# 6.61382788509766415145e-16, 6.72315046250558662913e-16, +# 6.84680341756425875856e-16, 6.98971833638761995415e-16, +# 7.15999493483066421560e-16, 7.37242430179879890722e-16, +# 7.65893637080557275482e-16, 8.11384933765648418565e-16}; +# +# static const double fi_double[] = { +# 1.00000000000000000000e+00, 9.77101701267671596263e-01, +# 9.59879091800106665211e-01, 9.45198953442299649730e-01, +# 9.32060075959230460718e-01, 9.19991505039347012840e-01, +# 9.08726440052130879366e-01, 8.98095921898343418910e-01, +# 8.87984660755833377088e-01, 8.78309655808917399966e-01, +# 8.69008688036857046555e-01, 8.60033621196331532488e-01, +# 8.51346258458677951353e-01, 8.42915653112204177333e-01, +# 8.34716292986883434679e-01, 8.26726833946221373317e-01, +# 8.18929191603702366642e-01, 8.11307874312656274185e-01, +# 8.03849483170964274059e-01, 7.96542330422958966274e-01, +# 7.89376143566024590648e-01, 7.82341832654802504798e-01, +# 7.75431304981187174974e-01, 7.68637315798486264740e-01, +# 7.61953346836795386565e-01, 7.55373506507096115214e-01, +# 7.48892447219156820459e-01, 7.42505296340151055290e-01, +# 7.36207598126862650112e-01, 7.29995264561476231435e-01, +# 7.23864533468630222401e-01, 7.17811932630721960535e-01, +# 7.11834248878248421200e-01, 7.05928501332754310127e-01, +# 7.00091918136511615067e-01, 6.94321916126116711609e-01, +# 6.88616083004671808432e-01, 6.82972161644994857355e-01, +# 6.77388036218773526009e-01, 6.71861719897082099173e-01, +# 6.66391343908750100056e-01, 6.60975147776663107813e-01, +# 6.55611470579697264149e-01, 6.50298743110816701574e-01, +# 6.45035480820822293424e-01, 6.39820277453056585060e-01, +# 6.34651799287623608059e-01, 6.29528779924836690007e-01, +# 6.24450015547026504592e-01, 6.19414360605834324325e-01, +# 6.14420723888913888899e-01, 6.09468064925773433949e-01, +# 6.04555390697467776029e-01, 5.99681752619125263415e-01, +# 5.94846243767987448159e-01, 5.90047996332826008015e-01, +# 5.85286179263371453274e-01, 5.80559996100790898232e-01, +# 5.75868682972353718164e-01, 5.71211506735253227163e-01, +# 5.66587763256164445025e-01, 5.61996775814524340831e-01, +# 5.57437893618765945014e-01, 5.52910490425832290562e-01, +# 5.48413963255265812791e-01, 5.43947731190026262382e-01, +# 5.39511234256952132426e-01, 5.35103932380457614215e-01, +# 5.30725304403662057062e-01, 5.26374847171684479008e-01, +# 5.22052074672321841931e-01, 5.17756517229756352272e-01, +# 5.13487720747326958914e-01, 5.09245245995747941592e-01, +# 5.05028667943468123624e-01, 5.00837575126148681903e-01, +# 4.96671569052489714213e-01, 4.92530263643868537748e-01, +# 4.88413284705458028423e-01, 4.84320269426683325253e-01, +# 4.80250865909046753544e-01, 4.76204732719505863248e-01, +# 4.72181538467730199660e-01, 4.68180961405693596422e-01, +# 4.64202689048174355069e-01, 4.60246417812842867345e-01, +# 4.56311852678716434184e-01, 4.52398706861848520777e-01, +# 4.48506701507203064949e-01, 4.44635565395739396077e-01, +# 4.40785034665803987508e-01, 4.36954852547985550526e-01, +# 4.33144769112652261445e-01, 4.29354541029441427735e-01, +# 4.25583931338021970170e-01, 4.21832709229495894654e-01, +# 4.18100649837848226120e-01, 4.14387534040891125642e-01, +# 4.10693148270188157500e-01, 4.07017284329473372217e-01, +# 4.03359739221114510510e-01, 3.99720314980197222177e-01, +# 3.96098818515832451492e-01, 3.92495061459315619512e-01, +# 3.88908860018788715696e-01, 3.85340034840077283462e-01, +# 3.81788410873393657674e-01, 3.78253817245619183840e-01, +# 3.74736087137891138443e-01, 3.71235057668239498696e-01, +# 3.67750569779032587814e-01, 3.64282468129004055601e-01, +# 3.60830600989648031529e-01, 3.57394820145780500731e-01, +# 3.53974980800076777232e-01, 3.50570941481406106455e-01, +# 3.47182563956793643900e-01, 3.43809713146850715049e-01, +# 3.40452257044521866547e-01, 3.37110066637006045021e-01, +# 3.33783015830718454708e-01, 3.30470981379163586400e-01, +# 3.27173842813601400970e-01, 3.23891482376391093290e-01, +# 3.20623784956905355514e-01, 3.17370638029913609834e-01, +# 3.14131931596337177215e-01, 3.10907558126286509559e-01, +# 3.07697412504292056035e-01, 3.04501391976649993243e-01, +# 3.01319396100803049698e-01, 2.98151326696685481377e-01, +# 2.94997087799961810184e-01, 2.91856585617095209972e-01, +# 2.88729728482182923521e-01, 2.85616426815501756042e-01, +# 2.82516593083707578948e-01, 2.79430141761637940157e-01, +# 2.76356989295668320494e-01, 2.73297054068577072172e-01, +# 2.70250256365875463072e-01, 2.67216518343561471038e-01, +# 2.64195763997261190426e-01, 2.61187919132721213522e-01, +# 2.58192911337619235290e-01, 2.55210669954661961700e-01, +# 2.52241126055942177508e-01, 2.49284212418528522415e-01, +# 2.46339863501263828249e-01, 2.43408015422750312329e-01, +# 2.40488605940500588254e-01, 2.37581574431238090606e-01, +# 2.34686861872330010392e-01, 2.31804410824338724684e-01, +# 2.28934165414680340644e-01, 2.26076071322380278694e-01, +# 2.23230075763917484855e-01, 2.20396127480151998723e-01, +# 2.17574176724331130872e-01, 2.14764175251173583536e-01, +# 2.11966076307030182324e-01, 2.09179834621125076977e-01, +# 2.06405406397880797353e-01, 2.03642749310334908452e-01, +# 2.00891822494656591136e-01, 1.98152586545775138971e-01, +# 1.95425003514134304483e-01, 1.92709036903589175926e-01, +# 1.90004651670464985713e-01, 1.87311814223800304768e-01, +# 1.84630492426799269756e-01, 1.81960655599522513892e-01, +# 1.79302274522847582272e-01, 1.76655321443734858455e-01, +# 1.74019770081838553999e-01, 1.71395595637505754327e-01, +# 1.68782774801211288285e-01, 1.66181285764481906364e-01, +# 1.63591108232365584074e-01, 1.61012223437511009516e-01, +# 1.58444614155924284882e-01, 1.55888264724479197465e-01, +# 1.53343161060262855866e-01, 1.50809290681845675763e-01, +# 1.48286642732574552861e-01, 1.45775208005994028060e-01, +# 1.43274978973513461566e-01, 1.40785949814444699690e-01, +# 1.38308116448550733057e-01, 1.35841476571253755301e-01, +# 1.33386029691669155683e-01, 1.30941777173644358090e-01, +# 1.28508722279999570981e-01, 1.26086870220185887081e-01, +# 1.23676228201596571932e-01, 1.21276805484790306533e-01, +# 1.18888613442910059947e-01, 1.16511665625610869035e-01, +# 1.14145977827838487895e-01, 1.11791568163838089811e-01, +# 1.09448457146811797824e-01, 1.07116667774683801961e-01, +# 1.04796225622487068629e-01, 1.02487158941935246892e-01, +# 1.00189498768810017482e-01, 9.79032790388624646338e-02, +# 9.56285367130089991594e-02, 9.33653119126910124859e-02, +# 9.11136480663737591268e-02, 8.88735920682758862021e-02, +# 8.66451944505580717859e-02, 8.44285095703534715916e-02, +# 8.22235958132029043366e-02, 8.00305158146630696292e-02, +# 7.78493367020961224423e-02, 7.56801303589271778804e-02, +# 7.35229737139813238622e-02, 7.13779490588904025339e-02, +# 6.92451443970067553879e-02, 6.71246538277884968737e-02, +# 6.50165779712428976156e-02, 6.29210244377581412456e-02, +# 6.08381083495398780614e-02, 5.87679529209337372930e-02, +# 5.67106901062029017391e-02, 5.46664613248889208474e-02, +# 5.26354182767921896513e-02, 5.06177238609477817000e-02, +# 4.86135532158685421122e-02, 4.66230949019303814174e-02, +# 4.46465522512944634759e-02, 4.26841449164744590750e-02, +# 4.07361106559409394401e-02, 3.88027074045261474722e-02, +# 3.68842156885673053135e-02, 3.49809414617161251737e-02, +# 3.30932194585785779961e-02, 3.12214171919203004046e-02, +# 2.93659397581333588001e-02, 2.75272356696031131329e-02, +# 2.57058040085489103443e-02, 2.39022033057958785407e-02, +# 2.21170627073088502113e-02, 2.03510962300445102935e-02, +# 1.86051212757246224594e-02, 1.68800831525431419000e-02, +# 1.51770883079353092332e-02, 1.34974506017398673818e-02, +# 1.18427578579078790488e-02, 1.02149714397014590439e-02, +# 8.61658276939872638800e-03, 7.05087547137322242369e-03, +# 5.52240329925099155545e-03, 4.03797259336302356153e-03, +# 2.60907274610215926189e-03, 1.26028593049859797236e-03}; +# +# static const uint32_t ki_float[] = { +# 0x007799ECUL, 0x00000000UL, 0x006045F5UL, 0x006D1AA8UL, 0x00728FB4UL, +# 0x007592AFUL, 0x00777A5CUL, 0x0078CA38UL, 0x0079BF6BUL, 0x007A7A35UL, +# 0x007B0D2FUL, 0x007B83D4UL, 0x007BE597UL, 0x007C3788UL, 0x007C7D33UL, +# 0x007CB926UL, 0x007CED48UL, 0x007D1B08UL, 0x007D437FUL, 0x007D678BUL, +# 0x007D87DBUL, 0x007DA4FCUL, 0x007DBF61UL, 0x007DD767UL, 0x007DED5DUL, +# 0x007E0183UL, 0x007E1411UL, 0x007E2534UL, 0x007E3515UL, 0x007E43D5UL, +# 0x007E5193UL, 0x007E5E67UL, 0x007E6A69UL, 0x007E75AAUL, 0x007E803EUL, +# 0x007E8A32UL, 0x007E9395UL, 0x007E9C72UL, 0x007EA4D5UL, 0x007EACC6UL, +# 0x007EB44EUL, 0x007EBB75UL, 0x007EC243UL, 0x007EC8BCUL, 0x007ECEE8UL, +# 0x007ED4CCUL, 0x007EDA6BUL, 0x007EDFCBUL, 0x007EE4EFUL, 0x007EE9DCUL, +# 0x007EEE94UL, 0x007EF31BUL, 0x007EF774UL, 0x007EFBA0UL, 0x007EFFA3UL, +# 0x007F037FUL, 0x007F0736UL, 0x007F0ACAUL, 0x007F0E3CUL, 0x007F118FUL, +# 0x007F14C4UL, 0x007F17DCUL, 0x007F1ADAUL, 0x007F1DBDUL, 0x007F2087UL, +# 0x007F233AUL, 0x007F25D7UL, 0x007F285DUL, 0x007F2AD0UL, 0x007F2D2EUL, +# 0x007F2F7AUL, 0x007F31B3UL, 0x007F33DCUL, 0x007F35F3UL, 0x007F37FBUL, +# 0x007F39F3UL, 0x007F3BDCUL, 0x007F3DB7UL, 0x007F3F84UL, 0x007F4145UL, +# 0x007F42F8UL, 0x007F449FUL, 0x007F463AUL, 0x007F47CAUL, 0x007F494EUL, +# 0x007F4AC8UL, 0x007F4C38UL, 0x007F4D9DUL, 0x007F4EF9UL, 0x007F504CUL, +# 0x007F5195UL, 0x007F52D5UL, 0x007F540DUL, 0x007F553DUL, 0x007F5664UL, +# 0x007F5784UL, 0x007F589CUL, 0x007F59ACUL, 0x007F5AB5UL, 0x007F5BB8UL, +# 0x007F5CB3UL, 0x007F5DA8UL, 0x007F5E96UL, 0x007F5F7EUL, 0x007F605FUL, +# 0x007F613BUL, 0x007F6210UL, 0x007F62E0UL, 0x007F63AAUL, 0x007F646FUL, +# 0x007F652EUL, 0x007F65E8UL, 0x007F669CUL, 0x007F674CUL, 0x007F67F6UL, +# 0x007F689CUL, 0x007F693CUL, 0x007F69D9UL, 0x007F6A70UL, 0x007F6B03UL, +# 0x007F6B91UL, 0x007F6C1BUL, 0x007F6CA0UL, 0x007F6D21UL, 0x007F6D9EUL, +# 0x007F6E17UL, 0x007F6E8CUL, 0x007F6EFCUL, 0x007F6F68UL, 0x007F6FD1UL, +# 0x007F7035UL, 0x007F7096UL, 0x007F70F3UL, 0x007F714CUL, 0x007F71A1UL, +# 0x007F71F2UL, 0x007F723FUL, 0x007F7289UL, 0x007F72CFUL, 0x007F7312UL, +# 0x007F7350UL, 0x007F738BUL, 0x007F73C3UL, 0x007F73F6UL, 0x007F7427UL, +# 0x007F7453UL, 0x007F747CUL, 0x007F74A1UL, 0x007F74C3UL, 0x007F74E0UL, +# 0x007F74FBUL, 0x007F7511UL, 0x007F7524UL, 0x007F7533UL, 0x007F753FUL, +# 0x007F7546UL, 0x007F754AUL, 0x007F754BUL, 0x007F7547UL, 0x007F753FUL, +# 0x007F7534UL, 0x007F7524UL, 0x007F7511UL, 0x007F74F9UL, 0x007F74DEUL, +# 0x007F74BEUL, 0x007F749AUL, 0x007F7472UL, 0x007F7445UL, 0x007F7414UL, +# 0x007F73DFUL, 0x007F73A5UL, 0x007F7366UL, 0x007F7323UL, 0x007F72DAUL, +# 0x007F728DUL, 0x007F723AUL, 0x007F71E3UL, 0x007F7186UL, 0x007F7123UL, +# 0x007F70BBUL, 0x007F704DUL, 0x007F6FD9UL, 0x007F6F5FUL, 0x007F6EDFUL, +# 0x007F6E58UL, 0x007F6DCBUL, 0x007F6D37UL, 0x007F6C9CUL, 0x007F6BF9UL, +# 0x007F6B4FUL, 0x007F6A9CUL, 0x007F69E2UL, 0x007F691FUL, 0x007F6854UL, +# 0x007F677FUL, 0x007F66A1UL, 0x007F65B8UL, 0x007F64C6UL, 0x007F63C8UL, +# 0x007F62C0UL, 0x007F61ABUL, 0x007F608AUL, 0x007F5F5DUL, 0x007F5E21UL, +# 0x007F5CD8UL, 0x007F5B7FUL, 0x007F5A17UL, 0x007F589EUL, 0x007F5713UL, +# 0x007F5575UL, 0x007F53C4UL, 0x007F51FEUL, 0x007F5022UL, 0x007F4E2FUL, +# 0x007F4C22UL, 0x007F49FAUL, 0x007F47B6UL, 0x007F4553UL, 0x007F42CFUL, +# 0x007F4028UL, 0x007F3D5AUL, 0x007F3A64UL, 0x007F3741UL, 0x007F33EDUL, +# 0x007F3065UL, 0x007F2CA4UL, 0x007F28A4UL, 0x007F245FUL, 0x007F1FCEUL, +# 0x007F1AEAUL, 0x007F15A9UL, 0x007F1000UL, 0x007F09E4UL, 0x007F0346UL, +# 0x007EFC16UL, 0x007EF43EUL, 0x007EEBA8UL, 0x007EE237UL, 0x007ED7C8UL, +# 0x007ECC2FUL, 0x007EBF37UL, 0x007EB09DUL, 0x007EA00AUL, 0x007E8D0DUL, +# 0x007E7710UL, 0x007E5D47UL, 0x007E3E93UL, 0x007E1959UL, 0x007DEB2CUL, +# 0x007DB036UL, 0x007D6203UL, 0x007CF4B9UL, 0x007C4FD2UL, 0x007B3630UL, +# 0x0078D2D2UL}; +# +# static const float wi_float[] = { +# 4.66198677960027669255e-07f, 2.56588335019207033255e-08f, +# 3.41146697750176784592e-08f, 4.00230311410932959821e-08f, +# 4.47179475877737745459e-08f, 4.86837785973537366722e-08f, +# 5.21562578925932412861e-08f, 5.52695199001886257153e-08f, +# 5.81078488992733116465e-08f, 6.07279932024587421409e-08f, +# 6.31701613261172047795e-08f, 6.54639842900233842742e-08f, +# 6.76319905583641815324e-08f, 6.96917493470166688656e-08f, +# 7.16572544283857476692e-08f, 7.35398519048393832969e-08f, +# 7.53488822443557479279e-08f, 7.70921367281667127885e-08f, +# 7.87761895947956022626e-08f, 8.04066446825615346857e-08f, +# 8.19883218760237408659e-08f, 8.35254002936857088917e-08f, +# 8.50215298165053411740e-08f, 8.64799190652369040985e-08f, +# 8.79034055989140110861e-08f, 8.92945125124233511541e-08f, +# 9.06554945027956262312e-08f, 9.19883756905278607229e-08f, +# 9.32949809202232869780e-08f, 9.45769618559625849039e-08f, +# 9.58358188855612866442e-08f, 9.70729196232813152662e-08f, +# 9.82895146313061088986e-08f, 9.94867508514382224721e-08f, +# 1.00665683139461669691e-07f, 1.01827284217853923044e-07f, +# 1.02972453302539369464e-07f, 1.04102023612124921572e-07f, +# 1.05216768930574060431e-07f, 1.06317409364335657741e-07f, +# 1.07404616410877866490e-07f, 1.08479017436113134283e-07f, +# 1.09541199642370962438e-07f, 1.10591713595628691212e-07f, +# 1.11631076370069356306e-07f, 1.12659774359245895023e-07f, +# 1.13678265795837113569e-07f, 1.14686983015899673063e-07f, +# 1.15686334498432158725e-07f, 1.16676706706789039179e-07f, +# 1.17658465754873988919e-07f, 1.18631958917986203582e-07f, +# 1.19597516005596215528e-07f, 1.20555450611113917226e-07f, +# 1.21506061251817163689e-07f, 1.22449632410483948386e-07f, +# 1.23386435488872536840e-07f, 1.24316729681986364321e-07f, +# 1.25240762781015530062e-07f, 1.26158771911939892267e-07f, +# 1.27070984215989333455e-07f, 1.27977617477468922011e-07f, +# 1.28878880703854958297e-07f, 1.29774974662539874521e-07f, +# 1.30666092378141980504e-07f, 1.31552419593887221722e-07f, +# 1.32434135200211397569e-07f, 1.33311411633413359243e-07f, +# 1.34184415246907777059e-07f, 1.35053306657377859830e-07f, +# 1.35918241067904315860e-07f, 1.36779368569952053923e-07f, +# 1.37636834425917531047e-07f, 1.38490779333783508675e-07f, +# 1.39341339675287344817e-07f, 1.40188647748881762555e-07f, +# 1.41032831988654882776e-07f, 1.41874017170273235693e-07f, +# 1.42712324604921442006e-07f, 1.43547872322127921816e-07f, +# 1.44380775242292721080e-07f, 1.45211145339665544509e-07f, +# 1.46039091796461362146e-07f, 1.46864721148745476208e-07f, +# 1.47688137424670065700e-07f, 1.48509442275598857119e-07f, +# 1.49328735100614641423e-07f, 1.50146113164867617390e-07f, +# 1.50961671712187416111e-07f, 1.51775504072350982845e-07f, +# 1.52587701763369746341e-07f, 1.53398354589133671168e-07f, +# 1.54207550732725568797e-07f, 1.55015376845697999657e-07f, +# 1.55821918133584372604e-07f, 1.56627258437898192833e-07f, +# 1.57431480314857468671e-07f, 1.58234665111056041043e-07f, +# 1.59036893036289199880e-07f, 1.59838243233728855017e-07f, +# 1.60638793847630850137e-07f, 1.61438622088746393909e-07f, +# 1.62237804297600106296e-07f, 1.63036416005787357730e-07f, +# 1.63834531995435479082e-07f, 1.64632226356965902954e-07f, +# 1.65429572545287097020e-07f, 1.66226643434541294491e-07f, +# 1.67023511371523209274e-07f, 1.67820248227882200051e-07f, +# 1.68616925451215588827e-07f, 1.69413614115155757272e-07f, +# 1.70210384968549673733e-07f, 1.71007308483826142122e-07f, +# 1.71804454904642543391e-07f, 1.72601894292900061024e-07f, +# 1.73399696575213681990e-07f, 1.74197931588920988271e-07f, +# 1.74996669127712165834e-07f, 1.75795978986961275677e-07f, +# 1.76595931008838063924e-07f, 1.77396595127278238022e-07f, +# 1.78198041412889183130e-07f, 1.79000340117867431104e-07f, +# 1.79803561721004406185e-07f, 1.80607776972855859813e-07f, +# 1.81413056941151359868e-07f, 1.82219473056520464354e-07f, +# 1.83027097158612474240e-07f, 1.83836001542687613069e-07f, +# 1.84646259006759307383e-07f, 1.85457942899367347876e-07f, +# 1.86271127168064649331e-07f, 1.87085886408701333260e-07f, +# 1.87902295915592424729e-07f, 1.88720431732658022414e-07f, +# 1.89540370705627262627e-07f, 1.90362190535400839128e-07f, +# 1.91185969832669990437e-07f, 1.92011788173893651535e-07f, +# 1.92839726158739913768e-07f, 1.93669865469102145482e-07f, +# 1.94502288929804890433e-07f, 1.95337080571120616772e-07f, +# 1.96174325693223683314e-07f, 1.97014110932714374919e-07f, +# 1.97856524331352952716e-07f, 1.98701655407150388211e-07f, +# 1.99549595227971635348e-07f, 2.00400436487814600236e-07f, +# 2.01254273585938820883e-07f, 2.02111202709026498408e-07f, +# 2.02971321916571014951e-07f, 2.03834731229698846698e-07f, +# 2.04701532723644121196e-07f, 2.05571830624108885378e-07f, +# 2.06445731407757185541e-07f, 2.07323343907107312957e-07f, +# 2.08204779420104330037e-07f, 2.09090151824673600213e-07f, +# 2.09979577698577670508e-07f, 2.10873176444920111011e-07f, +# 2.11771070423665379388e-07f, 2.12673385089569268965e-07f, +# 2.13580249136944118603e-07f, 2.14491794651713402832e-07f, +# 2.15408157271244625533e-07f, 2.16329476352486921685e-07f, +# 2.17255895148978920488e-07f, 2.18187560997337924713e-07f, +# 2.19124625513888206785e-07f, 2.20067244802139479285e-07f, +# 2.21015579671883851683e-07f, 2.21969795870742159701e-07f, +# 2.22930064329060010376e-07f, 2.23896561419128954210e-07f, +# 2.24869469229791575583e-07f, 2.25848975857580322189e-07f, +# 2.26835275715640744118e-07f, 2.27828569861799901001e-07f, +# 2.28829066347263833069e-07f, 2.29836980587561823183e-07f, +# 2.30852535757505260518e-07f, 2.31875963212094114516e-07f, +# 2.32907502935486642699e-07f, 2.33947404020352726160e-07f, +# 2.34995925180156140289e-07f, 2.36053335297164516378e-07f, +# 2.37119914009265667728e-07f, 2.38195952338983970691e-07f, +# 2.39281753368440712742e-07f, 2.40377632964396957621e-07f, +# 2.41483920557958384709e-07f, 2.42600959984018662258e-07f, +# 2.43729110386077326413e-07f, 2.44868747192698939290e-07f, +# 2.46020263172594533433e-07f, 2.47184069576113545901e-07f, +# 2.48360597371852893654e-07f, 2.49550298588131851232e-07f, +# 2.50753647770270890721e-07f, 2.51971143565970967140e-07f, +# 2.53203310452642767375e-07f, 2.54450700622322097890e-07f, +# 2.55713896041856770961e-07f, 2.56993510708419870887e-07f, +# 2.58290193123138874550e-07f, 2.59604629008804833146e-07f, +# 2.60937544301314385690e-07f, 2.62289708448800566945e-07f, +# 2.63661938057441759882e-07f, 2.65055100928844238758e-07f, +# 2.66470120540847889467e-07f, 2.67907981031821866252e-07f, +# 2.69369732758258246335e-07f, 2.70856498507068313229e-07f, +# 2.72369480457841388042e-07f, 2.73909968006952220135e-07f, +# 2.75479346585437289399e-07f, 2.77079107626811561009e-07f, +# 2.78710859870496796972e-07f, 2.80376342222588603820e-07f, +# 2.82077438439999912690e-07f, 2.83816193958769527230e-07f, +# 2.85594835255375795814e-07f, 2.87415792215003905739e-07f, +# 2.89281724087851835900e-07f, 2.91195549750371467233e-07f, +# 2.93160483161771875581e-07f, 2.95180075129332912389e-07f, +# 2.97258262785797916083e-07f, 2.99399428561531794298e-07f, +# 3.01608470935804138388e-07f, 3.03890889921758510417e-07f, +# 3.06252891144972267537e-07f, 3.08701513613258141075e-07f, +# 3.11244787989714509378e-07f, 3.13891934589336184321e-07f, +# 3.16653613755314681314e-07f, 3.19542246256559459667e-07f, +# 3.22572428717978242099e-07f, 3.25761480217458181578e-07f, +# 3.29130173358915628534e-07f, 3.32703730345002116955e-07f, +# 3.36513208964639108346e-07f, 3.40597478255417943913e-07f, +# 3.45006114675213401550e-07f, 3.49803789521323211592e-07f, +# 3.55077180848341416206e-07f, 3.60946392031859609868e-07f, +# 3.67584959507244041831e-07f, 3.75257645787954431030e-07f, +# 3.84399301057791926300e-07f, 3.95804015855768440983e-07f, +# 4.11186015434435801956e-07f, 4.35608969373823260746e-07f}; +# +# static const float fi_float[] = { +# 1.00000000000000000000e+00f, 9.77101701267671596263e-01f, +# 9.59879091800106665211e-01f, 9.45198953442299649730e-01f, +# 9.32060075959230460718e-01f, 9.19991505039347012840e-01f, +# 9.08726440052130879366e-01f, 8.98095921898343418910e-01f, +# 8.87984660755833377088e-01f, 8.78309655808917399966e-01f, +# 8.69008688036857046555e-01f, 8.60033621196331532488e-01f, +# 8.51346258458677951353e-01f, 8.42915653112204177333e-01f, +# 8.34716292986883434679e-01f, 8.26726833946221373317e-01f, +# 8.18929191603702366642e-01f, 8.11307874312656274185e-01f, +# 8.03849483170964274059e-01f, 7.96542330422958966274e-01f, +# 7.89376143566024590648e-01f, 7.82341832654802504798e-01f, +# 7.75431304981187174974e-01f, 7.68637315798486264740e-01f, +# 7.61953346836795386565e-01f, 7.55373506507096115214e-01f, +# 7.48892447219156820459e-01f, 7.42505296340151055290e-01f, +# 7.36207598126862650112e-01f, 7.29995264561476231435e-01f, +# 7.23864533468630222401e-01f, 7.17811932630721960535e-01f, +# 7.11834248878248421200e-01f, 7.05928501332754310127e-01f, +# 7.00091918136511615067e-01f, 6.94321916126116711609e-01f, +# 6.88616083004671808432e-01f, 6.82972161644994857355e-01f, +# 6.77388036218773526009e-01f, 6.71861719897082099173e-01f, +# 6.66391343908750100056e-01f, 6.60975147776663107813e-01f, +# 6.55611470579697264149e-01f, 6.50298743110816701574e-01f, +# 6.45035480820822293424e-01f, 6.39820277453056585060e-01f, +# 6.34651799287623608059e-01f, 6.29528779924836690007e-01f, +# 6.24450015547026504592e-01f, 6.19414360605834324325e-01f, +# 6.14420723888913888899e-01f, 6.09468064925773433949e-01f, +# 6.04555390697467776029e-01f, 5.99681752619125263415e-01f, +# 5.94846243767987448159e-01f, 5.90047996332826008015e-01f, +# 5.85286179263371453274e-01f, 5.80559996100790898232e-01f, +# 5.75868682972353718164e-01f, 5.71211506735253227163e-01f, +# 5.66587763256164445025e-01f, 5.61996775814524340831e-01f, +# 5.57437893618765945014e-01f, 5.52910490425832290562e-01f, +# 5.48413963255265812791e-01f, 5.43947731190026262382e-01f, +# 5.39511234256952132426e-01f, 5.35103932380457614215e-01f, +# 5.30725304403662057062e-01f, 5.26374847171684479008e-01f, +# 5.22052074672321841931e-01f, 5.17756517229756352272e-01f, +# 5.13487720747326958914e-01f, 5.09245245995747941592e-01f, +# 5.05028667943468123624e-01f, 5.00837575126148681903e-01f, +# 4.96671569052489714213e-01f, 4.92530263643868537748e-01f, +# 4.88413284705458028423e-01f, 4.84320269426683325253e-01f, +# 4.80250865909046753544e-01f, 4.76204732719505863248e-01f, +# 4.72181538467730199660e-01f, 4.68180961405693596422e-01f, +# 4.64202689048174355069e-01f, 4.60246417812842867345e-01f, +# 4.56311852678716434184e-01f, 4.52398706861848520777e-01f, +# 4.48506701507203064949e-01f, 4.44635565395739396077e-01f, +# 4.40785034665803987508e-01f, 4.36954852547985550526e-01f, +# 4.33144769112652261445e-01f, 4.29354541029441427735e-01f, +# 4.25583931338021970170e-01f, 4.21832709229495894654e-01f, +# 4.18100649837848226120e-01f, 4.14387534040891125642e-01f, +# 4.10693148270188157500e-01f, 4.07017284329473372217e-01f, +# 4.03359739221114510510e-01f, 3.99720314980197222177e-01f, +# 3.96098818515832451492e-01f, 3.92495061459315619512e-01f, +# 3.88908860018788715696e-01f, 3.85340034840077283462e-01f, +# 3.81788410873393657674e-01f, 3.78253817245619183840e-01f, +# 3.74736087137891138443e-01f, 3.71235057668239498696e-01f, +# 3.67750569779032587814e-01f, 3.64282468129004055601e-01f, +# 3.60830600989648031529e-01f, 3.57394820145780500731e-01f, +# 3.53974980800076777232e-01f, 3.50570941481406106455e-01f, +# 3.47182563956793643900e-01f, 3.43809713146850715049e-01f, +# 3.40452257044521866547e-01f, 3.37110066637006045021e-01f, +# 3.33783015830718454708e-01f, 3.30470981379163586400e-01f, +# 3.27173842813601400970e-01f, 3.23891482376391093290e-01f, +# 3.20623784956905355514e-01f, 3.17370638029913609834e-01f, +# 3.14131931596337177215e-01f, 3.10907558126286509559e-01f, +# 3.07697412504292056035e-01f, 3.04501391976649993243e-01f, +# 3.01319396100803049698e-01f, 2.98151326696685481377e-01f, +# 2.94997087799961810184e-01f, 2.91856585617095209972e-01f, +# 2.88729728482182923521e-01f, 2.85616426815501756042e-01f, +# 2.82516593083707578948e-01f, 2.79430141761637940157e-01f, +# 2.76356989295668320494e-01f, 2.73297054068577072172e-01f, +# 2.70250256365875463072e-01f, 2.67216518343561471038e-01f, +# 2.64195763997261190426e-01f, 2.61187919132721213522e-01f, +# 2.58192911337619235290e-01f, 2.55210669954661961700e-01f, +# 2.52241126055942177508e-01f, 2.49284212418528522415e-01f, +# 2.46339863501263828249e-01f, 2.43408015422750312329e-01f, +# 2.40488605940500588254e-01f, 2.37581574431238090606e-01f, +# 2.34686861872330010392e-01f, 2.31804410824338724684e-01f, +# 2.28934165414680340644e-01f, 2.26076071322380278694e-01f, +# 2.23230075763917484855e-01f, 2.20396127480151998723e-01f, +# 2.17574176724331130872e-01f, 2.14764175251173583536e-01f, +# 2.11966076307030182324e-01f, 2.09179834621125076977e-01f, +# 2.06405406397880797353e-01f, 2.03642749310334908452e-01f, +# 2.00891822494656591136e-01f, 1.98152586545775138971e-01f, +# 1.95425003514134304483e-01f, 1.92709036903589175926e-01f, +# 1.90004651670464985713e-01f, 1.87311814223800304768e-01f, +# 1.84630492426799269756e-01f, 1.81960655599522513892e-01f, +# 1.79302274522847582272e-01f, 1.76655321443734858455e-01f, +# 1.74019770081838553999e-01f, 1.71395595637505754327e-01f, +# 1.68782774801211288285e-01f, 1.66181285764481906364e-01f, +# 1.63591108232365584074e-01f, 1.61012223437511009516e-01f, +# 1.58444614155924284882e-01f, 1.55888264724479197465e-01f, +# 1.53343161060262855866e-01f, 1.50809290681845675763e-01f, +# 1.48286642732574552861e-01f, 1.45775208005994028060e-01f, +# 1.43274978973513461566e-01f, 1.40785949814444699690e-01f, +# 1.38308116448550733057e-01f, 1.35841476571253755301e-01f, +# 1.33386029691669155683e-01f, 1.30941777173644358090e-01f, +# 1.28508722279999570981e-01f, 1.26086870220185887081e-01f, +# 1.23676228201596571932e-01f, 1.21276805484790306533e-01f, +# 1.18888613442910059947e-01f, 1.16511665625610869035e-01f, +# 1.14145977827838487895e-01f, 1.11791568163838089811e-01f, +# 1.09448457146811797824e-01f, 1.07116667774683801961e-01f, +# 1.04796225622487068629e-01f, 1.02487158941935246892e-01f, +# 1.00189498768810017482e-01f, 9.79032790388624646338e-02f, +# 9.56285367130089991594e-02f, 9.33653119126910124859e-02f, +# 9.11136480663737591268e-02f, 8.88735920682758862021e-02f, +# 8.66451944505580717859e-02f, 8.44285095703534715916e-02f, +# 8.22235958132029043366e-02f, 8.00305158146630696292e-02f, +# 7.78493367020961224423e-02f, 7.56801303589271778804e-02f, +# 7.35229737139813238622e-02f, 7.13779490588904025339e-02f, +# 6.92451443970067553879e-02f, 6.71246538277884968737e-02f, +# 6.50165779712428976156e-02f, 6.29210244377581412456e-02f, +# 6.08381083495398780614e-02f, 5.87679529209337372930e-02f, +# 5.67106901062029017391e-02f, 5.46664613248889208474e-02f, +# 5.26354182767921896513e-02f, 5.06177238609477817000e-02f, +# 4.86135532158685421122e-02f, 4.66230949019303814174e-02f, +# 4.46465522512944634759e-02f, 4.26841449164744590750e-02f, +# 4.07361106559409394401e-02f, 3.88027074045261474722e-02f, +# 3.68842156885673053135e-02f, 3.49809414617161251737e-02f, +# 3.30932194585785779961e-02f, 3.12214171919203004046e-02f, +# 2.93659397581333588001e-02f, 2.75272356696031131329e-02f, +# 2.57058040085489103443e-02f, 2.39022033057958785407e-02f, +# 2.21170627073088502113e-02f, 2.03510962300445102935e-02f, +# 1.86051212757246224594e-02f, 1.68800831525431419000e-02f, +# 1.51770883079353092332e-02f, 1.34974506017398673818e-02f, +# 1.18427578579078790488e-02f, 1.02149714397014590439e-02f, +# 8.61658276939872638800e-03f, 7.05087547137322242369e-03f, +# 5.52240329925099155545e-03f, 4.03797259336302356153e-03f, +# 2.60907274610215926189e-03f, 1.26028593049859797236e-03f}; +# +# static const uint64_t ke_double[] = { +# 0x001C5214272497C6, 0x0000000000000000, 0x00137D5BD79C317E, +# 0x00186EF58E3F3C10, 0x001A9BB7320EB0AE, 0x001BD127F719447C, +# 0x001C951D0F88651A, 0x001D1BFE2D5C3972, 0x001D7E5BD56B18B2, +# 0x001DC934DD172C70, 0x001E0409DFAC9DC8, 0x001E337B71D47836, +# 0x001E5A8B177CB7A2, 0x001E7B42096F046C, 0x001E970DAF08AE3E, +# 0x001EAEF5B14EF09E, 0x001EC3BD07B46556, 0x001ED5F6F08799CE, +# 0x001EE614AE6E5688, 0x001EF46ECA361CD0, 0x001F014B76DDD4A4, +# 0x001F0CE313A796B6, 0x001F176369F1F77A, 0x001F20F20C452570, +# 0x001F29AE1951A874, 0x001F31B18FB95532, 0x001F39125157C106, +# 0x001F3FE2EB6E694C, 0x001F463332D788FA, 0x001F4C10BF1D3A0E, +# 0x001F51874C5C3322, 0x001F56A109C3ECC0, 0x001F5B66D9099996, +# 0x001F5FE08210D08C, 0x001F6414DD445772, 0x001F6809F6859678, +# 0x001F6BC52A2B02E6, 0x001F6F4B3D32E4F4, 0x001F72A07190F13A, +# 0x001F75C8974D09D6, 0x001F78C71B045CC0, 0x001F7B9F12413FF4, +# 0x001F7E5346079F8A, 0x001F80E63BE21138, 0x001F835A3DAD9162, +# 0x001F85B16056B912, 0x001F87ED89B24262, 0x001F8A10759374FA, +# 0x001F8C1BBA3D39AC, 0x001F8E10CC45D04A, 0x001F8FF102013E16, +# 0x001F91BD968358E0, 0x001F9377AC47AFD8, 0x001F95204F8B64DA, +# 0x001F96B878633892, 0x001F98410C968892, 0x001F99BAE146BA80, +# 0x001F9B26BC697F00, 0x001F9C85561B717A, 0x001F9DD759CFD802, +# 0x001F9F1D6761A1CE, 0x001FA058140936C0, 0x001FA187EB3A3338, +# 0x001FA2AD6F6BC4FC, 0x001FA3C91ACE0682, 0x001FA4DB5FEE6AA2, +# 0x001FA5E4AA4D097C, 0x001FA6E55EE46782, 0x001FA7DDDCA51EC4, +# 0x001FA8CE7CE6A874, 0x001FA9B793CE5FEE, 0x001FAA9970ADB858, +# 0x001FAB745E588232, 0x001FAC48A3740584, 0x001FAD1682BF9FE8, +# 0x001FADDE3B5782C0, 0x001FAEA008F21D6C, 0x001FAF5C2418B07E, +# 0x001FB012C25B7A12, 0x001FB0C41681DFF4, 0x001FB17050B6F1FA, +# 0x001FB2179EB2963A, 0x001FB2BA2BDFA84A, 0x001FB358217F4E18, +# 0x001FB3F1A6C9BE0C, 0x001FB486E10CACD6, 0x001FB517F3C793FC, +# 0x001FB5A500C5FDAA, 0x001FB62E2837FE58, 0x001FB6B388C9010A, +# 0x001FB7353FB50798, 0x001FB7B368DC7DA8, 0x001FB82E1ED6BA08, +# 0x001FB8A57B0347F6, 0x001FB919959A0F74, 0x001FB98A85BA7204, +# 0x001FB9F861796F26, 0x001FBA633DEEE286, 0x001FBACB2F41EC16, +# 0x001FBB3048B49144, 0x001FBB929CAEA4E2, 0x001FBBF23CC8029E, +# 0x001FBC4F39D22994, 0x001FBCA9A3E140D4, 0x001FBD018A548F9E, +# 0x001FBD56FBDE729C, 0x001FBDAA068BD66A, 0x001FBDFAB7CB3F40, +# 0x001FBE491C7364DE, 0x001FBE9540C9695E, 0x001FBEDF3086B128, +# 0x001FBF26F6DE6174, 0x001FBF6C9E828AE2, 0x001FBFB031A904C4, +# 0x001FBFF1BA0FFDB0, 0x001FC03141024588, 0x001FC06ECF5B54B2, +# 0x001FC0AA6D8B1426, 0x001FC0E42399698A, 0x001FC11BF9298A64, +# 0x001FC151F57D1942, 0x001FC1861F770F4A, 0x001FC1B87D9E74B4, +# 0x001FC1E91620EA42, 0x001FC217EED505DE, 0x001FC2450D3C83FE, +# 0x001FC27076864FC2, 0x001FC29A2F90630E, 0x001FC2C23CE98046, +# 0x001FC2E8A2D2C6B4, 0x001FC30D654122EC, 0x001FC33087DE9C0E, +# 0x001FC3520E0B7EC6, 0x001FC371FADF66F8, 0x001FC390512A2886, +# 0x001FC3AD137497FA, 0x001FC3C844013348, 0x001FC3E1E4CCAB40, +# 0x001FC3F9F78E4DA8, 0x001FC4107DB85060, 0x001FC4257877FD68, +# 0x001FC438E8B5BFC6, 0x001FC44ACF15112A, 0x001FC45B2BF447E8, +# 0x001FC469FF6C4504, 0x001FC477495001B2, 0x001FC483092BFBB8, +# 0x001FC48D3E457FF6, 0x001FC495E799D21A, 0x001FC49D03DD30B0, +# 0x001FC4A29179B432, 0x001FC4A68E8E07FC, 0x001FC4A8F8EBFB8C, +# 0x001FC4A9CE16EA9E, 0x001FC4A90B41FA34, 0x001FC4A6AD4E28A0, +# 0x001FC4A2B0C82E74, 0x001FC49D11E62DE2, 0x001FC495CC852DF4, +# 0x001FC48CDC265EC0, 0x001FC4823BEC237A, 0x001FC475E696DEE6, +# 0x001FC467D6817E82, 0x001FC458059DC036, 0x001FC4466D702E20, +# 0x001FC433070BCB98, 0x001FC41DCB0D6E0E, 0x001FC406B196BBF6, +# 0x001FC3EDB248CB62, 0x001FC3D2C43E593C, 0x001FC3B5DE0591B4, +# 0x001FC396F599614C, 0x001FC376005A4592, 0x001FC352F3069370, +# 0x001FC32DC1B22818, 0x001FC3065FBD7888, 0x001FC2DCBFCBF262, +# 0x001FC2B0D3B99F9E, 0x001FC2828C8FFCF0, 0x001FC251DA79F164, +# 0x001FC21EACB6D39E, 0x001FC1E8F18C6756, 0x001FC1B09637BB3C, +# 0x001FC17586DCCD10, 0x001FC137AE74D6B6, 0x001FC0F6F6BB2414, +# 0x001FC0B348184DA4, 0x001FC06C898BAFF0, 0x001FC022A092F364, +# 0x001FBFD5710F72B8, 0x001FBF84DD29488E, 0x001FBF30C52FC60A, +# 0x001FBED907770CC6, 0x001FBE7D80327DDA, 0x001FBE1E094BA614, +# 0x001FBDBA7A354408, 0x001FBD52A7B9F826, 0x001FBCE663C6201A, +# 0x001FBC757D2C4DE4, 0x001FBBFFBF63B7AA, 0x001FBB84F23FE6A2, +# 0x001FBB04D9A0D18C, 0x001FBA7F351A70AC, 0x001FB9F3BF92B618, +# 0x001FB9622ED4ABFC, 0x001FB8CA33174A16, 0x001FB82B76765B54, +# 0x001FB7859C5B895C, 0x001FB6D840D55594, 0x001FB622F7D96942, +# 0x001FB5654C6F37E0, 0x001FB49EBFBF69D2, 0x001FB3CEC803E746, +# 0x001FB2F4CF539C3E, 0x001FB21032442852, 0x001FB1203E5A9604, +# 0x001FB0243042E1C2, 0x001FAF1B31C479A6, 0x001FAE045767E104, +# 0x001FACDE9DBF2D72, 0x001FABA8E640060A, 0x001FAA61F399FF28, +# 0x001FA908656F66A2, 0x001FA79AB3508D3C, 0x001FA61726D1F214, +# 0x001FA47BD48BEA00, 0x001FA2C693C5C094, 0x001FA0F4F47DF314, +# 0x001F9F04336BBE0A, 0x001F9CF12B79F9BC, 0x001F9AB84415ABC4, +# 0x001F98555B782FB8, 0x001F95C3ABD03F78, 0x001F92FDA9CEF1F2, +# 0x001F8FFCDA9AE41C, 0x001F8CB99E7385F8, 0x001F892AEC479606, +# 0x001F8545F904DB8E, 0x001F80FDC336039A, 0x001F7C427839E926, +# 0x001F7700A3582ACC, 0x001F71200F1A241C, 0x001F6A8234B7352A, +# 0x001F630000A8E266, 0x001F5A66904FE3C4, 0x001F50724ECE1172, +# 0x001F44C7665C6FDA, 0x001F36E5A38A59A2, 0x001F26143450340A, +# 0x001F113E047B0414, 0x001EF6AEFA57CBE6, 0x001ED38CA188151E, +# 0x001EA2A61E122DB0, 0x001E5961C78B267C, 0x001DDDF62BAC0BB0, +# 0x001CDB4DD9E4E8C0}; +# +# static const double we_double[] = { +# 9.655740063209182975e-16, 7.089014243955414331e-18, +# 1.163941249669122378e-17, 1.524391512353216015e-17, +# 1.833284885723743916e-17, 2.108965109464486630e-17, +# 2.361128077843138196e-17, 2.595595772310893952e-17, +# 2.816173554197752338e-17, 3.025504130321382330e-17, +# 3.225508254836375280e-17, 3.417632340185027033e-17, +# 3.602996978734452488e-17, 3.782490776869649048e-17, +# 3.956832198097553231e-17, 4.126611778175946428e-17, +# 4.292321808442525631e-17, 4.454377743282371417e-17, +# 4.613133981483185932e-17, 4.768895725264635940e-17, +# 4.921928043727962847e-17, 5.072462904503147014e-17, +# 5.220704702792671737e-17, 5.366834661718192181e-17, +# 5.511014372835094717e-17, 5.653388673239667134e-17, +# 5.794088004852766616e-17, 5.933230365208943081e-17, +# 6.070922932847179572e-17, 6.207263431163193485e-17, +# 6.342341280303076511e-17, 6.476238575956142121e-17, +# 6.609030925769405241e-17, 6.740788167872722244e-17, +# 6.871574991183812442e-17, 7.001451473403929616e-17, +# 7.130473549660643409e-17, 7.258693422414648352e-17, +# 7.386159921381791997e-17, 7.512918820723728089e-17, +# 7.639013119550825792e-17, 7.764483290797848102e-17, +# 7.889367502729790548e-17, 8.013701816675454434e-17, +# 8.137520364041762206e-17, 8.260855505210038174e-17, +# 8.383737972539139383e-17, 8.506196999385323132e-17, +# 8.628260436784112996e-17, 8.749954859216182511e-17, +# 8.871305660690252281e-17, 8.992337142215357066e-17, +# 9.113072591597909173e-17, 9.233534356381788123e-17, +# 9.353743910649128938e-17, 9.473721916312949566e-17, +# 9.593488279457997317e-17, 9.713062202221521206e-17, +# 9.832462230649511362e-17, 9.951706298915071878e-17, +# 1.007081177024294931e-16, 1.018979547484694078e-16, +# 1.030867374515421954e-16, 1.042746244856188556e-16, +# 1.054617701794576406e-16, 1.066483248011914702e-16, +# 1.078344348241948498e-16, 1.090202431758350473e-16, +# 1.102058894705578110e-16, 1.113915102286197502e-16, +# 1.125772390816567488e-16, 1.137632069661684705e-16, +# 1.149495423059009298e-16, 1.161363711840218308e-16, +# 1.173238175059045788e-16, 1.185120031532669434e-16, +# 1.197010481303465158e-16, 1.208910707027385520e-16, +# 1.220821875294706151e-16, 1.232745137888415193e-16, +# 1.244681632985112523e-16, 1.256632486302898513e-16, +# 1.268598812200397542e-16, 1.280581714730749379e-16, +# 1.292582288654119552e-16, 1.304601620412028847e-16, +# 1.316640789066572582e-16, 1.328700867207380889e-16, +# 1.340782921828999433e-16, 1.352888015181175458e-16, +# 1.365017205594397770e-16, 1.377171548282880964e-16, +# 1.389352096127063919e-16, 1.401559900437571538e-16, +# 1.413796011702485188e-16, 1.426061480319665444e-16, +# 1.438357357315790180e-16, 1.450684695053687684e-16, +# 1.463044547929475721e-16, 1.475437973060951633e-16, +# 1.487866030968626066e-16, 1.500329786250736949e-16, +# 1.512830308253539427e-16, 1.525368671738125550e-16, +# 1.537945957544996933e-16, 1.550563253257577148e-16, +# 1.563221653865837505e-16, 1.575922262431176140e-16, +# 1.588666190753684151e-16, 1.601454560042916733e-16, +# 1.614288501593278662e-16, 1.627169157465130500e-16, +# 1.640097681172717950e-16, 1.653075238380036909e-16, +# 1.666103007605742067e-16, 1.679182180938228863e-16, +# 1.692313964762022267e-16, 1.705499580496629830e-16, +# 1.718740265349031656e-16, 1.732037273081008369e-16, +# 1.745391874792533975e-16, 1.758805359722491379e-16, +# 1.772279036068006489e-16, 1.785814231823732619e-16, +# 1.799412295642463721e-16, 1.813074597718501559e-16, +# 1.826802530695252266e-16, 1.840597510598587828e-16, +# 1.854460977797569461e-16, 1.868394397994192684e-16, +# 1.882399263243892051e-16, 1.896477093008616722e-16, +# 1.910629435244376536e-16, 1.924857867525243818e-16, +# 1.939163998205899420e-16, 1.953549467624909132e-16, +# 1.968015949351037382e-16, 1.982565151475019047e-16, +# 1.997198817949342081e-16, 2.011918729978734671e-16, +# 2.026726707464198289e-16, 2.041624610503588774e-16, +# 2.056614340951917875e-16, 2.071697844044737034e-16, +# 2.086877110088159721e-16, 2.102154176219292789e-16, +# 2.117531128241075913e-16, 2.133010102535779087e-16, +# 2.148593288061663316e-16, 2.164282928437604723e-16, +# 2.180081324120784027e-16, 2.195990834682870728e-16, +# 2.212013881190495942e-16, 2.228152948696180545e-16, +# 2.244410588846308588e-16, 2.260789422613173739e-16, +# 2.277292143158621037e-16, 2.293921518837311354e-16, +# 2.310680396348213318e-16, 2.327571704043534613e-16, +# 2.344598455404957859e-16, 2.361763752697773994e-16, +# 2.379070790814276700e-16, 2.396522861318623520e-16, +# 2.414123356706293277e-16, 2.431875774892255956e-16, +# 2.449783723943070217e-16, 2.467850927069288738e-16, +# 2.486081227895851719e-16, 2.504478596029557040e-16, +# 2.523047132944217013e-16, 2.541791078205812227e-16, +# 2.560714816061770759e-16, 2.579822882420530896e-16, +# 2.599119972249746917e-16, 2.618610947423924219e-16, +# 2.638300845054942823e-16, 2.658194886341845120e-16, +# 2.678298485979525166e-16, 2.698617262169488933e-16, +# 2.719157047279818500e-16, 2.739923899205814823e-16, +# 2.760924113487617126e-16, 2.782164236246436081e-16, +# 2.803651078006983464e-16, 2.825391728480253184e-16, +# 2.847393572388174091e-16, 2.869664306419817679e-16, +# 2.892211957417995598e-16, 2.915044901905293183e-16, +# 2.938171887070028633e-16, 2.961602053345465687e-16, +# 2.985344958730045276e-16, 3.009410605012618141e-16, +# 3.033809466085003416e-16, 3.058552518544860874e-16, +# 3.083651274815310004e-16, 3.109117819034266344e-16, +# 3.134964845996663118e-16, 3.161205703467105734e-16, +# 3.187854438219713117e-16, 3.214925846206797361e-16, +# 3.242435527309451638e-16, 3.270399945182240440e-16, +# 3.298836492772283149e-16, 3.327763564171671408e-16, +# 3.357200633553244075e-16, 3.387168342045505162e-16, +# 3.417688593525636996e-16, 3.448784660453423890e-16, +# 3.480481301037442286e-16, 3.512804889222979418e-16, +# 3.545783559224791863e-16, 3.579447366604276541e-16, +# 3.613828468219060593e-16, 3.648961323764542545e-16, +# 3.684882922095621322e-16, 3.721633036080207290e-16, +# 3.759254510416256532e-16, 3.797793587668874387e-16, +# 3.837300278789213687e-16, 3.877828785607895292e-16, +# 3.919437984311428867e-16, 3.962191980786774996e-16, +# 4.006160751056541688e-16, 4.051420882956573177e-16, +# 4.098056438903062509e-16, 4.146159964290904582e-16, +# 4.195833672073398926e-16, 4.247190841824385048e-16, +# 4.300357481667470702e-16, 4.355474314693952008e-16, +# 4.412699169036069903e-16, 4.472209874259932285e-16, +# 4.534207798565834480e-16, 4.598922204905932469e-16, +# 4.666615664711475780e-16, 4.737590853262492027e-16, +# 4.812199172829237933e-16, 4.890851827392209900e-16, +# 4.974034236191939753e-16, 5.062325072144159699e-16, +# 5.156421828878082953e-16, 5.257175802022274839e-16, +# 5.365640977112021618e-16, 5.483144034258703912e-16, +# 5.611387454675159622e-16, 5.752606481503331688e-16, +# 5.909817641652102998e-16, 6.087231416180907671e-16, +# 6.290979034877557049e-16, 6.530492053564040799e-16, +# 6.821393079028928626e-16, 7.192444966089361564e-16, +# 7.706095350032096755e-16, 8.545517038584027421e-16}; +# +# static const double fe_double[] = { +# 1.000000000000000000e+00, 9.381436808621747003e-01, +# 9.004699299257464817e-01, 8.717043323812035949e-01, +# 8.477855006239896074e-01, 8.269932966430503241e-01, +# 8.084216515230083777e-01, 7.915276369724956185e-01, +# 7.759568520401155522e-01, 7.614633888498962833e-01, +# 7.478686219851951034e-01, 7.350380924314234843e-01, +# 7.228676595935720206e-01, 7.112747608050760117e-01, +# 7.001926550827881623e-01, 6.895664961170779872e-01, +# 6.793505722647653622e-01, 6.695063167319247333e-01, +# 6.600008410789997004e-01, 6.508058334145710999e-01, +# 6.418967164272660897e-01, 6.332519942143660652e-01, +# 6.248527387036659775e-01, 6.166821809152076561e-01, +# 6.087253820796220127e-01, 6.009689663652322267e-01, +# 5.934009016917334289e-01, 5.860103184772680329e-01, +# 5.787873586028450257e-01, 5.717230486648258170e-01, +# 5.648091929124001709e-01, 5.580382822625874484e-01, +# 5.514034165406412891e-01, 5.448982376724396115e-01, +# 5.385168720028619127e-01, 5.322538802630433219e-01, +# 5.261042139836197284e-01, 5.200631773682335979e-01, +# 5.141263938147485613e-01, 5.082897764106428795e-01, +# 5.025495018413477233e-01, 4.969019872415495476e-01, +# 4.913438695940325340e-01, 4.858719873418849144e-01, +# 4.804833639304542103e-01, 4.751751930373773747e-01, +# 4.699448252839599771e-01, 4.647897562504261781e-01, +# 4.597076156421376902e-01, 4.546961574746155033e-01, +# 4.497532511627549967e-01, 4.448768734145485126e-01, +# 4.400651008423538957e-01, 4.353161032156365740e-01, +# 4.306281372884588343e-01, 4.259995411430343437e-01, +# 4.214287289976165751e-01, 4.169141864330028757e-01, +# 4.124544659971611793e-01, 4.080481831520323954e-01, +# 4.036940125305302773e-01, 3.993906844752310725e-01, +# 3.951369818332901573e-01, 3.909317369847971069e-01, +# 3.867738290841376547e-01, 3.826621814960098344e-01, +# 3.785957594095807899e-01, 3.745735676159021588e-01, +# 3.705946484351460013e-01, 3.666580797815141568e-01, +# 3.627629733548177748e-01, 3.589084729487497794e-01, +# 3.550937528667874599e-01, 3.513180164374833381e-01, +# 3.475804946216369817e-01, 3.438804447045024082e-01, +# 3.402171490667800224e-01, 3.365899140286776059e-01, +# 3.329980687618089852e-01, 3.294409642641363267e-01, +# 3.259179723935561879e-01, 3.224284849560891675e-01, +# 3.189719128449572394e-01, 3.155476852271289490e-01, +# 3.121552487741795501e-01, 3.087940669345601852e-01, +# 3.054636192445902565e-01, 3.021634006756935276e-01, +# 2.988929210155817917e-01, 2.956517042812611962e-01, +# 2.924392881618925744e-01, 2.892552234896777485e-01, +# 2.860990737370768255e-01, 2.829704145387807457e-01, +# 2.798688332369729248e-01, 2.767939284485173568e-01, +# 2.737453096528029706e-01, 2.707225967990600224e-01, +# 2.677254199320447947e-01, 2.647534188350622042e-01, +# 2.618062426893629779e-01, 2.588835497490162285e-01, +# 2.559850070304153791e-01, 2.531102900156294577e-01, +# 2.502590823688622956e-01, 2.474310756653276266e-01, +# 2.446259691318921070e-01, 2.418434693988772144e-01, +# 2.390832902624491774e-01, 2.363451524570596429e-01, +# 2.336287834374333461e-01, 2.309339171696274118e-01, +# 2.282602939307167011e-01, 2.256076601166840667e-01, +# 2.229757680581201940e-01, 2.203643758433594946e-01, +# 2.177732471487005272e-01, 2.152021510753786837e-01, +# 2.126508619929782795e-01, 2.101191593889882581e-01, +# 2.076068277242220372e-01, 2.051136562938377095e-01, +# 2.026394390937090173e-01, 2.001839746919112650e-01, +# 1.977470661050988732e-01, 1.953285206795632167e-01, +# 1.929281499767713515e-01, 1.905457696631953912e-01, +# 1.881811994042543179e-01, 1.858342627621971110e-01, +# 1.835047870977674633e-01, 1.811926034754962889e-01, +# 1.788975465724783054e-01, 1.766194545904948843e-01, +# 1.743581691713534942e-01, 1.721135353153200598e-01, +# 1.698854013025276610e-01, 1.676736186172501919e-01, +# 1.654780418749360049e-01, 1.632985287519018169e-01, +# 1.611349399175920349e-01, 1.589871389693142123e-01, +# 1.568549923693652315e-01, 1.547383693844680830e-01, +# 1.526371420274428570e-01, 1.505511850010398944e-01, +# 1.484803756438667910e-01, 1.464245938783449441e-01, +# 1.443837221606347754e-01, 1.423576454324722018e-01, +# 1.403462510748624548e-01, 1.383494288635802039e-01, +# 1.363670709264288572e-01, 1.343990717022136294e-01, +# 1.324453279013875218e-01, 1.305057384683307731e-01, +# 1.285802045452281717e-01, 1.266686294375106714e-01, +# 1.247709185808309612e-01, 1.228869795095451356e-01, +# 1.210167218266748335e-01, 1.191600571753276827e-01, +# 1.173168992115555670e-01, 1.154871635786335338e-01, +# 1.136707678827443141e-01, 1.118676316700562973e-01, +# 1.100776764051853845e-01, 1.083008254510337970e-01, +# 1.065370040500016602e-01, 1.047861393065701724e-01, +# 1.030481601712577161e-01, 1.013229974259536315e-01, +# 9.961058367063713170e-02, 9.791085331149219917e-02, +# 9.622374255043279756e-02, 9.454918937605585882e-02, +# 9.288713355604354127e-02, 9.123751663104015530e-02, +# 8.960028191003285847e-02, 8.797537446727021759e-02, +# 8.636274114075691288e-02, 8.476233053236811865e-02, +# 8.317409300963238272e-02, 8.159798070923741931e-02, +# 8.003394754231990538e-02, 7.848194920160642130e-02, +# 7.694194317048050347e-02, 7.541388873405840965e-02, +# 7.389774699236474620e-02, 7.239348087570873780e-02, +# 7.090105516237182881e-02, 6.942043649872875477e-02, +# 6.795159342193660135e-02, 6.649449638533977414e-02, +# 6.504911778675374900e-02, 6.361543199980733421e-02, +# 6.219341540854099459e-02, 6.078304644547963265e-02, +# 5.938430563342026597e-02, 5.799717563120065922e-02, +# 5.662164128374287675e-02, 5.525768967669703741e-02, +# 5.390531019604608703e-02, 5.256449459307169225e-02, +# 5.123523705512628146e-02, 4.991753428270637172e-02, +# 4.861138557337949667e-02, 4.731679291318154762e-02, +# 4.603376107617516977e-02, 4.476229773294328196e-02, +# 4.350241356888818328e-02, 4.225412241331623353e-02, +# 4.101744138041481941e-02, 3.979239102337412542e-02, +# 3.857899550307485742e-02, 3.737728277295936097e-02, +# 3.618728478193142251e-02, 3.500903769739741045e-02, +# 3.384258215087432992e-02, 3.268796350895953468e-02, +# 3.154523217289360859e-02, 3.041444391046660423e-02, +# 2.929566022463739317e-02, 2.818894876397863569e-02, +# 2.709438378095579969e-02, 2.601204664513421735e-02, +# 2.494202641973178314e-02, 2.388442051155817078e-02, +# 2.283933540638524023e-02, 2.180688750428358066e-02, +# 2.078720407257811723e-02, 1.978042433800974303e-02, +# 1.878670074469603046e-02, 1.780620041091136169e-02, +# 1.683910682603994777e-02, 1.588562183997316302e-02, +# 1.494596801169114850e-02, 1.402039140318193759e-02, +# 1.310916493125499106e-02, 1.221259242625538123e-02, +# 1.133101359783459695e-02, 1.046481018102997894e-02, +# 9.614413642502209895e-03, 8.780314985808975251e-03, +# 7.963077438017040002e-03, 7.163353183634983863e-03, +# 6.381905937319179087e-03, 5.619642207205483020e-03, +# 4.877655983542392333e-03, 4.157295120833795314e-03, +# 3.460264777836904049e-03, 2.788798793574076128e-03, +# 2.145967743718906265e-03, 1.536299780301572356e-03, +# 9.672692823271745359e-04, 4.541343538414967652e-04}; +# +# static const uint32_t ke_float[] = { +# 0x00714851UL, 0x00000000UL, 0x004DF56FUL, 0x0061BBD6UL, 0x006A6EDDUL, +# 0x006F44A0UL, 0x00725474UL, 0x00746FF9UL, 0x0075F96FUL, 0x007724D3UL, +# 0x00781027UL, 0x0078CDEEUL, 0x00796A2CUL, 0x0079ED08UL, 0x007A5C37UL, +# 0x007ABBD7UL, 0x007B0EF4UL, 0x007B57DCUL, 0x007B9853UL, 0x007BD1BBUL, +# 0x007C052EUL, 0x007C338CUL, 0x007C5D8EUL, 0x007C83C8UL, 0x007CA6B8UL, +# 0x007CC6C6UL, 0x007CE449UL, 0x007CFF8CUL, 0x007D18CDUL, 0x007D3043UL, +# 0x007D461DUL, 0x007D5A84UL, 0x007D6D9BUL, 0x007D7F82UL, 0x007D9053UL, +# 0x007DA028UL, 0x007DAF15UL, 0x007DBD2DUL, 0x007DCA82UL, 0x007DD722UL, +# 0x007DE31CUL, 0x007DEE7CUL, 0x007DF94DUL, 0x007E0399UL, 0x007E0D69UL, +# 0x007E16C6UL, 0x007E1FB6UL, 0x007E2842UL, 0x007E306FUL, 0x007E3843UL, +# 0x007E3FC4UL, 0x007E46F6UL, 0x007E4DDFUL, 0x007E5481UL, 0x007E5AE2UL, +# 0x007E6104UL, 0x007E66ECUL, 0x007E6C9BUL, 0x007E7215UL, 0x007E775DUL, +# 0x007E7C76UL, 0x007E8160UL, 0x007E8620UL, 0x007E8AB6UL, 0x007E8F24UL, +# 0x007E936DUL, 0x007E9793UL, 0x007E9B95UL, 0x007E9F77UL, 0x007EA33AUL, +# 0x007EA6DEUL, 0x007EAA66UL, 0x007EADD1UL, 0x007EB123UL, 0x007EB45AUL, +# 0x007EB779UL, 0x007EBA80UL, 0x007EBD71UL, 0x007EC04BUL, 0x007EC310UL, +# 0x007EC5C1UL, 0x007EC85EUL, 0x007ECAE9UL, 0x007ECD61UL, 0x007ECFC7UL, +# 0x007ED21CUL, 0x007ED460UL, 0x007ED694UL, 0x007ED8B9UL, 0x007EDACEUL, +# 0x007EDCD5UL, 0x007EDECEUL, 0x007EE0B8UL, 0x007EE296UL, 0x007EE466UL, +# 0x007EE62AUL, 0x007EE7E2UL, 0x007EE98DUL, 0x007EEB2DUL, 0x007EECC1UL, +# 0x007EEE4AUL, 0x007EEFC9UL, 0x007EF13DUL, 0x007EF2A7UL, 0x007EF406UL, +# 0x007EF55CUL, 0x007EF6A8UL, 0x007EF7EBUL, 0x007EF924UL, 0x007EFA55UL, +# 0x007EFB7DUL, 0x007EFC9CUL, 0x007EFDB2UL, 0x007EFEC1UL, 0x007EFFC7UL, +# 0x007F00C5UL, 0x007F01BBUL, 0x007F02AAUL, 0x007F0391UL, 0x007F0470UL, +# 0x007F0548UL, 0x007F0618UL, 0x007F06E2UL, 0x007F07A4UL, 0x007F0860UL, +# 0x007F0914UL, 0x007F09C2UL, 0x007F0A69UL, 0x007F0B09UL, 0x007F0BA3UL, +# 0x007F0C36UL, 0x007F0CC2UL, 0x007F0D48UL, 0x007F0DC8UL, 0x007F0E41UL, +# 0x007F0EB4UL, 0x007F0F21UL, 0x007F0F88UL, 0x007F0FE8UL, 0x007F1042UL, +# 0x007F1096UL, 0x007F10E4UL, 0x007F112BUL, 0x007F116DUL, 0x007F11A8UL, +# 0x007F11DDUL, 0x007F120CUL, 0x007F1235UL, 0x007F1258UL, 0x007F1274UL, +# 0x007F128AUL, 0x007F129AUL, 0x007F12A4UL, 0x007F12A7UL, 0x007F12A4UL, +# 0x007F129BUL, 0x007F128BUL, 0x007F1274UL, 0x007F1257UL, 0x007F1233UL, +# 0x007F1209UL, 0x007F11D8UL, 0x007F119FUL, 0x007F1160UL, 0x007F111AUL, +# 0x007F10CCUL, 0x007F1077UL, 0x007F101BUL, 0x007F0FB7UL, 0x007F0F4BUL, +# 0x007F0ED7UL, 0x007F0E5CUL, 0x007F0DD8UL, 0x007F0D4CUL, 0x007F0CB7UL, +# 0x007F0C19UL, 0x007F0B73UL, 0x007F0AC3UL, 0x007F0A0AUL, 0x007F0947UL, +# 0x007F087BUL, 0x007F07A4UL, 0x007F06C2UL, 0x007F05D6UL, 0x007F04DFUL, +# 0x007F03DCUL, 0x007F02CDUL, 0x007F01B2UL, 0x007F008BUL, 0x007EFF56UL, +# 0x007EFE13UL, 0x007EFCC3UL, 0x007EFB64UL, 0x007EF9F6UL, 0x007EF878UL, +# 0x007EF6EAUL, 0x007EF54BUL, 0x007EF39AUL, 0x007EF1D6UL, 0x007EEFFFUL, +# 0x007EEE14UL, 0x007EEC13UL, 0x007EE9FDUL, 0x007EE7CFUL, 0x007EE589UL, +# 0x007EE329UL, 0x007EE0AEUL, 0x007EDE16UL, 0x007EDB61UL, 0x007ED88CUL, +# 0x007ED595UL, 0x007ED27BUL, 0x007ECF3BUL, 0x007ECBD3UL, 0x007EC841UL, +# 0x007EC481UL, 0x007EC091UL, 0x007EBC6DUL, 0x007EB811UL, 0x007EB37AUL, +# 0x007EAEA4UL, 0x007EA988UL, 0x007EA422UL, 0x007E9E6BUL, 0x007E985DUL, +# 0x007E91EFUL, 0x007E8B1AUL, 0x007E83D4UL, 0x007E7C11UL, 0x007E73C5UL, +# 0x007E6AE1UL, 0x007E6155UL, 0x007E570FUL, 0x007E4BF7UL, 0x007E3FF3UL, +# 0x007E32E6UL, 0x007E24ACUL, 0x007E1518UL, 0x007E03F7UL, 0x007DF10AUL, +# 0x007DDC03UL, 0x007DC480UL, 0x007DAA09UL, 0x007D8C00UL, 0x007D699AUL, +# 0x007D41C9UL, 0x007D131EUL, 0x007CDB97UL, 0x007C9851UL, 0x007C44F8UL, +# 0x007BDABCUL, 0x007B4E33UL, 0x007A8A98UL, 0x00796587UL, 0x007777D9UL, +# 0x00736D37UL, +# }; +# static const float we_float[] = { +# 1.03677719e-06F, 7.61177108e-09F, 1.24977240e-08F, 1.63680292e-08F, +# 1.96847466e-08F, 2.26448404e-08F, 2.53524197e-08F, 2.78699974e-08F, +# 3.02384333e-08F, 3.24861032e-08F, 3.46336312e-08F, 3.66965478e-08F, +# 3.86868855e-08F, 4.06141855e-08F, 4.24861622e-08F, 4.43091566e-08F, +# 4.60884545e-08F, 4.78285168e-08F, 4.95331490e-08F, 5.12056279e-08F, +# 5.28488000e-08F, 5.44651557e-08F, 5.60568899e-08F, 5.76259484e-08F, +# 5.91740662e-08F, 6.07027987e-08F, 6.22135462e-08F, 6.37075759e-08F, +# 6.51860386e-08F, 6.66499836e-08F, 6.81003709e-08F, 6.95380822e-08F, +# 7.09639292e-08F, 7.23786618e-08F, 7.37829746e-08F, 7.51775128e-08F, +# 7.65628768e-08F, 7.79396272e-08F, 7.93082883e-08F, 8.06693516e-08F, +# 8.20232788e-08F, 8.33705045e-08F, 8.47114385e-08F, 8.60464681e-08F, +# 8.73759596e-08F, 8.87002606e-08F, 9.00197010e-08F, 9.13345948e-08F, +# 9.26452410e-08F, 9.39519249e-08F, 9.52549192e-08F, 9.65544849e-08F, +# 9.78508719e-08F, 9.91443202e-08F, 1.00435060e-07F, 1.01723315e-07F, +# 1.03009296e-07F, 1.04293211e-07F, 1.05575259e-07F, 1.06855633e-07F, +# 1.08134518e-07F, 1.09412096e-07F, 1.10688542e-07F, 1.11964025e-07F, +# 1.13238713e-07F, 1.14512767e-07F, 1.15786343e-07F, 1.17059595e-07F, +# 1.18332673e-07F, 1.19605723e-07F, 1.20878890e-07F, 1.22152313e-07F, +# 1.23426131e-07F, 1.24700479e-07F, 1.25975490e-07F, 1.27251294e-07F, +# 1.28528022e-07F, 1.29805799e-07F, 1.31084751e-07F, 1.32365001e-07F, +# 1.33646673e-07F, 1.34929886e-07F, 1.36214760e-07F, 1.37501415e-07F, +# 1.38789966e-07F, 1.40080532e-07F, 1.41373228e-07F, 1.42668169e-07F, +# 1.43965470e-07F, 1.45265245e-07F, 1.46567606e-07F, 1.47872669e-07F, +# 1.49180545e-07F, 1.50491348e-07F, 1.51805191e-07F, 1.53122186e-07F, +# 1.54442445e-07F, 1.55766083e-07F, 1.57093212e-07F, 1.58423946e-07F, +# 1.59758399e-07F, 1.61096684e-07F, 1.62438917e-07F, 1.63785214e-07F, +# 1.65135690e-07F, 1.66490462e-07F, 1.67849647e-07F, 1.69213364e-07F, +# 1.70581733e-07F, 1.71954874e-07F, 1.73332908e-07F, 1.74715958e-07F, +# 1.76104148e-07F, 1.77497602e-07F, 1.78896448e-07F, 1.80300814e-07F, +# 1.81710828e-07F, 1.83126623e-07F, 1.84548331e-07F, 1.85976086e-07F, +# 1.87410026e-07F, 1.88850288e-07F, 1.90297012e-07F, 1.91750343e-07F, +# 1.93210424e-07F, 1.94677403e-07F, 1.96151428e-07F, 1.97632653e-07F, +# 1.99121231e-07F, 2.00617321e-07F, 2.02121082e-07F, 2.03632677e-07F, +# 2.05152273e-07F, 2.06680040e-07F, 2.08216149e-07F, 2.09760777e-07F, +# 2.11314104e-07F, 2.12876312e-07F, 2.14447590e-07F, 2.16028129e-07F, +# 2.17618123e-07F, 2.19217773e-07F, 2.20827283e-07F, 2.22446862e-07F, +# 2.24076723e-07F, 2.25717086e-07F, 2.27368174e-07F, 2.29030216e-07F, +# 2.30703448e-07F, 2.32388110e-07F, 2.34084450e-07F, 2.35792720e-07F, +# 2.37513182e-07F, 2.39246101e-07F, 2.40991752e-07F, 2.42750416e-07F, +# 2.44522382e-07F, 2.46307948e-07F, 2.48107418e-07F, 2.49921109e-07F, +# 2.51749342e-07F, 2.53592452e-07F, 2.55450781e-07F, 2.57324683e-07F, +# 2.59214522e-07F, 2.61120673e-07F, 2.63043524e-07F, 2.64983476e-07F, +# 2.66940939e-07F, 2.68916342e-07F, 2.70910123e-07F, 2.72922739e-07F, +# 2.74954660e-07F, 2.77006373e-07F, 2.79078382e-07F, 2.81171210e-07F, +# 2.83285396e-07F, 2.85421503e-07F, 2.87580110e-07F, 2.89761822e-07F, +# 2.91967265e-07F, 2.94197089e-07F, 2.96451969e-07F, 2.98732610e-07F, +# 3.01039742e-07F, 3.03374127e-07F, 3.05736557e-07F, 3.08127859e-07F, +# 3.10548894e-07F, 3.13000563e-07F, 3.15483804e-07F, 3.17999599e-07F, +# 3.20548974e-07F, 3.23133003e-07F, 3.25752811e-07F, 3.28409576e-07F, +# 3.31104534e-07F, 3.33838984e-07F, 3.36614287e-07F, 3.39431878e-07F, +# 3.42293264e-07F, 3.45200034e-07F, 3.48153864e-07F, 3.51156520e-07F, +# 3.54209871e-07F, 3.57315892e-07F, 3.60476673e-07F, 3.63694431e-07F, +# 3.66971518e-07F, 3.70310433e-07F, 3.73713834e-07F, 3.77184553e-07F, +# 3.80725611e-07F, 3.84340234e-07F, 3.88031877e-07F, 3.91804239e-07F, +# 3.95661291e-07F, 3.99607304e-07F, 4.03646879e-07F, 4.07784981e-07F, +# 4.12026980e-07F, 4.16378695e-07F, 4.20846449e-07F, 4.25437124e-07F, +# 4.30158235e-07F, 4.35018005e-07F, 4.40025460e-07F, 4.45190536e-07F, +# 4.50524210e-07F, 4.56038644e-07F, 4.61747369e-07F, 4.67665494e-07F, +# 4.73809965e-07F, 4.80199879e-07F, 4.86856855e-07F, 4.93805512e-07F, +# 5.01074042e-07F, 5.08694944e-07F, 5.16705952e-07F, 5.25151216e-07F, +# 5.34082859e-07F, 5.43563016e-07F, 5.53666578e-07F, 5.64484953e-07F, +# 5.76131313e-07F, 5.88748108e-07F, 6.02518140e-07F, 6.17681418e-07F, +# 6.34561837e-07F, 6.53611496e-07F, 6.75488730e-07F, 7.01206245e-07F, +# 7.32441505e-07F, 7.72282898e-07F, 8.27435688e-07F, 9.17567905e-07F, +# }; +# static const float fe_float[] = { +# 1.00000000e+00F, 9.38143681e-01F, 9.00469930e-01F, 8.71704332e-01F, +# 8.47785501e-01F, 8.26993297e-01F, 8.08421652e-01F, 7.91527637e-01F, +# 7.75956852e-01F, 7.61463389e-01F, 7.47868622e-01F, 7.35038092e-01F, +# 7.22867660e-01F, 7.11274761e-01F, 7.00192655e-01F, 6.89566496e-01F, +# 6.79350572e-01F, 6.69506317e-01F, 6.60000841e-01F, 6.50805833e-01F, +# 6.41896716e-01F, 6.33251994e-01F, 6.24852739e-01F, 6.16682181e-01F, +# 6.08725382e-01F, 6.00968966e-01F, 5.93400902e-01F, 5.86010318e-01F, +# 5.78787359e-01F, 5.71723049e-01F, 5.64809193e-01F, 5.58038282e-01F, +# 5.51403417e-01F, 5.44898238e-01F, 5.38516872e-01F, 5.32253880e-01F, +# 5.26104214e-01F, 5.20063177e-01F, 5.14126394e-01F, 5.08289776e-01F, +# 5.02549502e-01F, 4.96901987e-01F, 4.91343870e-01F, 4.85871987e-01F, +# 4.80483364e-01F, 4.75175193e-01F, 4.69944825e-01F, 4.64789756e-01F, +# 4.59707616e-01F, 4.54696157e-01F, 4.49753251e-01F, 4.44876873e-01F, +# 4.40065101e-01F, 4.35316103e-01F, 4.30628137e-01F, 4.25999541e-01F, +# 4.21428729e-01F, 4.16914186e-01F, 4.12454466e-01F, 4.08048183e-01F, +# 4.03694013e-01F, 3.99390684e-01F, 3.95136982e-01F, 3.90931737e-01F, +# 3.86773829e-01F, 3.82662181e-01F, 3.78595759e-01F, 3.74573568e-01F, +# 3.70594648e-01F, 3.66658080e-01F, 3.62762973e-01F, 3.58908473e-01F, +# 3.55093753e-01F, 3.51318016e-01F, 3.47580495e-01F, 3.43880445e-01F, +# 3.40217149e-01F, 3.36589914e-01F, 3.32998069e-01F, 3.29440964e-01F, +# 3.25917972e-01F, 3.22428485e-01F, 3.18971913e-01F, 3.15547685e-01F, +# 3.12155249e-01F, 3.08794067e-01F, 3.05463619e-01F, 3.02163401e-01F, +# 2.98892921e-01F, 2.95651704e-01F, 2.92439288e-01F, 2.89255223e-01F, +# 2.86099074e-01F, 2.82970415e-01F, 2.79868833e-01F, 2.76793928e-01F, +# 2.73745310e-01F, 2.70722597e-01F, 2.67725420e-01F, 2.64753419e-01F, +# 2.61806243e-01F, 2.58883550e-01F, 2.55985007e-01F, 2.53110290e-01F, +# 2.50259082e-01F, 2.47431076e-01F, 2.44625969e-01F, 2.41843469e-01F, +# 2.39083290e-01F, 2.36345152e-01F, 2.33628783e-01F, 2.30933917e-01F, +# 2.28260294e-01F, 2.25607660e-01F, 2.22975768e-01F, 2.20364376e-01F, +# 2.17773247e-01F, 2.15202151e-01F, 2.12650862e-01F, 2.10119159e-01F, +# 2.07606828e-01F, 2.05113656e-01F, 2.02639439e-01F, 2.00183975e-01F, +# 1.97747066e-01F, 1.95328521e-01F, 1.92928150e-01F, 1.90545770e-01F, +# 1.88181199e-01F, 1.85834263e-01F, 1.83504787e-01F, 1.81192603e-01F, +# 1.78897547e-01F, 1.76619455e-01F, 1.74358169e-01F, 1.72113535e-01F, +# 1.69885401e-01F, 1.67673619e-01F, 1.65478042e-01F, 1.63298529e-01F, +# 1.61134940e-01F, 1.58987139e-01F, 1.56854992e-01F, 1.54738369e-01F, +# 1.52637142e-01F, 1.50551185e-01F, 1.48480376e-01F, 1.46424594e-01F, +# 1.44383722e-01F, 1.42357645e-01F, 1.40346251e-01F, 1.38349429e-01F, +# 1.36367071e-01F, 1.34399072e-01F, 1.32445328e-01F, 1.30505738e-01F, +# 1.28580205e-01F, 1.26668629e-01F, 1.24770919e-01F, 1.22886980e-01F, +# 1.21016722e-01F, 1.19160057e-01F, 1.17316899e-01F, 1.15487164e-01F, +# 1.13670768e-01F, 1.11867632e-01F, 1.10077676e-01F, 1.08300825e-01F, +# 1.06537004e-01F, 1.04786139e-01F, 1.03048160e-01F, 1.01322997e-01F, +# 9.96105837e-02F, 9.79108533e-02F, 9.62237426e-02F, 9.45491894e-02F, +# 9.28871336e-02F, 9.12375166e-02F, 8.96002819e-02F, 8.79753745e-02F, +# 8.63627411e-02F, 8.47623305e-02F, 8.31740930e-02F, 8.15979807e-02F, +# 8.00339475e-02F, 7.84819492e-02F, 7.69419432e-02F, 7.54138887e-02F, +# 7.38977470e-02F, 7.23934809e-02F, 7.09010552e-02F, 6.94204365e-02F, +# 6.79515934e-02F, 6.64944964e-02F, 6.50491178e-02F, 6.36154320e-02F, +# 6.21934154e-02F, 6.07830464e-02F, 5.93843056e-02F, 5.79971756e-02F, +# 5.66216413e-02F, 5.52576897e-02F, 5.39053102e-02F, 5.25644946e-02F, +# 5.12352371e-02F, 4.99175343e-02F, 4.86113856e-02F, 4.73167929e-02F, +# 4.60337611e-02F, 4.47622977e-02F, 4.35024136e-02F, 4.22541224e-02F, +# 4.10174414e-02F, 3.97923910e-02F, 3.85789955e-02F, 3.73772828e-02F, +# 3.61872848e-02F, 3.50090377e-02F, 3.38425822e-02F, 3.26879635e-02F, +# 3.15452322e-02F, 3.04144439e-02F, 2.92956602e-02F, 2.81889488e-02F, +# 2.70943838e-02F, 2.60120466e-02F, 2.49420264e-02F, 2.38844205e-02F, +# 2.28393354e-02F, 2.18068875e-02F, 2.07872041e-02F, 1.97804243e-02F, +# 1.87867007e-02F, 1.78062004e-02F, 1.68391068e-02F, 1.58856218e-02F, +# 1.49459680e-02F, 1.40203914e-02F, 1.31091649e-02F, 1.22125924e-02F, +# 1.13310136e-02F, 1.04648102e-02F, 9.61441364e-03F, 8.78031499e-03F, +# 7.96307744e-03F, 7.16335318e-03F, 6.38190594e-03F, 5.61964221e-03F, +# 4.87765598e-03F, 4.15729512e-03F, 3.46026478e-03F, 2.78879879e-03F, +# 2.14596774e-03F, 1.53629978e-03F, 9.67269282e-04F, 4.54134354e-04F, +# }; +# +# +# static const double ziggurat_nor_r = 3.6541528853610087963519472518; +# static const double ziggurat_nor_inv_r = +# 0.27366123732975827203338247596; // 1.0 / ziggurat_nor_r; +# static const double ziggurat_exp_r = 7.6971174701310497140446280481; +# +# static const float ziggurat_nor_r_f = 3.6541528853610087963519472518f; +# static const float ziggurat_nor_inv_r_f = 0.27366123732975827203338247596f; +# static const float ziggurat_exp_r_f = 7.6971174701310497140446280481f; +# +# +# double random_standard_normal(bitgen_t *bitgen_state) { +# uint64_t r; +# int sign; +# uint64_t rabs; +# int idx; +# double x, xx, yy; +# for (;;) { +# /* r = e3n52sb8 */ +# r = next_uint64(bitgen_state); +# idx = r & 0xff; +# r >>= 8; +# sign = r & 0x1; +# rabs = (r >> 1) & 0x000fffffffffffff; +# x = rabs * wi_double[idx]; +# if (sign & 0x1) +# x = -x; +# if (rabs < ki_double[idx]) +# return x; /* 99.3% of the time return here */ +# if (idx == 0) { +# for (;;) { +# /* Switch to 1.0 - U to avoid log(0.0), see GH 13361 */ +# xx = -ziggurat_nor_inv_r * npy_log1p(-next_double(bitgen_state)); +# yy = -npy_log1p(-next_double(bitgen_state)); +# if (yy + yy > xx * xx) +# return ((rabs >> 8) & 0x1) ? -(ziggurat_nor_r + xx) +# : ziggurat_nor_r + xx; +# } +# } else { +# if (((fi_double[idx - 1] - fi_double[idx]) * next_double(bitgen_state) + +# fi_double[idx]) < exp(-0.5 * x * x)) +# return x; +# } +# } +# } + +from __future__ import annotations + +import hashlib + +# --------------------------------------------------------------------------- +# Ziggurat tables (parsed from the C source comments at the top of this file) +# --------------------------------------------------------------------------- +import math as math +import re as re +from typing import Literal + +import cffi +import numba as nb +import numpy as np + +_FFI = cffi.FFI() + +_ZIGGURAT_NOR_R = 3.6541528853610087963519472518 +_ZIGGURAT_NOR_INV_R = 0.27366123732975827203338247596 # 1.0 / ziggurat_nor_r + +_KI_DOUBLE = None +_WI_DOUBLE = None +_FI_DOUBLE = None + + +def _load_ziggurat_tables(): + """Parse the C-comment Ziggurat tables embedded at the top of this module.""" + global _KI_DOUBLE, _WI_DOUBLE, _FI_DOUBLE + with open(__file__) as fh: + text = fh.read() + + def _extract_block(name): + m = re.search( + r"#\s*static const \w+\s+" + name + r"\[\]\s*=\s*\{(.*?)\};", + text, + re.S, + ) + if m is None: + raise RuntimeError(f"Could not locate Ziggurat table {name!r}") + # strip leading '#' on each line + return re.sub(r"^#\s?", "", m.group(1), flags=re.M) + + ki_body = _extract_block("ki_double").replace("ULL", "").replace("LL", "") + ki_tokens = re.findall(r"0x[0-9A-Fa-f]+", ki_body) + _KI_DOUBLE = np.array([int(t, 16) for t in ki_tokens], dtype=np.uint64) + + float_re = r"[-+]?\d+\.\d+(?:[eE][-+]?\d+)?|[-+]?\d+(?:[eE][-+]?\d+)?" + wi_tokens = re.findall(float_re, _extract_block("wi_double")) + _WI_DOUBLE = np.array([float(t) for t in wi_tokens], dtype=np.float64) + fi_tokens = re.findall(float_re, _extract_block("fi_double")) + _FI_DOUBLE = np.array([float(t) for t in fi_tokens], dtype=np.float64) + + if not (len(_KI_DOUBLE) == len(_WI_DOUBLE) == len(_FI_DOUBLE) == 256): + raise RuntimeError( + "Unexpected Ziggurat table sizes: " + f"ki={len(_KI_DOUBLE)} wi={len(_WI_DOUBLE)} fi={len(_FI_DOUBLE)}" + ) + + +_load_ziggurat_tables() + + +@nb.njit +def _single_random_standard_normal(_next_double, _next_uint64, _state_address) -> float: + """ + Return one sample from the standard normal distribution, :math:`\\mathcal{N}(0, 1)`. + + Uses the module-level PCG64 bit-generator directly via its CFFI pointer + (``_state_address``). Advances the global RNG state in-place. + + Returns + ------- + float + A single draw from :math:`\\mathcal{N}(0, 1)`. + + Notes + ----- + Direct Python port of NumPy's ``random_standard_normal`` C implementation + (see the C source reproduced in the comment block at the top of this + module). The Ziggurat algorithm divides the probability density function + into 256 horizontal slabs and uses the precomputed ``ki_double``, + ``wi_double`` and ``fi_double`` tables (also reproduced in the top-of-file + C comments) to quickly accept the common case (~99.3%). When the fast + accept fails, the function falls back to either a tail sample (when + ``idx == 0``) or a wedge-rejection test against ``exp(-x^2/2)``. + """ + ki = _KI_DOUBLE + wi = _WI_DOUBLE + fi = _FI_DOUBLE + nor_r = _ZIGGURAT_NOR_R + nor_inv_r = _ZIGGURAT_NOR_INV_R + log1p = math.log1p + exp = math.exp + + while True: + r = _next_uint64(_state_address) + idx = r & 0xFF + r >>= 8 + sign = r & 0x1 + rabs = (r >> 1) & 0x000FFFFFFFFFFFFF # low 52 bits + x = rabs * float(wi[idx]) + if sign & 0x1: + x = -x + if rabs < int(ki[idx]): + # 99.3% of the time return here + return x + if idx == 0: + # Tail of the distribution: sample using the marsaglia tail method. + while True: + # Switch to 1.0 - U to avoid log(0.0) + xx = -nor_inv_r * log1p(-_next_double(_state_address)) + yy = -log1p(-_next_double(_state_address)) + if yy + yy > xx * xx: + if (rabs >> 8) & 0x1: + return -(nor_r + xx) + return nor_r + xx + else: + # Wedge: accept x if it lies under exp(-x^2/2) + fi_im1 = float(fi[idx - 1]) + fi_i = float(fi[idx]) + if (fi_im1 - fi_i) * _next_double(_state_address) + fi_i < exp( + -0.5 * x * x + ): + return x + + +@nb.njit +def _several_random_standard_normal( + state_array: np.ndarray, + state_bytes: np.ndarray, + size: int, + _next_double, + _next_uint64, + _state_address, + _slice_start: int, + _slice_end: int, +) -> np.ndarray: + """ + Draw ``size`` standard-normal samples for a single RNG state row. + + Loads the RNG state from *state_array* into the shared bit-generator, + fills a 1-D array of length *size* with Ziggurat normal samples, then + writes the updated state back to *state_array*. + + Parameters + ---------- + state_array : np.ndarray + 1-D ``uint64`` array of length ≥ 4 holding the PCG64 state for one + agent/row. Mutated in-place with the post-draw state. + state_bytes : np.ndarray + Byte view (``uint8``) of the shared ``_BIT_GENERATOR`` state buffer. + Used as the staging area when injecting and extracting PCG64 state. + size : int, optional + Number of samples to draw. Defaults to 1. + + Returns + ------- + np.ndarray + 1-D ``float64`` array of shape ``(size,)`` containing draws from + :math:`\\mathcal{N}(0, 1)`. + + Notes + ----- + Direct Python port of NumPy's ``random_standard_normal`` C implementation + (see the C source reproduced in the comment block at the top of this + module). The Ziggurat algorithm divides the probability density function + into 256 horizontal slabs and uses the precomputed ``ki_double``, + ``wi_double`` and ``fi_double`` tables (also reproduced in the top-of-file + C comments) to quickly accept the common case (~99.3%). When the fast + accept fails, the function falls back to either a tail sample (when + ``idx == 0``) or a wedge-rejection test against ``exp(-x^2/2)``. + """ + # write into the BIT_GENERATOR's state from the given state_array + _state_uint64_ = state_bytes.view(np.uint64)[_slice_start:_slice_end] + _state_uint64_[:] = state_array[:4] + + try: + result = np.empty(size, dtype=np.float64) + for n in range(size): + result[n] = _single_random_standard_normal( + _next_double, _next_uint64, _state_address + ) + return result + finally: + # write back the updated state into the given state_array + state_array[:4] = _state_uint64_[:4] + + +@nb.njit +def _vector_random_standard_normal( + state_array: np.ndarray, + state_bytes: np.ndarray, + shape: tuple[int, ...], + _next_double, + _next_uint64, + _state_address, + _slice_start: int, + _slice_end: int, +) -> np.ndarray: + """ + Draw standard-normal samples for every row of *state_array*. + + Parameters + ---------- + state_array : np.ndarray + 2-D ``uint64`` array of shape ``(n_agents, ≥4)`` where each row holds + the PCG64 state for one agent. Mutated in-place. + state_bytes : np.ndarray + Byte view (``uint8``) of the shared ``_BIT_GENERATOR`` state buffer. + shape : tuple of int, optional + Trailing dimensions of the per-agent sample block. The total number + of samples per agent equals ``prod(shape)``. Defaults to ``(1,)``. + + Returns + ------- + np.ndarray + 2-D ``float64`` array of shape ``(n_agents, prod(shape))``. Callers + that want the trailing dimensions split out should call + ``.reshape(n_agents, *shape)`` on the result. + """ + flat_size = 1 + for s in shape: + flat_size *= s + + result = np.empty((state_array.shape[0], flat_size), dtype=np.float64) + for idx in range(state_array.shape[0]): + result[idx] = _several_random_standard_normal( + state_array[idx], + state_bytes, + size=flat_size, + _next_double=_next_double, + _next_uint64=_next_uint64, + _state_address=_state_address, + _slice_start=_slice_start, + _slice_end=_slice_end, + ) + return result + + +@nb.njit +def _selected_vector_random_standard_normal( + selected_positions: np.ndarray, + state_array: np.ndarray, + state_bytes: np.ndarray, + shape: tuple[int, ...], + _next_double, + _next_uint64, + _state_address, + _slice_start: int, + _slice_end: int, +) -> np.ndarray: + """ + Draw standard-normal samples for a subset of rows of *state_array*. + + Parameters + ---------- + selected_positions : np.ndarray + 1-D integer array whose values are row indices into *state_array*. + Only the indicated rows are advanced and sampled. + state_array : np.ndarray + 2-D ``uint64`` array of shape ``(n_agents, ≥4)`` where each row holds + the PCG64 state for one agent. Mutated in-place for the selected rows. + state_bytes : np.ndarray + Byte view (``uint8``) of the shared ``_BIT_GENERATOR`` state buffer. + shape : tuple of int, optional + Trailing dimensions of the per-agent sample block. The total number + of samples per selected agent equals ``prod(shape)``. Defaults to + ``(1,)``. + + Returns + ------- + np.ndarray + 2-D ``float64`` array of shape ``(len(selected_positions), prod(shape))``. + Callers that want the trailing dimensions split out should call + ``.reshape(len(selected_positions), *shape)`` on the result. + """ + # compute the size for the unfolded (flat) trailing dimension. + flat_size = 1 + for s in shape: + flat_size *= s + + # Allocate a 2-D buffer (n_selected, flat_size) so the row assignment + # below matches the 1-D array returned by _several_random_standard_normal + # without needing a per-iteration reshape. + n = selected_positions.shape[0] + flat = np.empty((n, flat_size), dtype=np.float64) + + for idx in range(n): + flat[idx] = _several_random_standard_normal( + state_array[selected_positions[idx]], + state_bytes, + size=flat_size, + _next_double=_next_double, + _next_uint64=_next_uint64, + _state_address=_state_address, + _slice_start=_slice_start, + _slice_end=_slice_end, + ) + + # Reshape so the trailing dimensions match the caller-provided `shape`. + # Tuple concatenation keeps the result shape a homogeneous int tuple, + # which numba requires for `reshape` in nopython mode. + return flat + + +### UNIFORM + + +@nb.njit +def _several_random_standard_uniform( + state_array: np.ndarray, + state_bytes: np.ndarray, + size: int, + _next_double, + _state_address, + _slice_start: int, + _slice_end: int, +) -> np.ndarray: + """ + Draw ``size`` standard-uniform samples for a single RNG state row. + + Loads the RNG state from *state_array* into the shared bit-generator, + fills a 1-D array of length *size* with U[0, 1) samples, then writes the + updated state back to *state_array*. + + Parameters + ---------- + state_array : np.ndarray + 1-D ``uint64`` array of length ≥ 4 holding the PCG64 state for one + agent/row. Mutated in-place with the post-draw state. + state_bytes : np.ndarray + Byte view (``uint8``) of the shared ``_BIT_GENERATOR`` state buffer. + Used as the staging area when injecting and extracting PCG64 state. + size : int, optional + Number of samples to draw. Defaults to 1. + + Returns + ------- + np.ndarray + 1-D ``float64`` array of shape ``(size,)`` containing draws from + U[0, 1). + """ + + # write into the BIT_GENERATOR's state from the given state_array + _state_uint64_ = state_bytes.view(np.uint64)[_slice_start:_slice_end] + _state_uint64_[:] = state_array[:4] + try: + result = np.empty(size, dtype=np.float64) + for n in range(size): + result[n] = _next_double(_state_address) + return result + finally: + # write back the updated state into the given state_array + state_array[:4] = _state_uint64_[:4] + + +@nb.njit +def _vector_random_standard_uniform( + state_array: np.ndarray, + state_bytes: np.ndarray, + shape: tuple[int, ...], + _next_double, + _state_address, + _slice_start: int, + _slice_end: int, +) -> np.ndarray: + """ + Draw standard-uniform samples for every row of *state_array*. + + Parameters + ---------- + state_array : np.ndarray + 2-D ``uint64`` array of shape ``(n_agents, ≥4)`` where each row holds + the PCG64 state for one agent. Mutated in-place. + state_bytes : np.ndarray + Byte view (``uint8``) of the shared ``_BIT_GENERATOR`` state buffer. + shape : tuple of int, optional + Trailing dimensions of the per-agent sample block. The total number + of samples per agent equals ``prod(shape)``. Defaults to ``(1,)``. + + Returns + ------- + np.ndarray + 2-D ``float64`` array of shape ``(n_agents, prod(shape))``. Callers + that want trailing dimensions split out should call + ``.reshape(n_agents, *shape)`` on the result. + """ + # compute the size for the unfolded (flat) trailing dimension. + flat_size = 1 + for s in shape: + flat_size *= s + + result = np.empty((state_array.shape[0], flat_size), dtype=np.float64) + for idx in range(state_array.shape[0]): + result[idx] = _several_random_standard_uniform( + state_array[idx], + state_bytes, + flat_size, + _next_double, + _state_address, + _slice_start=_slice_start, + _slice_end=_slice_end, + ) + return result + + +@nb.njit +def _selected_vector_random_standard_uniform( + selected_positions: np.ndarray, + state_array: np.ndarray, + state_bytes: np.ndarray, + shape: tuple[int, ...], + _next_double, + _state_address, + _slice_start: int, + _slice_end: int, +) -> np.ndarray: + """ + Draw standard-uniform samples for a subset of rows of *state_array*. + + Parameters + ---------- + selected_positions : np.ndarray + 1-D integer array whose values are row indices into *state_array*. + Only the indicated rows are advanced and sampled. + state_array : np.ndarray + 2-D ``uint64`` array of shape ``(n_agents, ≥4)`` where each row holds + the PCG64 state for one agent. Mutated in-place for the selected rows. + state_bytes : np.ndarray + Byte view (``uint8``) of the shared ``_BIT_GENERATOR`` state buffer. + shape : tuple of int, optional + Trailing dimensions of the per-agent sample block. The total number + of samples per selected agent equals ``prod(shape)``. Defaults to + ``(1,)``. + + Returns + ------- + np.ndarray + 2-D ``float64`` array of shape ``(len(selected_positions), prod(shape))``. + Callers that want the trailing dimensions split out should call + ``.reshape(len(selected_positions), *shape)`` on the result. + """ + # compute the size for the unfolded (flat) trailing dimension. + flat_size = 1 + for s in shape: + flat_size *= s + + result = np.empty((selected_positions.shape[0], flat_size), dtype=np.float64) + for idx in range(selected_positions.shape[0]): + result[idx] = _several_random_standard_uniform( + state_array[selected_positions[idx]], + state_bytes, + flat_size, + _next_double, + _state_address, + _slice_start=_slice_start, + _slice_end=_slice_end, + ) + return result + + +def quick_entropy(*seeds): + """Generate a quick-and-dirty entropy array from the given seeds. + + This is a less robust starting seed than generated by numpy.random.SeedSequence, + but it is very fast to compute and should be sufficient for most purposes. + The resulting array is deterministic for a given set of seeds, but will change + if the seeds change in any way (e.g. different order). + """ + h = hashlib.sha256() + for s in seeds: + h.update(str(s).encode()) + h.update(b"|") + return np.frombuffer(h.digest(), dtype=np.uint64) + + +class FastGenerator: + def __init__( + self, seed: int = 42, bit_gen: Literal["PCG64", "SFC64"] = "PCG64" + ) -> None: + self._bit_gen_class = bit_gen + self._bit_generator = self._new_bit_generator(seed) + self._next_uint64 = self._bit_generator.cffi.next_uint64 + self._next_double = self._bit_generator.cffi.next_double + self._state_address = self._bit_generator.cffi.state_address + self._state_ptr = _FFI.cast("uint8_t(*)[128]", self._state_address) + self._state_bytes = np.frombuffer(_FFI.buffer(self._state_ptr), dtype=np.uint8) + + # Determine the values currently in the `state` + state_array = np.empty(shape=[4], dtype=np.uint64) + _state = self._bit_generator.state["state"] + if bit_gen == "PCG64": + state_array[0] = _state["state"] & 0xFFFFFFFFFFFFFFFF + state_array[1] = _state["state"] >> 64 + state_array[2] = _state["inc"] & 0xFFFFFFFFFFFFFFFF + state_array[3] = _state["inc"] >> 64 + elif bit_gen == "SFC64": + state_array[:] = _state["state"][:] + + # There are 4 values in state_array, which correspond to 4 values + # within _state_bytes.view(np.uint64), in a contiguous block but + # not necessarily in the same order. Find them + viewer = self._state_bytes.view(np.uint64) + positions = [] + for j in range(4): + target = state_array[j] + for k in range(16): + if viewer[k] == target: + break + if k >= 15: + raise ValueError("the state array is not found in raw memory") + positions.append(k) + + # Determine where the block of state values is, and verify that it is contiguous + self._slice_start = min(positions) + self._slice_end = max(positions) + 1 + if self._slice_start + 4 != self._slice_end: + raise ValueError("the state array is not contiguous") + + self._slice_positions = np.asarray(positions) - self._slice_start + + def _new_bit_generator(self, seed): + if self._bit_gen_class == "PCG64": + return np.random.PCG64(seed=seed) + elif self._bit_gen_class == "SFC64": + return np.random.SFC64(seed=seed) + else: + raise ValueError("bit_gen must be one of 'PCG64' or 'SFC64'") + + def get_state_array(self, seed) -> np.ndarray: + """Get the state array for a given seed, in the order expected by the numba functions.""" + bit_generator = self._new_bit_generator(seed=seed) + _state = bit_generator.state["state"] + state_array = np.empty(shape=[4], dtype=np.uint64) + if self._bit_gen_class == "PCG64": + state_array[self._slice_positions[0]] = _state["state"] & 0xFFFFFFFFFFFFFFFF + state_array[self._slice_positions[1]] = _state["state"] >> 64 + state_array[self._slice_positions[2]] = _state["inc"] & 0xFFFFFFFFFFFFFFFF + state_array[self._slice_positions[3]] = _state["inc"] >> 64 + elif self._bit_gen_class == "SFC64": + for i in range(4): + state_array[self._slice_positions[i]] = _state["state"][i] + return state_array + + def vector_random_standard_normal( + self, + state_array: np.ndarray, + selected_positions: np.ndarray | None = None, + shape: int | tuple[int, ...] = (1,), + ) -> np.ndarray: + """ + Draw standard-normal samples for agents, optionally restricted to a subset. + + Parameters + ---------- + state_array : np.ndarray + 2-D ``uint64`` array of shape ``(n_agents, ≥4)`` where each row holds + the PCG64 state for one agent. Mutated in-place. + selected_positions : np.ndarray or None, optional + 1-D integer array of row indices into *state_array*. When provided, + only these agents are sampled; otherwise all agents are sampled. + shape : int or tuple of int, optional + Shape of the per-agent sample block. A plain ``int`` is treated as a + 1-element tuple, e.g. ``3`` → ``(3,)``. Defaults to ``(1,)``. + + Returns + ------- + np.ndarray + ``float64`` array of shape + ``(n_selected, *shape)`` when *selected_positions* is given, or + ``(n_agents, *shape)`` otherwise. + """ + if shape is None: + shape = 1 + if isinstance(shape, (int, np.integer)): + shape = (shape,) + if selected_positions is not None: + return _selected_vector_random_standard_normal( + selected_positions, + state_array, + state_bytes=self._state_bytes, + shape=shape, + _next_double=self._next_double, + _next_uint64=self._next_uint64, + _state_address=self._state_address, + _slice_start=self._slice_start, + _slice_end=self._slice_end, + ).reshape(-1, *shape) + else: + return _vector_random_standard_normal( + state_array, + state_bytes=self._state_bytes, + shape=shape, + _next_double=self._next_double, + _next_uint64=self._next_uint64, + _state_address=self._state_address, + _slice_start=self._slice_start, + _slice_end=self._slice_end, + ).reshape(-1, *shape) + + def vector_random_standard_uniform( + self, + state_array: np.ndarray, + selected_positions: np.ndarray | None = None, + shape: int | tuple[int, ...] = 1, + ) -> np.ndarray: + """ + Draw standard-uniform samples for agents, optionally restricted to a subset. + + Parameters + ---------- + state_array : np.ndarray + 2-D ``uint64`` array of shape ``(n_agents, ≥4)`` where each row holds + the PCG64 state for one agent. Mutated in-place. + selected_positions : np.ndarray or None, optional + 1-D integer array of row indices into *state_array*. When provided, + only these agents are sampled; otherwise all agents are sampled. + shape : int or tuple of int, optional + Shape of the per-agent sample block. A plain ``int`` is treated as a + 1-element tuple, e.g. ``3`` → ``(3,)``. Defaults to ``1``, i.e. one + scalar draw per agent. + + Returns + ------- + np.ndarray + ``float64`` array of shape + ``(n_selected, *shape)`` when *selected_positions* is given, or + ``(n_agents, *shape)`` otherwise; values drawn from U[0, 1). + """ + if shape is None: + shape = 1 + if isinstance(shape, (int, np.integer)): + shape = (shape,) + if selected_positions is not None: + return _selected_vector_random_standard_uniform( + selected_positions, + state_array, + state_bytes=self._state_bytes, + shape=shape, + _next_double=self._next_double, + _state_address=self._state_address, + _slice_start=self._slice_start, + _slice_end=self._slice_end, + ).reshape(-1, *shape) + else: + return _vector_random_standard_uniform( + state_array, + state_bytes=self._state_bytes, + shape=shape, + _next_double=self._next_double, + _state_address=self._state_address, + _slice_start=self._slice_start, + _slice_end=self._slice_end, + ).reshape(-1, *shape) diff --git a/activitysim/core/random.py b/activitysim/core/random.py index 37b1976403..771d83f7b4 100644 --- a/activitysim/core/random.py +++ b/activitysim/core/random.py @@ -9,9 +9,10 @@ import numpy as np import pandas as pd -from activitysim.core.util import reindex from activitysim.core.exceptions import DuplicateLoadableObjectError, TableIndexError +from activitysim.core.util import reindex +from .fast_random import FastChannel from .tracing import print_elapsed_time logger = logging.getLogger(__name__) @@ -372,7 +373,7 @@ def choice_for_df(self, df, step_name, a, size, replace): class Random(object): - def __init__(self): + def __init__(self, channel_type: str = "fast"): self.channels = {} # dict mapping df index name to channel name @@ -383,6 +384,12 @@ def __init__(self): self.base_seed = 0 self.global_rng = np.random.RandomState() + if channel_type not in ("fast", "faster", "simple"): + raise ValueError( + f"channel_type must be 'fast', 'faster' or 'simple', got {channel_type!r}" + ) + self.channel_type = channel_type + def get_channel_for_df(self, df): """ Return the channel for this df. Channel should already have been loaded/added. @@ -450,7 +457,7 @@ def end_step(self, step_name): # channel management - def add_channel(self, channel_name, domain_df): + def add_channel(self, channel_name, domain_df, fast: bool | None = None): """ Create or extend a channel for generating random number streams for domain_df. @@ -467,8 +474,15 @@ def add_channel(self, channel_name, domain_df): channel_name : str expected channel name provided as a consistency check + fast : bool, optional + If ``None`` (default), the channel implementation is selected + based on ``self.channel_type``. Pass ``True`` / ``False`` to + force a specific implementation for this channel only. """ + if fast is None: + fast = self.channel_type in {"fast", "faster"} + if channel_name in self.channels: assert channel_name == self.index_to_channel[domain_df.index.name] logger.debug( @@ -484,8 +498,14 @@ def add_channel(self, channel_name, domain_df): "Adding channel '%s' %s ids" % (channel_name, len(domain_df.index)) ) - channel = SimpleChannel( - channel_name, self.base_seed, domain_df, self.step_name + channel_class = FastChannel if fast else SimpleChannel + channel_args = {} + if fast and self.channel_type == "faster": + channel_args = {"bit_generator": "SFC64", "entropy_type": "quick"} + if fast and self.channel_type == "fast": + channel_args = {"bit_generator": "PCG64", "entropy_type": "robust"} + channel = channel_class( + channel_name, self.base_seed, domain_df, self.step_name, **channel_args ) self.channels[channel_name] = channel diff --git a/activitysim/core/test/test_fast_channel.py b/activitysim/core/test/test_fast_channel.py new file mode 100644 index 0000000000..9ca3e6b3e7 --- /dev/null +++ b/activitysim/core/test/test_fast_channel.py @@ -0,0 +1,509 @@ +# ActivitySim +# See full license in LICENSE.txt. +"""Unit tests for activitysim.core.random.FastChannel.""" + +from __future__ import annotations + +import numpy as np +import numpy.testing as npt +import pandas as pd +import pytest + +from activitysim.core.fast_random import FastChannel + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_df(index_values, name="household_id"): + """Return a minimal DataFrame with a named index.""" + df = pd.DataFrame({"x": 0}, index=pd.Index(index_values, name=name)) + return df + + +def _make_channel( + index_values=None, base_seed=0, channel_name="households", step_name="" +): + if index_values is None: + index_values = [1, 2, 3, 4, 5] + domain_df = _make_df(index_values) + return FastChannel(channel_name, base_seed, domain_df, step_name=step_name) + + +# --------------------------------------------------------------------------- +# __init__ / construction +# --------------------------------------------------------------------------- + + +class TestInit: + def test_attributes_set(self): + domain_df = _make_df([10, 20, 30]) + ch = FastChannel("persons", 42, domain_df) + assert ch.channel_name == "persons" + assert ch.base_seed == 42 + assert len(ch.domain_index) == 3 + assert ch.step_name is None + assert ch._state_array is None + + def test_domain_index_is_copy(self): + domain_df = _make_df([1, 2, 3]) + ch = FastChannel("h", 0, domain_df) + # mutating original index should not affect stored domain_index + original_index = domain_df.index.copy() + assert list(ch.domain_index) == list(original_index) + + def test_step_started_when_step_name_given(self): + ch = _make_channel(step_name="my_step") + assert ch.step_name == "my_step" + assert ch._state_array is None + ch._reseed_step() # ensure state is populated + assert ch._state_array is not None + assert ch._state_array.shape == (5, 4) + + def test_no_step_when_step_name_empty(self): + ch = _make_channel(step_name="") + assert ch.step_name is None + assert ch._state_array is None + + def test_channel_seed_differs_by_name(self): + d = _make_df([1, 2, 3]) + ch_a = FastChannel("alpha", 0, d) + ch_b = FastChannel("beta", 0, d) + assert ch_a.channel_seed != ch_b.channel_seed + + +# --------------------------------------------------------------------------- +# begin_step / end_step +# --------------------------------------------------------------------------- + + +class TestBeginEndStep: + def test_begin_step_populates_state(self): + ch = _make_channel() + ch.begin_step("step1") + assert ch.step_name == "step1" + assert ch._state_array is None # no state before a draw + ch._reseed_step() # ensure state is populated + assert ch._state_array is not None + assert ch._state_array.shape == (5, 4) + assert ch._state_array.dtype == np.uint64 + + def test_begin_step_raises_if_already_active(self): + ch = _make_channel() + ch.begin_step("step1") + with pytest.raises(AssertionError): + ch.begin_step("step2") + + def test_end_step_clears_state(self): + ch = _make_channel() + ch.begin_step("step1") + ch.end_step("step1") + assert ch.step_name is None + assert ch.step_seed is None + assert ch._state_array is None + + def test_end_step_consistency_check_passes(self): + ch = _make_channel() + ch.begin_step("step1") + # should not raise + ch.end_step("step1") + + def test_end_step_consistency_check_fails(self): + ch = _make_channel() + ch.begin_step("step1") + with pytest.raises(AssertionError): + ch.end_step("wrong_step") + + def test_end_step_no_name_skips_check(self): + ch = _make_channel() + ch.begin_step("step1") + ch.end_step() # no name → no assertion + assert ch.step_name is None + + def test_begin_step_same_name_reproducible(self): + """Re-running the same step name must reproduce the same state array.""" + ch = _make_channel() + ch.begin_step("step1") + ch._reseed_step() # ensure state is populated + state1 = ch._state_array.copy() + ch.end_step() + ch.begin_step("step1") + ch._reseed_step() # ensure state is populated + state2 = ch._state_array.copy() + npt.assert_array_equal(state1, state2) + + def test_begin_step_different_names_give_different_states(self): + ch = _make_channel() + ch.begin_step("step_a") + ch._reseed_step() # ensure state is populated + state_a = ch._state_array.copy() + ch.end_step() + ch.begin_step("step_b") + ch._reseed_step() # ensure state is populated + state_b = ch._state_array.copy() + assert not np.array_equal(state_a, state_b) + + +# --------------------------------------------------------------------------- +# _check_valid_df +# --------------------------------------------------------------------------- + + +class TestCheckValidDf: + def test_returns_positions_for_full_domain(self): + ch = _make_channel(index_values=[10, 20, 30]) + ch.begin_step("s") + df = _make_df([10, 20, 30]) + pos = ch._check_valid_df(df) + npt.assert_array_equal(pos, [0, 1, 2]) + + def test_returns_positions_for_subset(self): + ch = _make_channel(index_values=[10, 20, 30]) + ch.begin_step("s") + df = _make_df([30, 10]) + pos = ch._check_valid_df(df) + npt.assert_array_equal(pos, [2, 0]) + + def test_raises_on_duplicate_index(self): + ch = _make_channel(index_values=[1, 2, 3]) + ch.begin_step("s") + df = _make_df([1, 1, 2]) + with pytest.raises(ValueError, match="unique index"): + ch._check_valid_df(df) + + def test_raises_on_index_not_in_domain(self): + ch = _make_channel(index_values=[1, 2, 3]) + ch.begin_step("s") + df = _make_df([1, 99]) + with pytest.raises(ValueError, match="not found in the domain"): + ch._check_valid_df(df) + + def test_raises_outside_step(self): + ch = _make_channel(index_values=[1, 2, 3]) + # no begin_step called + df = _make_df([1, 2]) + with pytest.raises(ValueError, match="outside of a defined step"): + ch._check_valid_df(df) + + +# --------------------------------------------------------------------------- +# random_for_df +# --------------------------------------------------------------------------- + + +class TestRandomForDf: + def test_output_shape_single_draw(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3, 4, 5]) + rands = ch.random_for_df(df, "s", n=1) + assert rands.shape == (5, 1) + + def test_output_shape_multiple_draws(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3, 4, 5]) + rands = ch.random_for_df(df, "s", n=3) + assert rands.shape == (5, 3) + + def test_values_in_unit_interval(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3, 4, 5]) + rands = ch.random_for_df(df, "s", n=10) + assert np.all(rands >= 0.0) + assert np.all(rands < 1.0) + + def test_successive_calls_advance_stream(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3, 4, 5]) + r1 = ch.random_for_df(df, "s") + r2 = ch.random_for_df(df, "s") + assert not np.array_equal(r1, r2) + + def test_reproducible_across_steps(self): + ch = _make_channel() + df = _make_df([1, 2, 3, 4, 5]) + ch.begin_step("s") + r1 = ch.random_for_df(df, "s").copy() + ch.end_step() + ch.begin_step("s") + r2 = ch.random_for_df(df, "s").copy() + npt.assert_array_equal(r1, r2) + + def test_different_steps_give_different_streams(self): + ch = _make_channel() + df = _make_df([1, 2, 3, 4, 5]) + ch.begin_step("step_a") + r_a = ch.random_for_df(df, "step_a").copy() + ch.end_step() + ch.begin_step("step_b") + r_b = ch.random_for_df(df, "step_b").copy() + assert not np.array_equal(r_a, r_b) + + def test_different_base_seeds_give_different_streams(self): + df = _make_df([1, 2, 3]) + ch0 = FastChannel("h", 0, df) + ch1 = FastChannel("h", 1, df) + ch0.begin_step("s") + ch1.begin_step("s") + r0 = ch0.random_for_df(df, "s") + r1 = ch1.random_for_df(df, "s") + assert not np.array_equal(r0, r1) + + def test_different_channel_names_give_different_streams(self): + df = _make_df([1, 2, 3]) + ch_a = FastChannel("alpha", 0, df) + ch_b = FastChannel("beta", 0, df) + ch_a.begin_step("s") + ch_b.begin_step("s") + r_a = ch_a.random_for_df(df, "s") + r_b = ch_b.random_for_df(df, "s") + assert not np.array_equal(r_a, r_b) + + def test_subset_matches_full_domain_rows(self): + """Draws for a subset of rows should equal draws for those rows from the full domain.""" + ch_full = _make_channel(index_values=[1, 2, 3, 4, 5]) + ch_sub = _make_channel(index_values=[2, 4]) + + ch_full.begin_step("s") + ch_sub.begin_step("s") + + df_full = _make_df([1, 2, 3, 4, 5]) + df_sub = _make_df([2, 4]) + + r_full = ch_full.random_for_df(df_full, "s") + r_sub = ch_sub.random_for_df(df_sub, "s") + + # Rows for index 2 and 4 should match regardless of which other rows exist + npt.assert_array_equal(r_full[[1, 3]], r_sub) + + def test_wrong_step_name_raises(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2]) + with pytest.raises(AssertionError): + ch.random_for_df(df, "wrong_step") + + +# --------------------------------------------------------------------------- +# normal_for_df +# --------------------------------------------------------------------------- + + +class TestNormalForDf: + def test_output_shape(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3]) + result = ch.normal_for_df(df, "s") + assert result.shape == (3, 1) + + def test_output_shape_with_size(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3]) + result = ch.normal_for_df(df, "s", size=4) + assert result.shape == (3, 4) + + def test_mu_sigma_applied(self): + """With large sigma, values should spread away from mu.""" + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3, 4, 5]) + result = ch.normal_for_df(df, "s", mu=100.0, sigma=0.0001) + # all values should be very close to mu + npt.assert_allclose(result.flatten(), 100.0, atol=0.01) + + def test_lognormal_flag_positive(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3, 4, 5]) + result = ch.normal_for_df(df, "s", lognormal=True) + assert np.all(result > 0), "lognormal values must be positive" + + def test_successive_calls_advance_stream(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3]) + r1 = ch.normal_for_df(df, "s").copy() + r2 = ch.normal_for_df(df, "s").copy() + assert not np.array_equal(r1, r2) + + def test_reproducible_across_steps(self): + ch = _make_channel() + df = _make_df([1, 2, 3]) + ch.begin_step("s") + r1 = ch.normal_for_df(df, "s").copy() + ch.end_step() + ch.begin_step("s") + r2 = ch.normal_for_df(df, "s").copy() + npt.assert_array_equal(r1, r2) + + def test_scalar_mu_sigma_broadcast(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3, 4, 5]) + # should not raise; shape should still be (5, 1) + result = ch.normal_for_df(df, "s", mu=5.0, sigma=2.0) + assert result.shape == (5, 1) + + def test_lognormal_vs_normal_exp_relationship(self): + """exp(normal_for_df) should equal lognormal_for_df for same step.""" + domain_df = _make_df([1, 2, 3, 4, 5]) + ch_n = FastChannel("h", 7, domain_df) + ch_l = FastChannel("h", 7, domain_df) + ch_n.begin_step("s") + ch_l.begin_step("s") + df = _make_df([1, 2, 3, 4, 5]) + r_normal = ch_n.normal_for_df(df, "s", mu=0, sigma=1, lognormal=False) + r_lognormal = ch_l.normal_for_df(df, "s", mu=0, sigma=1, lognormal=True) + npt.assert_allclose(np.exp(r_normal), r_lognormal) + + +# --------------------------------------------------------------------------- +# choice_for_df +# --------------------------------------------------------------------------- + + +class TestChoiceForDf: + def test_output_length_with_replace(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3]) + choices = ch.choice_for_df(df, "s", a=10, size=4, replace=True) + assert choices.shape == (12,) # 3 rows × 4 + + def test_output_length_without_replace(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3]) + choices = ch.choice_for_df(df, "s", a=np.arange(10), size=3, replace=False) + assert choices.shape == (9,) # 3 rows × 3 + + def test_int_population_values_in_range(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3, 4, 5]) + choices = ch.choice_for_df(df, "s", a=7, size=10, replace=True) + assert np.all(choices >= 0) + assert np.all(choices < 7) + + def test_array_population_values_from_array(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3]) + a = np.array([10, 20, 30, 40]) + choices = ch.choice_for_df(df, "s", a=a, size=5, replace=True) + assert set(choices).issubset({10, 20, 30, 40}) + + def test_without_replace_no_duplicates_per_row(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3]) + n_pop = 8 + size = n_pop # draw all elements → no duplicates allowed + choices = ch.choice_for_df(df, "s", a=n_pop, size=size, replace=False) + for row_choices in choices.reshape(len(df), size): + assert len(set(row_choices)) == size + + def test_without_replace_raises_when_size_exceeds_population(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2]) + with pytest.raises(ValueError, match="replace=False"): + ch.choice_for_df(df, "s", a=3, size=5, replace=False) + + def test_successive_calls_advance_stream(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3]) + c1 = ch.choice_for_df(df, "s", a=100, size=5, replace=True).copy() + c2 = ch.choice_for_df(df, "s", a=100, size=5, replace=True).copy() + assert not np.array_equal(c1, c2) + + def test_reproducible_across_steps(self): + ch = _make_channel() + df = _make_df([1, 2, 3, 4, 5]) + ch.begin_step("s") + c1 = ch.choice_for_df(df, "s", a=20, size=4, replace=True).copy() + ch.end_step() + ch.begin_step("s") + c2 = ch.choice_for_df(df, "s", a=20, size=4, replace=True).copy() + npt.assert_array_equal(c1, c2) + + def test_different_steps_give_different_choices(self): + ch = _make_channel() + df = _make_df([1, 2, 3, 4, 5]) + ch.begin_step("step_a") + c_a = ch.choice_for_df(df, "step_a", a=50, size=10, replace=True).copy() + ch.end_step() + ch.begin_step("step_b") + c_b = ch.choice_for_df(df, "step_b", a=50, size=10, replace=True).copy() + assert not np.array_equal(c_a, c_b) + + def test_output_dtype_int(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3]) + choices = ch.choice_for_df(df, "s", a=10, size=3, replace=True) + assert np.issubdtype(choices.dtype, np.integer) + + def test_array_population_without_replace_values_in_array(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3]) + a = np.array([100, 200, 300, 400, 500]) + choices = ch.choice_for_df(df, "s", a=a, size=3, replace=False) + assert set(choices).issubset(set(a)) + + def test_wrong_step_name_raises(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2]) + with pytest.raises(AssertionError): + ch.choice_for_df(df, "wrong", a=5, size=2, replace=True) + + +# --------------------------------------------------------------------------- +# Cross-method stream independence +# --------------------------------------------------------------------------- + + +class TestStreamIndependence: + def test_random_and_normal_advance_independently(self): + """random_for_df and normal_for_df on the same channel should each + advance the underlying streams, so their second calls differ from first.""" + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3]) + r1 = ch.random_for_df(df, "s").copy() + n1 = ch.normal_for_df(df, "s").copy() + r2 = ch.random_for_df(df, "s").copy() + n2 = ch.normal_for_df(df, "s").copy() + assert not np.array_equal(r1, r2) + assert not np.array_equal(n1, n2) + + def test_two_channels_same_seed_independent(self): + """Two FastChannels with the same base_seed but different names should + produce independent (different) streams for the same index values.""" + df = _make_df([1, 2, 3, 4, 5]) + ch_a = FastChannel("channel_a", 0, df) + ch_b = FastChannel("channel_b", 0, df) + ch_a.begin_step("s") + ch_b.begin_step("s") + r_a = ch_a.random_for_df(df, "s") + r_b = ch_b.random_for_df(df, "s") + assert not np.array_equal(r_a, r_b) + + def test_row_streams_are_independent(self): + """Each row should have its own stream; draws for one row should not + correlate trivially with another row.""" + ch = _make_channel(index_values=list(range(100))) + ch.begin_step("s") + df = _make_df(list(range(100))) + rands = ch.random_for_df(df, "s", n=10) + # Check that not all rows are identical + assert not np.all(rands == rands[0]) diff --git a/activitysim/core/test/test_fast_random.py b/activitysim/core/test/test_fast_random.py new file mode 100644 index 0000000000..63cfa3dfec --- /dev/null +++ b/activitysim/core/test/test_fast_random.py @@ -0,0 +1,306 @@ +"""Tests for vector_random_standard_normal and vector_random_standard_uniform.""" + +from __future__ import annotations + +import numpy as np + +from activitysim.core.fast_random._fast_random import FastGenerator + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def make_fast_generator(): + return FastGenerator() + + +def make_state_array( + fg: FastGenerator, n_agents: int, base_seed: int = 0 +) -> np.ndarray: + """Return a (n_agents, 4) uint64 state array seeded from PCG64 generators. + + Each row is a valid PCG64 state obtained by seeding a fresh generator with + ``base_seed + i`` and reading the current raw CFFI state buffer. This + mirrors exactly the layout consumed by the private ``_several_random_*`` + functions. + """ + uint64_view = fg._state_bytes.view(np.uint64) + state = np.zeros((n_agents, 4), dtype=np.uint64) + for i in range(n_agents): + fg._bit_generator.state = np.random.PCG64(seed=base_seed + i).state + state[i] = uint64_view[fg._slice_start : fg._slice_end].copy() + return state + + +# --------------------------------------------------------------------------- +# vector_random_standard_uniform +# --------------------------------------------------------------------------- + + +class TestVectorRandomStandardUniform: + """Tests for vector_random_standard_uniform.""" + + # --- output dtype and basic shape --- + + def test_returns_float64(self): + fg = make_fast_generator() + state = make_state_array(fg, 5) + result = fg.vector_random_standard_uniform(state) + assert result.dtype == np.float64 + + def test_default_shape_all_agents(self): + n = 7 + fg = make_fast_generator() + state = make_state_array(fg, n) + result = fg.vector_random_standard_uniform(state) + assert result.shape == (n, 1) + + def test_int_shape(self): + n = 6 + fg = make_fast_generator() + state = make_state_array(fg, n) + result = fg.vector_random_standard_uniform(state, shape=4) + assert result.shape == (n, 4) + + def test_tuple_shape_1d(self): + n = 5 + fg = make_fast_generator() + state = make_state_array(fg, n) + result = fg.vector_random_standard_uniform(state, shape=(3,)) + assert result.shape == (n, 3) + + def test_tuple_shape_2d(self): + n = 4 + fg = make_fast_generator() + state = make_state_array(fg, n) + result = fg.vector_random_standard_uniform(state, shape=(2, 3)) + assert result.shape == (n, 2, 3) + + # --- values in [0, 1) --- + + def test_values_in_unit_interval(self): + fg = make_fast_generator() + state = make_state_array(fg, 50) + result = fg.vector_random_standard_uniform(state, shape=100) + assert np.all(result >= 0.0) + assert np.all(result < 1.0) + + # --- selected_positions --- + + def test_selected_positions_shape(self): + n = 10 + fg = make_fast_generator() + state = make_state_array(fg, n) + sel = np.array([0, 2, 5], dtype=np.intp) + result = fg.vector_random_standard_uniform( + state, selected_positions=sel, shape=4 + ) + assert result.shape == (3, 4) + + def test_selected_positions_values_in_unit_interval(self): + n = 10 + fg = make_fast_generator() + state = make_state_array(fg, n) + sel = np.array([1, 4, 7], dtype=np.intp) + result = fg.vector_random_standard_uniform( + state, selected_positions=sel, shape=20 + ) + assert np.all(result >= 0.0) + assert np.all(result < 1.0) + + def test_selected_positions_only_selected_rows_mutated(self): + n = 5 + fg = make_fast_generator() + state = make_state_array(fg, n) + state_copy = state.copy() + sel = np.array([1, 3], dtype=np.intp) + fg.vector_random_standard_uniform(state, selected_positions=sel, shape=1) + # Rows NOT in sel must be unchanged. + for i in range(n): + if i not in sel: + np.testing.assert_array_equal( + state[i], + state_copy[i], + err_msg=f"Row {i} should not have been mutated", + ) + # Rows IN sel must have changed. + for i in sel: + assert not np.array_equal( + state[i], state_copy[i] + ), f"Row {i} should have been mutated" + + # --- state mutation --- + + def test_state_mutated_in_place(self): + fg = make_fast_generator() + state = make_state_array(fg, 4) + state_before = state.copy() + fg.vector_random_standard_uniform(state, shape=3) + assert not np.array_equal(state, state_before) + + # --- reproducibility --- + + def test_reproducibility(self): + """Identical initial state must produce identical output.""" + fg = make_fast_generator() + state_a = make_state_array(fg, 8, base_seed=99) + state_b = state_a.copy() + out_a = fg.vector_random_standard_uniform(state_a, shape=10) + out_b = fg.vector_random_standard_uniform(state_b, shape=10) + np.testing.assert_array_equal(out_a, out_b) + + def test_different_seeds_differ(self): + fg = make_fast_generator() + state_a = make_state_array(fg, 5, base_seed=0) + state_b = make_state_array(fg, 5, base_seed=1000) + out_a = fg.vector_random_standard_uniform(state_a, shape=20) + out_b = fg.vector_random_standard_uniform(state_b, shape=20) + assert not np.array_equal(out_a, out_b) + + # --- statistical sanity (large sample) --- + + def test_mean_close_to_half(self): + fg = make_fast_generator() + state = make_state_array(fg, 200) + result = fg.vector_random_standard_uniform(state, shape=500) + assert abs(result.mean() - 0.5) < 0.01 + + def test_no_exact_ones(self): + """U[0, 1) must never produce exactly 1.0.""" + fg = make_fast_generator() + state = make_state_array(fg, 100) + result = fg.vector_random_standard_uniform(state, shape=1000) + assert not np.any(result == 1.0) + + +# --------------------------------------------------------------------------- +# vector_random_standard_normal +# --------------------------------------------------------------------------- + + +class TestVectorRandomStandardNormal: + """Tests for vector_random_standard_normal.""" + + # --- output dtype and basic shape --- + + def test_returns_float64(self): + fg = make_fast_generator() + state = make_state_array(fg, 5) + result = fg.vector_random_standard_normal(state) + assert result.dtype == np.float64 + + def test_default_shape_all_agents(self): + n = 7 + fg = make_fast_generator() + state = make_state_array(fg, n) + result = fg.vector_random_standard_normal(state) + assert result.shape == (n, 1) + + def test_int_shape(self): + n = 6 + fg = make_fast_generator() + state = make_state_array(fg, n) + result = fg.vector_random_standard_normal(state, shape=4) + assert result.shape == (n, 4) + + def test_tuple_shape_1d(self): + n = 5 + fg = make_fast_generator() + state = make_state_array(fg, n) + result = fg.vector_random_standard_normal(state, shape=(3,)) + assert result.shape == (n, 3) + + def test_tuple_shape_2d(self): + n = 4 + fg = make_fast_generator() + state = make_state_array(fg, n) + result = fg.vector_random_standard_normal(state, shape=(2, 3)) + assert result.shape == (n, 2, 3) + + # --- selected_positions --- + + def test_selected_positions_shape(self): + n = 10 + fg = make_fast_generator() + state = make_state_array(fg, n) + sel = np.array([0, 2, 5], dtype=np.intp) + result = fg.vector_random_standard_normal( + state, selected_positions=sel, shape=4 + ) + assert result.shape == (3, 4) + + def test_selected_positions_only_selected_rows_mutated(self): + n = 5 + fg = make_fast_generator() + state = make_state_array(fg, n) + state_copy = state.copy() + sel = np.array([0, 4], dtype=np.intp) + fg.vector_random_standard_normal(state, selected_positions=sel, shape=1) + for i in range(n): + if i not in sel: + np.testing.assert_array_equal( + state[i], + state_copy[i], + err_msg=f"Row {i} should not have been mutated", + ) + for i in sel: + assert not np.array_equal( + state[i], state_copy[i] + ), f"Row {i} should have been mutated" + + # --- state mutation --- + + def test_state_mutated_in_place(self): + fg = make_fast_generator() + state = make_state_array(fg, 4) + state_before = state.copy() + fg.vector_random_standard_normal(state, shape=3) + assert not np.array_equal(state, state_before) + + # --- reproducibility --- + + def test_reproducibility(self): + """Identical initial state must produce identical output.""" + fg = make_fast_generator() + state_a = make_state_array(fg, 8, base_seed=77) + state_b = state_a.copy() + out_a = fg.vector_random_standard_normal(state_a, shape=10) + out_b = fg.vector_random_standard_normal(state_b, shape=10) + np.testing.assert_array_equal(out_a, out_b) + + def test_different_seeds_differ(self): + fg = make_fast_generator() + state_a = make_state_array(fg, 5, base_seed=0) + state_b = make_state_array(fg, 5, base_seed=1000) + out_a = fg.vector_random_standard_normal(state_a, shape=20) + out_b = fg.vector_random_standard_normal(state_b, shape=20) + assert not np.array_equal(out_a, out_b) + + # --- statistical sanity (large sample) --- + + def test_mean_close_to_zero(self): + fg = make_fast_generator() + state = make_state_array(fg, 200) + result = fg.vector_random_standard_normal(state, shape=500) + assert abs(result.mean()) < 0.05 + + def test_std_close_to_one(self): + fg = make_fast_generator() + state = make_state_array(fg, 200) + result = fg.vector_random_standard_normal(state, shape=500) + assert abs(result.std() - 1.0) < 0.05 + + def test_values_are_finite(self): + fg = make_fast_generator() + state = make_state_array(fg, 50) + result = fg.vector_random_standard_normal(state, shape=200) + assert np.all(np.isfinite(result)) + + def test_distribution_symmetry(self): + """Mean of absolute values should be close to sqrt(2/pi) ≈ 0.7979.""" + fg = make_fast_generator() + expected_mean_abs = np.sqrt(2.0 / np.pi) + state = make_state_array(fg, 200) + result = fg.vector_random_standard_normal(state, shape=500) + assert abs(np.abs(result).mean() - expected_mean_abs) < 0.05 diff --git a/activitysim/core/test/test_random.py b/activitysim/core/test/test_random.py index bcbc602685..b442087b58 100644 --- a/activitysim/core/test/test_random.py +++ b/activitysim/core/test/test_random.py @@ -2,6 +2,8 @@ # See full license in LICENSE.txt. from __future__ import annotations +from typing import Literal + import numpy as np import numpy.testing as npt import pandas as pd @@ -33,12 +35,13 @@ def test_basic(): assert "call set_base_seed before the first step" in str(excinfo.value) -def test_channel(): +@pytest.mark.parametrize("channel_type", ["simple", "fast", "faster"]) +def test_channel(channel_type: Literal["simple", "fast", "faster"]): channels = { "households": "household_id", "persons": "person_id", } - rng = random.Random() + rng = random.Random(channel_type=channel_type) persons = pd.DataFrame( { @@ -66,12 +69,22 @@ def test_channel(): print("rands", np.asanyarray(rands).flatten()) assert rands.shape == (5, 1) - test1_expected_rands = [0.1733218, 0.1255693, 0.7384256, 0.3485183, 0.9012387] + if channel_type == "fast": + test1_expected_rands = [0.4072658, 0.5591271, 0.0297283, 0.6235138, 0.6921163] + elif channel_type == "faster": + test1_expected_rands = [0.4580108, 0.531716, 0.6470319, 0.6762532, 0.7392374] + else: + test1_expected_rands = [0.1733218, 0.1255693, 0.7384256, 0.3485183, 0.9012387] npt.assert_almost_equal(np.asanyarray(rands).flatten(), test1_expected_rands) # second call should return something different rands = rng.random_for_df(persons) - test1_expected_rands2 = [0.9105223, 0.5718418, 0.7222742, 0.9062284, 0.3929369] + if channel_type == "fast": + test1_expected_rands2 = [0.336963, 0.5420581, 0.4396565, 0.9702927, 0.0251327] + elif channel_type == "faster": + test1_expected_rands2 = [0.1690983, 0.933964, 0.3887059, 0.7922818, 0.4179632] + else: + test1_expected_rands2 = [0.9105223, 0.5718418, 0.7222742, 0.9062284, 0.3929369] npt.assert_almost_equal(np.asanyarray(rands).flatten(), test1_expected_rands2) rng.end_step("test_step") @@ -79,16 +92,31 @@ def test_channel(): rng.begin_step("test_step2") rands = rng.random_for_df(households) - expected_rands = [0.417278, 0.2994774, 0.8653719, 0.4429748, 0.5101697] + if channel_type == "fast": + expected_rands = [0.1571023, 0.2709219, 0.2515827, 0.9444831, 0.6816792] + elif channel_type == "faster": + expected_rands = [0.1934219, 0.3369451, 0.8455883, 0.6440651, 0.3889942] + else: + expected_rands = [0.417278, 0.2994774, 0.8653719, 0.4429748, 0.5101697] npt.assert_almost_equal(np.asanyarray(rands).flatten(), expected_rands) choices = rng.choice_for_df(households, [1, 2, 3, 4], 2, replace=True) - expected_choices = [2, 1, 3, 3, 4, 2, 4, 1, 4, 1] + if channel_type == "fast": + expected_choices = [4, 1, 4, 3, 2, 1, 3, 1, 1, 4] + elif channel_type == "faster": + expected_choices = [3, 4, 4, 3, 4, 2, 4, 1, 2, 3] + else: + expected_choices = [2, 1, 3, 3, 4, 2, 4, 1, 4, 1] npt.assert_almost_equal(choices, expected_choices) # should be DIFFERENT the second time choices = rng.choice_for_df(households, [1, 2, 3, 4], 2, replace=True) - expected_choices = [3, 1, 4, 3, 3, 2, 2, 1, 4, 2] + if channel_type == "fast": + expected_choices = [1, 4, 2, 1, 2, 3, 1, 2, 2, 4] + elif channel_type == "faster": + expected_choices = [4, 1, 3, 3, 4, 1, 4, 2, 3, 2] + else: + expected_choices = [3, 1, 4, 3, 3, 2, 2, 1, 4, 2] npt.assert_almost_equal(choices, expected_choices) rng.end_step("test_step2") @@ -97,18 +125,45 @@ def test_channel(): rands = rng.random_for_df(households, n=2) - expected_rands = [ - 0.3157928, - 0.3321823, - 0.5194067, - 0.9340083, - 0.9002048, - 0.8754209, - 0.3898816, - 0.4101094, - 0.7351484, - 0.1741092, - ] + if channel_type == "fast": + expected_rands = [ + 0.0728735, + 0.9764697, + 0.6611142, + 0.8802973, + 0.0122184, + 0.8770089, + 0.9944639, + 0.2064867, + 0.6051138, + 0.1666114, + ] + elif channel_type == "faster": + expected_rands = [ + 0.2677105, + 0.7688408, + 0.9949042, + 0.909176, + 0.9348486, + 0.069542, + 0.7039883, + 0.89629, + 0.7469927, + 0.3387263, + ] + else: + expected_rands = [ + 0.3157928, + 0.3321823, + 0.5194067, + 0.9340083, + 0.9002048, + 0.8754209, + 0.3898816, + 0.4101094, + 0.7351484, + 0.1741092, + ] npt.assert_almost_equal(np.asanyarray(rands).flatten(), expected_rands) diff --git a/activitysim/core/workflow/state.py b/activitysim/core/workflow/state.py index de22b0687d..b13e021c21 100644 --- a/activitysim/core/workflow/state.py +++ b/activitysim/core/workflow/state.py @@ -20,7 +20,7 @@ import activitysim.core.random from activitysim.core.configuration import FileSystem, NetworkSettings, Settings -from activitysim.core.exceptions import StateAccessError, CheckpointNameNotFoundError +from activitysim.core.exceptions import CheckpointNameNotFoundError, StateAccessError from activitysim.core.workflow.checkpoint import LAST_CHECKPOINT, Checkpoints from activitysim.core.workflow.chunking import Chunking from activitysim.core.workflow.dataset import Datasets @@ -181,14 +181,17 @@ def init_state(self) -> None: def _initialize_prng(self, base_seed=None): from activitysim.core.random import Random - self._context["prng"] = Random() - if base_seed is None: - try: - self.settings - except StateAccessError: + try: + self.settings + except StateAccessError: + channel_type = "fast" + if base_seed is None: base_seed = 0 - else: + else: + channel_type = getattr(self.settings, "rng_channel_type", "fast") + if base_seed is None: base_seed = self.settings.rng_base_seed + self._context["prng"] = Random(channel_type=channel_type) self._context["prng"].set_base_seed(base_seed) def import_extensions(self, ext: str | Iterable[str] = None, append=True) -> None: @@ -858,7 +861,19 @@ def extract(self, func): def rng(self) -> activitysim.core.random.Random: if "prng" not in self._context: self._initialize_prng() - return self._context["prng"] + prng = self._context["prng"] + # Allow channel_type to be picked up from settings even when prng was + # initialised before settings were loaded. Safe to update as long as + # no channels have been registered yet. + try: + desired_channel_type = getattr( + self.settings, "rng_channel_type", prng.channel_type + ) + except StateAccessError: + desired_channel_type = prng.channel_type + if desired_channel_type != prng.channel_type and not prng.channels: + prng.channel_type = desired_channel_type + return prng def pipeline_table_key(self, table_name, checkpoint_name): if checkpoint_name: diff --git a/other_resources/performance-checks/fast-channel-random.py b/other_resources/performance-checks/fast-channel-random.py new file mode 100644 index 0000000000..a7fbb9ac58 --- /dev/null +++ b/other_resources/performance-checks/fast-channel-random.py @@ -0,0 +1,383 @@ +""" +Random Number Generation Performance Benchmark +=============================================== +Compares the "fast" channel (new implementation) vs. the "slow" channel +(legacy implementation) for various random generation functions in +activitysim.core.random. + +Run this script from the activitysim repo root with: + python other_resources/scripts/random-performance.py +""" + +from __future__ import annotations + +import sys +import textwrap +import timeit + +import numpy as np +import pandas as pd + +from activitysim.core.random import Random + +# Windows consoles default to a locale-specific encoding (e.g. cp1252) that +# cannot represent the box-drawing characters used below. Reconfiguring stdout +# and stderr to UTF-8 here means the script works correctly regardless of the +# active code page, without requiring the caller to set PYTHONUTF8=1. +if hasattr(sys.stdout, "reconfigure"): + sys.stdout.reconfigure(encoding="utf-8") +if hasattr(sys.stderr, "reconfigure"): + sys.stderr.reconfigure(encoding="utf-8") + + +# ── helpers ──────────────────────────────────────────────────────────────────── + +SEP_WIDE = "=" * 72 +SEP_THIN = "-" * 72 +INDENT = " " + + +def section(title): + print() + print(SEP_WIDE) + print(f" {title}") + print(SEP_WIDE) + + +def subsection(title): + print() + print(f"{INDENT}{SEP_THIN[2:]}") + print(f"{INDENT}{title}") + print(f"{INDENT}{SEP_THIN[2:]}") + + +def note(text, indent=2): + prefix = " " * indent + "│ " + wrapped = textwrap.fill( + text, width=68, initial_indent=prefix, subsequent_indent=prefix + ) + print(wrapped) + + +def result(label, elapsed_ms, n=None): + if n is not None: + avg_ms = elapsed_ms / n + print( + f"{INDENT} {label:<40s} {avg_ms:8.3f} ms/call " + f"(total {elapsed_ms:.1f} ms × {n})" + ) + else: + print(f"{INDENT} {label:<40s} {elapsed_ms:8.3f} ms") + + +def speedup(fast_ms, slow_ms, fast_label="fast", slow_label="slow"): + if fast_ms > 0: + ratio = slow_ms / fast_ms + print(f"{INDENT} --> {slow_label} is {ratio:.1f}x slower than {fast_label}") + + +def bench(fn, number=10, repeat=3): + """Return best average time in milliseconds.""" + times = timeit.repeat(fn, number=number, repeat=repeat) + best_total = min(times) # best total over `number` calls + return best_total / number * 1_000 # convert to ms per call + + +# ── setup ────────────────────────────────────────────────────────────────────── + +section("Setup") + +note( + "Building a household index of 250 000 unique random integers drawn " + "from [0, 1 000 000 000). This mimics a realistic, sparse household " + "ID space as seen in large-scale travel-demand models." +) + +prng = np.random.default_rng(seed=12345) +idxs = pd.Index(np.sort(prng.integers(1_000_000_000, size=250_000))).unique() +print(f"\n{INDENT} Household IDs generated: {idxs.size:,}") + +hh = pd.DataFrame(index=idxs, columns=[]) +hh.index.name = "household_id" +hh["dummy"] = 1 + +# fast channel dataframe +shh = hh.copy() +shh.index.name = "slow_hhid" + +# small slices for later +hh3 = hh.iloc[2:5] +shh3 = shh.iloc[2:5] + +note( + "Creating the Random object and registering one FAST channel " + "(new implementation) and one SLOW channel (legacy implementation).", + indent=2, +) + +r = Random(channel_type="faster") +r.set_base_seed(42) +r.add_channel("households", hh, fast=True) +r.add_channel("slow_households", shh, fast=False) + +print(f"\n{INDENT} Random object created.") +print(f"{INDENT} Base seed : 42") +print(f"{INDENT} Fast channel index name : '{hh.index.name}'") +print(f"{INDENT} Slow channel index name : '{shh.index.name}'") + +# ── benchmark: first-call costs ──────────────────────────────────────────────── + +section("First-call costs (Numba JIT compilation + reseeding)") + +note( + "The very first call to random_for_df on a fast channel within an " + "ActivitySim session triggers Numba ahead-of-time compilation. " + "This one-time cost disappears for all subsequent calls in the same " + "session. The slow channel does NOT use Numba." +) + +r.begin_step("peekaboo") + +t0 = timeit.default_timer() +r.random_for_df(hh) +first_fast_ms = (timeit.default_timer() - t0) * 1_000 + +t0 = timeit.default_timer() +r.random_for_df(shh) +first_slow_ms = (timeit.default_timer() - t0) * 1_000 + +print() +result("Fast channel – first call (w/ JIT + reseed)", first_fast_ms) +result("Slow channel – first call (w/ reseed) ", first_slow_ms) +note( + "The fast channel first-call cost includes Numba compilation. " + "This cost is paid only once per process, not once per step." +) + +# ── benchmark: random_for_df (subsequent calls) ──────────────────────────────── + +section("random_for_df — subsequent calls within the same step") + +note( + "After the first call, the fast channel has already compiled its Numba " + "kernels and cached the reseeded state. Subsequent calls within the " + "same step are pure number generation — very fast. " + "The slow channel must reseed from scratch on every single call, " + "making it consistently slow regardless of call order." +) + +fast_rand_ms = bench(lambda: r.random_for_df(hh), number=10, repeat=3) +slow_rand_ms = bench(lambda: r.random_for_df(shh), number=3, repeat=5) + +print() +result("Fast channel random_for_df (n=250k)", fast_rand_ms) +result("Slow channel random_for_df (n=250k)", slow_rand_ms) +speedup(fast_rand_ms, slow_rand_ms) + +# ── benchmark: normal_for_df ─────────────────────────────────────────────────── + +section("normal_for_df — uniform-to-normal transform (μ=3.0, σ=1.5)") + +note( + "Generating normally-distributed draws adds a Box–Muller / inverse-CDF " + "transform on top of the uniform draws. The fast channel benefits from " + "the same Numba-compiled, pre-seeded state as before. " + "The slow channel pays the full reseeding cost every call." +) + +# Trigger any remaining first-call compilation for normal_for_df +r.normal_for_df(hh, mu=3.0, sigma=1.5) + +fast_norm_ms = bench( + lambda: r.normal_for_df(hh, mu=3.0, sigma=1.5), number=10, repeat=3 +) +slow_norm_ms = bench( + lambda: r.normal_for_df(shh, mu=3.0, sigma=1.5), number=3, repeat=5 +) + +print() +result("Fast channel normal_for_df (n=250k)", fast_norm_ms) +result("Slow channel normal_for_df (n=250k)", slow_norm_ms) +speedup(fast_norm_ms, slow_norm_ms) + +# ── benchmark: choice_for_df ─────────────────────────────────────────────────── + +section("choice_for_df — sampling without replacement (size=5 from [1..7])") + +note( + "choice_for_df is built atop random_for_df, so by this point the fast " + "channel's seed state is already warm. Drawing 5 choices per row " + "without replacement means each row independently samples from [1,2,3,4,5,6,7]. " + "The slow channel is again penalised by per-call reseeding." +) + +fast_choice_ms = bench( + lambda: r.choice_for_df(hh, a=[1, 2, 3, 4, 5, 6, 7], size=5, replace=False), + number=5, + repeat=3, +) +slow_choice_ms = bench( + lambda: r.choice_for_df(shh, a=[1, 2, 3, 4, 5, 6, 7], size=5, replace=False), + number=3, + repeat=3, +) + +print() +result("Fast channel choice_for_df (n=250k, size=5)", fast_choice_ms) +result("Slow channel choice_for_df (n=250k, size=5)", slow_choice_ms) +speedup(fast_choice_ms, slow_choice_ms) + +r.end_step("peekaboo") + +# ── benchmark: cross-step reproducibility & reseed cost ─────────────────────── + +section("Cross-step reproducibility: reseed cost at start of a new step") + +note( + "When a new step begins (or the same step name is restarted), the random " + "state must be reseeded. For the fast channel this happens exactly once " + "— on the first draw after begin_step(). For the slow channel the cost " + "is paid on every single draw, every step." +) + +r.begin_step("peekaboo") + +t0 = timeit.default_timer() +r.normal_for_df(hh, mu=3.0, sigma=1.5) +fast_reseed_ms = (timeit.default_timer() - t0) * 1_000 + +t0 = timeit.default_timer() +r.normal_for_df(shh, mu=3.0, sigma=1.5) +slow_reseed_ms = (timeit.default_timer() - t0) * 1_000 + +print() +result("Fast channel – first draw after new step (reseed + generate)", fast_reseed_ms) +result("Slow channel – first draw after new step (reseed + generate)", slow_reseed_ms) +note( + "The slow channel's cost here is representative of EVERY subsequent " + "call too, since it reseeds every time. For the fast channel this " + "cost is only paid once per step." +) + +r.end_step("peekaboo") + +# ── benchmark: same step name ⇒ same numbers ────────────────────────────────── + +section("Reproducibility: re-running the same step name") + +note( + "Restarting a step with the same name must yield identical random draws. " + "Here we verify this and measure the cost of the reseed-on-first-draw " + "pattern again." +) + +r.begin_step("peekaboo") + +arr1_fast = r.normal_for_df(hh, mu=3.0, sigma=1.5) +arr1_slow = r.normal_for_df(shh, mu=3.0, sigma=1.5) + +r.end_step("peekaboo") +r.begin_step("peekaboo") + +arr2_fast = r.normal_for_df(hh, mu=3.0, sigma=1.5) +arr2_slow = r.normal_for_df(shh, mu=3.0, sigma=1.5) + +fast_repro = np.allclose(arr1_fast, arr2_fast) +slow_repro = np.allclose(arr1_slow, arr2_slow) + +print() +print(f"{INDENT} Fast channel reproducible across step restarts: {fast_repro}") +print(f"{INDENT} Slow channel reproducible across step restarts: {slow_repro}") +note( + "Both channels produce identical draws when the same step name is reused, " + "guaranteeing reproducibility across model runs." +) + +r.end_step("peekaboo") + +# ── benchmark: small subsets ─────────────────────────────────────────────────── + +section("Performance on small subsets (3 rows)") + +note( + "Even when only a handful of rows need random numbers, the slow channel " + "must still reseed the entire household table before slicing. " + "The fast channel uses the pre-computed entropy to seed only the rows " + "requested, making it proportionally cheaper on small subsets too." +) + +r.begin_step("peekaboo") +# warm up fast channel reseed for this step +r.normal_for_df(hh, mu=3.0, sigma=1.5) + +fast_small_ms = bench( + lambda: r.normal_for_df(hh3, mu=3.0, sigma=1.5), number=3, repeat=5 +) +slow_small_ms = bench( + lambda: r.normal_for_df(shh3, mu=3.0, sigma=1.5), number=3, repeat=5 +) + +print() +result("Fast channel normal_for_df (n=3)", fast_small_ms) +result("Slow channel normal_for_df (n=3)", slow_small_ms) +speedup(fast_small_ms, slow_small_ms, fast_label="fast (warm)", slow_label="slow") +note( + "Even for a 3-row draw the fast channel wins because the seeding work " + "was already done at the first draw of this step." +) + +r.end_step("peekaboo") + +# ── summary ──────────────────────────────────────────────────────────────────── + +section("Summary") + +rows = [ + ("random_for_df 250k rows", fast_rand_ms, slow_rand_ms), + ("normal_for_df 250k rows", fast_norm_ms, slow_norm_ms), + ("choice_for_df 250k rows x5", fast_choice_ms, slow_choice_ms), + ("normal_for_df 3 rows (warm)", fast_small_ms, slow_small_ms), +] + +col_w = 38 +print() +print( + f"{INDENT} {'Operation':<{col_w}} {'Fast (ms)':>10} {'Slow (ms)':>10} {'Speedup':>8}" +) +print(f"{INDENT} {'-'*col_w} {'-'*10} {'-'*10} {'-'*8}") +for label, f, s in rows: + ratio = s / f if f > 0 else float("inf") + print(f"{INDENT} {label:<{col_w}} {f:10.3f} {s:10.3f} {ratio:7.1f}x") + +print() +note( + "The fast channel should be faster in every scenario. The gap should be largest " + "on repeated within-step calls because the slow channel pays the full " + "reseeding cost each time, while the fast channel pays it only once per " + "step. Both channels are fully reproducible when the same step name " + "is reused." +) + +# ── sanity check ─────────────────────────────────────────────────────────────── + +failures = [label for label, f, s in rows if s <= f] +if failures: + print() + print(f"{INDENT} !! PERFORMANCE REGRESSION DETECTED !!") + for label in failures: + print( + f"{INDENT} · '{label}': slow channel was NOT slower than fast channel" + ) + note( + "A 'slow' result that is faster than (or equal to) the corresponding " + "'fast' result indicates a regression in the fast-channel implementation " + "or an anomaly in the benchmark environment. Investigate before merging." + ) + print() + print(SEP_WIDE) + print() + sys.exit(1) + +print() +print(SEP_WIDE) +print() diff --git a/other_resources/scripts/random-performance.ipynb b/other_resources/scripts/random-performance.ipynb new file mode 100644 index 0000000000..9108634f2e --- /dev/null +++ b/other_resources/scripts/random-performance.ipynb @@ -0,0 +1,727 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "9b2e3b94", + "metadata": {}, + "outputs": [], + "source": [ + "from activitysim.core.random import Random\n", + "\n", + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d39b9739", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Length of idxs: 249972\n" + ] + } + ], + "source": [ + "prng = np.random.default_rng(seed=12345)\n", + "idxs = pd.Index(np.sort(prng.integers(1_000_000_000, size=250_000)))\n", + "idxs = idxs.unique()\n", + "print(f\"Length of idxs: {idxs.size}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "82aeb86c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
| \n", + " | dummy | \n", + "
|---|---|
| household_id | \n", + "\n", + " |
| 5176 | \n", + "1 | \n", + "
| 6338 | \n", + "1 | \n", + "
| 8523 | \n", + "1 | \n", + "
| 11455 | \n", + "1 | \n", + "
| 19127 | \n", + "1 | \n", + "
| ... | \n", + "... | \n", + "
| 999980188 | \n", + "1 | \n", + "
| 999983015 | \n", + "1 | \n", + "
| 999987465 | \n", + "1 | \n", + "
| 999991572 | \n", + "1 | \n", + "
| 999994200 | \n", + "1 | \n", + "
249972 rows × 1 columns
\n", + "