From c5c5e749b973f70715f313fb23700a512a8153e8 Mon Sep 17 00:00:00 2001 From: Jeffrey Newman Date: Thu, 30 Apr 2026 11:43:53 -0500 Subject: [PATCH 01/21] merge reroll --- activitysim/core/fast_random/__init__.py | 8 + activitysim/core/fast_random/_fast_channel.py | 381 ++++ activitysim/core/fast_random/_fast_random.py | 1902 +++++++++++++++++ activitysim/core/random.py | 8 +- activitysim/core/test/test_fast_channel.py | 501 +++++ activitysim/core/test/test_fast_random.py | 306 +++ 6 files changed, 3103 insertions(+), 3 deletions(-) create mode 100644 activitysim/core/fast_random/__init__.py create mode 100644 activitysim/core/fast_random/_fast_channel.py create mode 100644 activitysim/core/fast_random/_fast_random.py create mode 100644 activitysim/core/test/test_fast_channel.py create mode 100644 activitysim/core/test/test_fast_random.py diff --git a/activitysim/core/fast_random/__init__.py b/activitysim/core/fast_random/__init__.py new file mode 100644 index 0000000000..0eecb22bed --- /dev/null +++ b/activitysim/core/fast_random/__init__.py @@ -0,0 +1,8 @@ +from __future__ import annotations + +from ._fast_channel import FastChannel # noqa: F403 + +__all__ = ( # noqa: F405 + # TODO: Add all public symbols here. + "FastChannel", +) diff --git a/activitysim/core/fast_random/_fast_channel.py b/activitysim/core/fast_random/_fast_channel.py new file mode 100644 index 0000000000..5d6f29ee75 --- /dev/null +++ b/activitysim/core/fast_random/_fast_channel.py @@ -0,0 +1,381 @@ +from __future__ import annotations + +import hashlib + +import numpy as np +import pandas as pd +from cffi import FFI + +from ._fast_random import FastGenerator + +# one more than 0xFFFFFFFF so we can wrap using: int64 % _MAX_SEED +_MAX_SEED = 1 << 32 +_SEED_MASK = 0xFFFFFFFF + +_FFI = FFI() + + +def hash32(s): + """ + + Parameters + ---------- + s: str + + Returns + ------- + 32 bit unsigned hash + """ + s = s.encode("utf8") + h = hashlib.md5(s).hexdigest() + return int(h, base=16) & _SEED_MASK + + +class FastChannel: + def __init__( + self, + channel_name: str, + base_seed: int, + domain_df: pd.DataFrame, + step_name: str = "", + ) -> None: + """ + Create a new FastChannel for vectorised PCG64-based random number generation. + + Each row in *domain_df* gets its own independent PCG64 bit-generator whose + initial state is derived from the combination of *base_seed*, + *channel_name*, the current step name, and the row's index value. This + guarantees reproducibility across runs while keeping every row's stream + independent. + + Parameters + ---------- + channel_name : str + Unique name for this channel (e.g. ``"households"``, ``"persons"``). + Hashed into the per-row seed so that different channels produce + distinct streams even when their domain index values overlap. + base_seed : int + Global base seed added to every per-row seed sequence. Change this + value to shift all random streams without breaking their relative + independence. + domain_df : pandas.DataFrame + DataFrame whose index defines the set of agents (rows) managed by + this channel. The index is copied and stored; the DataFrame columns + are ignored. + step_name : str, optional + If non-empty, ``begin_step(step_name)`` is called immediately after + construction so the channel is ready to generate numbers straight + away. Defaults to ``""`` (no step started). + """ + self.base_seed = base_seed + self.channel_name = channel_name + self.channel_seed = hash32(self.channel_name) + self.domain_index = domain_df.index.copy() + self.step_name = None + self.step_seed = None + self._fast_generator = FastGenerator() + self._state_array = None + if step_name: + self.begin_step(step_name) + + def begin_step(self, step_name: str) -> None: + """ + Initialise (or re-initialise) the per-row PCG64 states for a new step. + + Must be called before any random-number methods are used within a step. + The method seeds every row's bit-generator from the four-integer sequence + ``[base_seed, channel_seed, step_seed, row_index]`` via + :class:`numpy.random.SeedSequence`, ensuring that: + + * the same step always produces the same stream (reproducibility), and + * different steps produce independent streams (no cross-step correlation). + + Parameters + ---------- + step_name : str + Name of the pipeline step being started (e.g. ``"auto_ownership"``). + Hashed into the seed so that different steps yield distinct streams. + + Raises + ------ + AssertionError + If a step is already active (``end_step`` was not called first). + """ + + assert self.step_name is None + + self.step_name = step_name + self.step_seed = hash32(self.step_name) + + # Seed the bit generators, extracting state along the way + state_array = np.empty(shape=[len(self.domain_index), 4], dtype=np.uint64) + for n, i in enumerate(self.domain_index): + ss = np.random.SeedSequence( + [self.base_seed, self.channel_seed, self.step_seed, i] + ) + state_array[n, :] = self._fast_generator.get_state_array(ss) + + self._state_array = state_array + + def end_step(self, step_name: str = "") -> None: + """ + Tear down the per-row PCG64 states at the end of a step. + + Clears ``step_name``, ``step_seed``, ``_bitgenerator``, and + ``_state_array`` so that accidental use of stale state after a step + boundary raises an error rather than silently producing wrong numbers. + + Parameters + ---------- + step_name : str, optional + When provided, asserts that the currently active step matches + *step_name* as a consistency check. Pass an empty string (the + default) to skip the check. + + Raises + ------ + AssertionError + If *step_name* is provided and does not match the active step name. + """ + if step_name: + assert self.step_name == step_name + self.step_name = None + self.step_seed = None + self._state_array = None + + def _check_valid_df(self, df: pd.DataFrame) -> np.ndarray: + """ + Validate *df* against the channel's domain and return row positions. + + Performs three checks: + + 1. *df* has no duplicate index values. + 2. Every index value in *df* exists in the channel's domain index. + 3. A step is currently active (``_state_array`` is not ``None``). + + Parameters + ---------- + df : pandas.DataFrame + DataFrame whose index is to be validated. Columns are ignored. + + Returns + ------- + selected_positions : numpy.ndarray + 1-D integer array of shape ``(len(df),)`` containing the positional + indices into ``self.domain_index`` (and therefore into + ``self._state_array``) that correspond to each row of *df*. + + Raises + ------ + ValueError + If *df* contains duplicate index values, if any index value is + absent from the domain, or if no step is currently active. + """ + # check that df.index has no duplicates + if len(df.index.unique()) != len(df.index): + raise ValueError("DataFrame must have unique index") + + selected_positions = self.domain_index.get_indexer(df.index) + + # check that all df.index values were found in self.domain_index + if selected_positions.min() < 0: + raise ValueError("DataFrame has index values not found in the domain") + + if self._state_array is None: + raise ValueError("outside of a defined step") + + return selected_positions + + def normal_for_df( + self, + df: pd.DataFrame, + step_name: str, + mu: float | np.ndarray = 0, + sigma: float | np.ndarray = 1, + lognormal: bool = False, + size: int | tuple[int, ...] = 1, + ) -> np.ndarray: + """ + Draw normal (or lognormal) random variates for each row in *df*. + + Uses the vectorised PCG64 state array to generate standard-normal + samples, then affinely transforms them with *mu* and *sigma*. + Successive calls within the same step advance each row's stream + independently. + + Parameters + ---------- + df : pandas.DataFrame + DataFrame whose index selects which agents receive draws. Columns + are ignored. + step_name : str + Name of the currently active step; checked for consistency. + mu : float or array-like, optional + Mean of the normal distribution. A scalar is broadcast across all + rows; an array must have one element per row in *df*. Defaults to + ``0``. + sigma : float or array-like, optional + Standard deviation of the normal distribution. Same broadcasting + rules as *mu*. Defaults to ``1``. + lognormal : bool, optional + When ``True``, return ``exp(normal_sample)`` so that the result + follows a lognormal distribution with the given underlying-normal + parameters. Defaults to ``False``. + size : int or tuple of int, optional + Number of draws per agent. A plain ``int`` *k* yields *k* draws + per row; a tuple gives the per-row shape. Defaults to ``1``. + + Returns + ------- + result : numpy.ndarray + Array of shape ``(len(df), *size)`` containing the random variates. + + Raises + ------ + AssertionError + If *step_name* is ``None`` or does not match the active step. + ValueError + If *df* fails the domain validation performed by + :meth:`_check_valid_df`. + """ + assert step_name is not None + assert step_name == self.step_name + selected_positions = self._check_valid_df(df) + + mu = np.asarray(mu) + sigma = np.asarray(sigma) + result = self._fast_generator.vector_random_standard_normal( + self._state_array, selected_positions=selected_positions, shape=size + ) + result = result * sigma + mu + if lognormal: + result = np.exp(result) + return result + + def random_for_df( + self, + df: pd.DataFrame, + step_name: str, + n: int | tuple[int, ...] = 1, + ) -> np.ndarray: + """ + Draw standard-uniform random variates for each row in *df*. + + Uses the vectorised PCG64 state array to generate draws from U[0, 1). + Successive calls within the same step advance each row's stream + independently. + + Parameters + ---------- + df : pandas.DataFrame + DataFrame whose index selects which agents receive draws. Columns + are ignored. + step_name : str + Name of the currently active step; checked for consistency. + n : int or tuple of int, optional + Number of draws per agent. A plain ``int`` *k* yields *k* draws + per row; a tuple gives the per-row shape. Defaults to ``1``. + + Returns + ------- + rands : numpy.ndarray + Array of shape ``(len(df), *n)`` with values in U[0, 1). + + Raises + ------ + AssertionError + If *step_name* is ``None`` or does not match the active step. + ValueError + If *df* fails the domain validation performed by + :meth:`_check_valid_df`. + """ + assert step_name is not None + assert step_name == self.step_name + selected_positions = self._check_valid_df(df) + return self._fast_generator.vector_random_standard_uniform( + self._state_array, selected_positions=selected_positions, shape=n + ) + + def choice_for_df( + self, + df: pd.DataFrame, + step_name: str, + a: int | np.ndarray, + size: int | tuple[int, ...] = 1, + replace: bool = False, + ) -> np.ndarray: + """ + Apply numpy.random.choice once for each row in df + using the appropriate random channel for each row. + + Concatenate the the choice arrays for every row into a single 1-D ndarray + The resulting array will be of length: size * len(df.index) + This method is designed to support creation of a interaction_dataset + + The columns in df are ignored; the index name and values are used to determine + which random number sequence to to use. + + Parameters + ---------- + df : pandas.DataFrame + df with index name and values corresponding to a registered channel + step_name : str + current step name so we can update row_states seed info + a : 1-D array-like or int + If an ndarray, a random sample is generated from its elements. + If an int, the random sample is generated as if a was np.arange(a). + size : int or tuple of ints + Output shape (per df row). + replace : bool + Whether the sample is with or without replacement. + + Returns + ------- + choices : 1-D ndarray of length: prod(size) * len(df.index) + The generated random samples for each row concatenated into a + single (flat) array. + """ + assert step_name is not None + assert step_name == self.step_name + selected_positions = self._check_valid_df(df) + + # total number of draws required per row + if isinstance(size, (int, np.integer)): + total = int(size) + else: + total = int(np.prod(size)) + + # population to sample from + if isinstance(a, (int, np.integer)): + a_arr = np.arange(int(a)) + else: + a_arr = np.asarray(a) + n_pop = len(a_arr) + + if replace: + # draw `total` uniforms per selected row and map to indices in a + rands = self._fast_generator.vector_random_standard_uniform( + self._state_array, + selected_positions=selected_positions, + shape=total, + ) + idx = (rands * n_pop).astype(np.int64) + # guard against the (vanishingly rare) case rands == 1.0 - epsilon edge + np.minimum(idx, n_pop - 1, out=idx) + sample = a_arr[idx].reshape(-1) + else: + if total > n_pop: + raise ValueError( + "Cannot take a larger sample than population when 'replace=False'" + ) + # draw n_pop uniforms per selected row; argsort produces a random + # permutation of [0, n_pop), and we take the first `total` entries. + rands = self._fast_generator.vector_random_standard_uniform( + self._state_array, + selected_positions=selected_positions, + shape=n_pop, + ) + order = np.argsort(rands, axis=1)[:, :total] + sample = a_arr[order].reshape(-1) + + return sample diff --git a/activitysim/core/fast_random/_fast_random.py b/activitysim/core/fast_random/_fast_random.py new file mode 100644 index 0000000000..a4663f7d6d --- /dev/null +++ b/activitysim/core/fast_random/_fast_random.py @@ -0,0 +1,1902 @@ +# The following C code was lovingly borrowed from numpy: +# https://github.com/numpy/numpy/blob/main/numpy/random/src/distributions/ziggurat_constants.h +# https://github.com/numpy/numpy/blob/main/numpy/random/src/distributions/distributions.c +# +# Copyright (c) 2005-2025, NumPy Developers. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided +# with the distribution. +# +# * Neither the name of the NumPy Developers nor the names of any +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# +# +# static const uint64_t ki_double[] = { +# 0x000EF33D8025EF6AULL, 0x0000000000000000ULL, 0x000C08BE98FBC6A8ULL, +# 0x000DA354FABD8142ULL, 0x000E51F67EC1EEEAULL, 0x000EB255E9D3F77EULL, +# 0x000EEF4B817ECAB9ULL, 0x000F19470AFA44AAULL, 0x000F37ED61FFCB18ULL, +# 0x000F4F469561255CULL, 0x000F61A5E41BA396ULL, 0x000F707A755396A4ULL, +# 0x000F7CB2EC28449AULL, 0x000F86F10C6357D3ULL, 0x000F8FA6578325DEULL, +# 0x000F9724C74DD0DAULL, 0x000F9DA907DBF509ULL, 0x000FA360F581FA74ULL, +# 0x000FA86FDE5B4BF8ULL, 0x000FACF160D354DCULL, 0x000FB0FB6718B90FULL, +# 0x000FB49F8D5374C6ULL, 0x000FB7EC2366FE77ULL, 0x000FBAECE9A1E50EULL, +# 0x000FBDAB9D040BEDULL, 0x000FC03060FF6C57ULL, 0x000FC2821037A248ULL, +# 0x000FC4A67AE25BD1ULL, 0x000FC6A2977AEE31ULL, 0x000FC87AA92896A4ULL, +# 0x000FCA325E4BDE85ULL, 0x000FCBCCE902231AULL, 0x000FCD4D12F839C4ULL, +# 0x000FCEB54D8FEC99ULL, 0x000FD007BF1DC930ULL, 0x000FD1464DD6C4E6ULL, +# 0x000FD272A8E2F450ULL, 0x000FD38E4FF0C91EULL, 0x000FD49A9990B478ULL, +# 0x000FD598B8920F53ULL, 0x000FD689C08E99ECULL, 0x000FD76EA9C8E832ULL, +# 0x000FD848547B08E8ULL, 0x000FD9178BAD2C8CULL, 0x000FD9DD07A7ADD2ULL, +# 0x000FDA9970105E8CULL, 0x000FDB4D5DC02E20ULL, 0x000FDBF95C5BFCD0ULL, +# 0x000FDC9DEBB99A7DULL, 0x000FDD3B8118729DULL, 0x000FDDD288342F90ULL, +# 0x000FDE6364369F64ULL, 0x000FDEEE708D514EULL, 0x000FDF7401A6B42EULL, +# 0x000FDFF46599ED40ULL, 0x000FE06FE4BC24F2ULL, 0x000FE0E6C225A258ULL, +# 0x000FE1593C28B84CULL, 0x000FE1C78CBC3F99ULL, 0x000FE231E9DB1CAAULL, +# 0x000FE29885DA1B91ULL, 0x000FE2FB8FB54186ULL, 0x000FE35B33558D4AULL, +# 0x000FE3B799D0002AULL, 0x000FE410E99EAD7FULL, 0x000FE46746D47734ULL, +# 0x000FE4BAD34C095CULL, 0x000FE50BAED29524ULL, 0x000FE559F74EBC78ULL, +# 0x000FE5A5C8E41212ULL, 0x000FE5EF3E138689ULL, 0x000FE6366FD91078ULL, +# 0x000FE67B75C6D578ULL, 0x000FE6BE661E11AAULL, 0x000FE6FF55E5F4F2ULL, +# 0x000FE73E5900A702ULL, 0x000FE77B823E9E39ULL, 0x000FE7B6E37070A2ULL, +# 0x000FE7F08D774243ULL, 0x000FE8289053F08CULL, 0x000FE85EFB35173AULL, +# 0x000FE893DC840864ULL, 0x000FE8C741F0CEBCULL, 0x000FE8F9387D4EF6ULL, +# 0x000FE929CC879B1DULL, 0x000FE95909D388EAULL, 0x000FE986FB939AA2ULL, +# 0x000FE9B3AC714866ULL, 0x000FE9DF2694B6D5ULL, 0x000FEA0973ABE67CULL, +# 0x000FEA329CF166A4ULL, 0x000FEA5AAB32952CULL, 0x000FEA81A6D5741AULL, +# 0x000FEAA797DE1CF0ULL, 0x000FEACC85F3D920ULL, 0x000FEAF07865E63CULL, +# 0x000FEB13762FEC13ULL, 0x000FEB3585FE2A4AULL, 0x000FEB56AE3162B4ULL, +# 0x000FEB76F4E284FAULL, 0x000FEB965FE62014ULL, 0x000FEBB4F4CF9D7CULL, +# 0x000FEBD2B8F449D0ULL, 0x000FEBEFB16E2E3EULL, 0x000FEC0BE31EBDE8ULL, +# 0x000FEC2752B15A15ULL, 0x000FEC42049DAFD3ULL, 0x000FEC5BFD29F196ULL, +# 0x000FEC75406CEEF4ULL, 0x000FEC8DD2500CB4ULL, 0x000FECA5B6911F12ULL, +# 0x000FECBCF0C427FEULL, 0x000FECD38454FB15ULL, 0x000FECE97488C8B3ULL, +# 0x000FECFEC47F91B7ULL, 0x000FED1377358528ULL, 0x000FED278F844903ULL, +# 0x000FED3B10242F4CULL, 0x000FED4DFBAD586EULL, 0x000FED605498C3DDULL, +# 0x000FED721D414FE8ULL, 0x000FED8357E4A982ULL, 0x000FED9406A42CC8ULL, +# 0x000FEDA42B85B704ULL, 0x000FEDB3C8746AB4ULL, 0x000FEDC2DF416652ULL, +# 0x000FEDD171A46E52ULL, 0x000FEDDF813C8AD3ULL, 0x000FEDED0F909980ULL, +# 0x000FEDFA1E0FD414ULL, 0x000FEE06AE124BC4ULL, 0x000FEE12C0D95A06ULL, +# 0x000FEE1E579006E0ULL, 0x000FEE29734B6524ULL, 0x000FEE34150AE4BCULL, +# 0x000FEE3E3DB89B3CULL, 0x000FEE47EE2982F4ULL, 0x000FEE51271DB086ULL, +# 0x000FEE59E9407F41ULL, 0x000FEE623528B42EULL, 0x000FEE6A0B5897F1ULL, +# 0x000FEE716C3E077AULL, 0x000FEE7858327B82ULL, 0x000FEE7ECF7B06BAULL, +# 0x000FEE84D2484AB2ULL, 0x000FEE8A60B66343ULL, 0x000FEE8F7ACCC851ULL, +# 0x000FEE94207E25DAULL, 0x000FEE9851A829EAULL, 0x000FEE9C0E13485CULL, +# 0x000FEE9F557273F4ULL, 0x000FEEA22762CCAEULL, 0x000FEEA4836B42ACULL, +# 0x000FEEA668FC2D71ULL, 0x000FEEA7D76ED6FAULL, 0x000FEEA8CE04FA0AULL, +# 0x000FEEA94BE8333BULL, 0x000FEEA950296410ULL, 0x000FEEA8D9C0075EULL, +# 0x000FEEA7E7897654ULL, 0x000FEEA678481D24ULL, 0x000FEEA48AA29E83ULL, +# 0x000FEEA21D22E4DAULL, 0x000FEE9F2E352024ULL, 0x000FEE9BBC26AF2EULL, +# 0x000FEE97C524F2E4ULL, 0x000FEE93473C0A3AULL, 0x000FEE8E40557516ULL, +# 0x000FEE88AE369C7AULL, 0x000FEE828E7F3DFDULL, 0x000FEE7BDEA7B888ULL, +# 0x000FEE749BFF37FFULL, 0x000FEE6CC3A9BD5EULL, 0x000FEE64529E007EULL, +# 0x000FEE5B45A32888ULL, 0x000FEE51994E57B6ULL, 0x000FEE474A0006CFULL, +# 0x000FEE3C53E12C50ULL, 0x000FEE30B2E02AD8ULL, 0x000FEE2462AD8205ULL, +# 0x000FEE175EB83C5AULL, 0x000FEE09A22A1447ULL, 0x000FEDFB27E349CCULL, +# 0x000FEDEBEA76216CULL, 0x000FEDDBE422047EULL, 0x000FEDCB0ECE39D3ULL, +# 0x000FEDB964042CF4ULL, 0x000FEDA6DCE938C9ULL, 0x000FED937237E98DULL, +# 0x000FED7F1C38A836ULL, 0x000FED69D2B9C02BULL, 0x000FED538D06AE00ULL, +# 0x000FED3C41DEA422ULL, 0x000FED23E76A2FD8ULL, 0x000FED0A732FE644ULL, +# 0x000FECEFDA07FE34ULL, 0x000FECD4100EB7B8ULL, 0x000FECB708956EB4ULL, +# 0x000FEC98B61230C1ULL, 0x000FEC790A0DA978ULL, 0x000FEC57F50F31FEULL, +# 0x000FEC356686C962ULL, 0x000FEC114CB4B335ULL, 0x000FEBEB948E6FD0ULL, +# 0x000FEBC429A0B692ULL, 0x000FEB9AF5EE0CDCULL, 0x000FEB6FE1C98542ULL, +# 0x000FEB42D3AD1F9EULL, 0x000FEB13B00B2D4BULL, 0x000FEAE2591A02E9ULL, +# 0x000FEAAEAE992257ULL, 0x000FEA788D8EE326ULL, 0x000FEA3FCFFD73E5ULL, +# 0x000FEA044C8DD9F6ULL, 0x000FE9C5D62F563BULL, 0x000FE9843BA947A4ULL, +# 0x000FE93F471D4728ULL, 0x000FE8F6BD76C5D6ULL, 0x000FE8AA5DC4E8E6ULL, +# 0x000FE859E07AB1EAULL, 0x000FE804F690A940ULL, 0x000FE7AB488233C0ULL, +# 0x000FE74C751F6AA5ULL, 0x000FE6E8102AA202ULL, 0x000FE67DA0B6ABD8ULL, +# 0x000FE60C9F38307EULL, 0x000FE5947338F742ULL, 0x000FE51470977280ULL, +# 0x000FE48BD436F458ULL, 0x000FE3F9BFFD1E37ULL, 0x000FE35D35EEB19CULL, +# 0x000FE2B5122FE4FEULL, 0x000FE20003995557ULL, 0x000FE13C82788314ULL, +# 0x000FE068C4EE67B0ULL, 0x000FDF82B02B71AAULL, 0x000FDE87C57EFEAAULL, +# 0x000FDD7509C63BFDULL, 0x000FDC46E529BF13ULL, 0x000FDAF8F82E0282ULL, +# 0x000FD985E1B2BA75ULL, 0x000FD7E6EF48CF04ULL, 0x000FD613ADBD650BULL, +# 0x000FD40149E2F012ULL, 0x000FD1A1A7B4C7ACULL, 0x000FCEE204761F9EULL, +# 0x000FCBA8D85E11B2ULL, 0x000FC7D26ECD2D22ULL, 0x000FC32B2F1E22EDULL, +# 0x000FBD6581C0B83AULL, 0x000FB606C4005434ULL, 0x000FAC40582A2874ULL, +# 0x000F9E971E014598ULL, 0x000F89FA48A41DFCULL, 0x000F66C5F7F0302CULL, +# 0x000F1A5A4B331C4AULL}; +# +# static const double wi_double[] = { +# 8.68362706080130616677e-16, 4.77933017572773682428e-17, +# 6.35435241740526230246e-17, 7.45487048124769627714e-17, +# 8.32936681579309972857e-17, 9.06806040505948228243e-17, +# 9.71486007656776183958e-17, 1.02947503142410192108e-16, +# 1.08234302884476839838e-16, 1.13114701961090307945e-16, +# 1.17663594570229211411e-16, 1.21936172787143633280e-16, +# 1.25974399146370927864e-16, 1.29810998862640315416e-16, +# 1.33472037368241227547e-16, 1.36978648425712032797e-16, +# 1.40348230012423820659e-16, 1.43595294520569430270e-16, +# 1.46732087423644219083e-16, 1.49769046683910367425e-16, +# 1.52715150035961979750e-16, 1.55578181694607639484e-16, +# 1.58364940092908853989e-16, 1.61081401752749279325e-16, +# 1.63732852039698532012e-16, 1.66323990584208352778e-16, +# 1.68859017086765964015e-16, 1.71341701765596607184e-16, +# 1.73775443658648593310e-16, 1.76163319230009959832e-16, +# 1.78508123169767272927e-16, 1.80812402857991522674e-16, +# 1.83078487648267501776e-16, 1.85308513886180189386e-16, +# 1.87504446393738816849e-16, 1.89668097007747596212e-16, +# 1.91801140648386198029e-16, 1.93905129306251037069e-16, +# 1.95981504266288244037e-16, 1.98031606831281739736e-16, +# 2.00056687762733300198e-16, 2.02057915620716538808e-16, +# 2.04036384154802118313e-16, 2.05993118874037063144e-16, +# 2.07929082904140197311e-16, 2.09845182223703516690e-16, +# 2.11742270357603418769e-16, 2.13621152594498681022e-16, +# 2.15482589785814580926e-16, 2.17327301775643674990e-16, +# 2.19155970504272708519e-16, 2.20969242822353175995e-16, +# 2.22767733047895534948e-16, 2.24552025294143552381e-16, +# 2.26322675592856786566e-16, 2.28080213834501706782e-16, +# 2.29825145544246839061e-16, 2.31557953510408037008e-16, +# 2.33279099280043561128e-16, 2.34989024534709550938e-16, +# 2.36688152357916037468e-16, 2.38376888404542434981e-16, +# 2.40055621981350627349e-16, 2.41724727046750252175e-16, +# 2.43384563137110286400e-16, 2.45035476226149539878e-16, +# 2.46677799523270498158e-16, 2.48311854216108767769e-16, +# 2.49937950162045242375e-16, 2.51556386532965786439e-16, +# 2.53167452417135826983e-16, 2.54771427381694417303e-16, +# 2.56368581998939683749e-16, 2.57959178339286723500e-16, +# 2.59543470433517070146e-16, 2.61121704706701939097e-16, +# 2.62694120385972564623e-16, 2.64260949884118951286e-16, +# 2.65822419160830680292e-16, 2.67378748063236329361e-16, +# 2.68930150647261591777e-16, 2.70476835481199518794e-16, +# 2.72019005932773206655e-16, 2.73556860440867908686e-16, +# 2.75090592773016664571e-16, 2.76620392269639032183e-16, +# 2.78146444075954410103e-16, 2.79668929362423005309e-16, +# 2.81188025534502074329e-16, 2.82703906432447923059e-16, +# 2.84216742521840606520e-16, 2.85726701075460149289e-16, +# 2.87233946347097994381e-16, 2.88738639737848191815e-16, +# 2.90240939955384233230e-16, 2.91741003166694553259e-16, +# 2.93238983144718163965e-16, 2.94735031409293489611e-16, +# 2.96229297362806647792e-16, 2.97721928420902891115e-16, +# 2.99213070138601307081e-16, 3.00702866332133102993e-16, +# 3.02191459196806151971e-16, 3.03678989421180184427e-16, +# 3.05165596297821922381e-16, 3.06651417830895451744e-16, +# 3.08136590840829717032e-16, 3.09621251066292253306e-16, +# 3.11105533263689296831e-16, 3.12589571304399892784e-16, +# 3.14073498269944617203e-16, 3.15557446545280064031e-16, +# 3.17041547910402852545e-16, 3.18525933630440648871e-16, +# 3.20010734544401137886e-16, 3.21496081152744704901e-16, +# 3.22982103703941557538e-16, 3.24468932280169778077e-16, +# 3.25956696882307838340e-16, 3.27445527514370671802e-16, +# 3.28935554267536967851e-16, 3.30426907403912838589e-16, +# 3.31919717440175233652e-16, 3.33414115231237245918e-16, +# 3.34910232054077845412e-16, 3.36408199691876507948e-16, +# 3.37908150518594979994e-16, 3.39410217584148914282e-16, +# 3.40914534700312603713e-16, 3.42421236527501816058e-16, +# 3.43930458662583133920e-16, 3.45442337727858401604e-16, +# 3.46957011461378353333e-16, 3.48474618808741370700e-16, +# 3.49995300016538099813e-16, 3.51519196727607440975e-16, +# 3.53046452078274009054e-16, 3.54577210797743572160e-16, +# 3.56111619309838843415e-16, 3.57649825837265051035e-16, +# 3.59191980508602994994e-16, 3.60738235468235137839e-16, +# 3.62288744989419151904e-16, 3.63843665590734438546e-16, +# 3.65403156156136995766e-16, 3.66967378058870090021e-16, +# 3.68536495289491401456e-16, 3.70110674588289834952e-16, +# 3.71690085582382297792e-16, 3.73274900927794352614e-16, +# 3.74865296456848868882e-16, 3.76461451331202869131e-16, +# 3.78063548200896037651e-16, 3.79671773369794425924e-16, +# 3.81286316967837738238e-16, 3.82907373130524317507e-16, +# 3.84535140186095955858e-16, 3.86169820850914927119e-16, +# 3.87811622433558721164e-16, 3.89460757048192620674e-16, +# 3.91117441837820542060e-16, 3.92781899208054153270e-16, +# 3.94454357072087711446e-16, 3.96135049107613542983e-16, +# 3.97824215026468259474e-16, 3.99522100857856502444e-16, +# 4.01228959246062907451e-16, 4.02945049763632792393e-16, +# 4.04670639241074995115e-16, 4.06406002114225038723e-16, +# 4.08151420790493873480e-16, 4.09907186035326643447e-16, +# 4.11673597380302570170e-16, 4.13450963554423599878e-16, +# 4.15239602940268833891e-16, 4.17039844056831587498e-16, +# 4.18852026071011229572e-16, 4.20676499339901510978e-16, +# 4.22513625986204937320e-16, 4.24363780509307796137e-16, +# 4.26227350434779809917e-16, 4.28104737005311666397e-16, +# 4.29996355916383230161e-16, 4.31902638100262944617e-16, +# 4.33824030562279080411e-16, 4.35760997273684900553e-16, +# 4.37714020125858747008e-16, 4.39683599951052137423e-16, +# 4.41670257615420348435e-16, 4.43674535190656726604e-16, +# 4.45696997211204306674e-16, 4.47738232024753387312e-16, +# 4.49798853244554968009e-16, 4.51879501313005876278e-16, +# 4.53980845187003400947e-16, 4.56103584156742206384e-16, +# 4.58248449810956667052e-16, 4.60416208163115281428e-16, +# 4.62607661954784567754e-16, 4.64823653154320737780e-16, +# 4.67065065671263059081e-16, 4.69332828309332890697e-16, +# 4.71627917983835129766e-16, 4.73951363232586715165e-16, +# 4.76304248053313737663e-16, 4.78687716104872284247e-16, +# 4.81102975314741720538e-16, 4.83551302941152515162e-16, +# 4.86034051145081195402e-16, 4.88552653135360343280e-16, +# 4.91108629959526955862e-16, 4.93703598024033454728e-16, +# 4.96339277440398725619e-16, 4.99017501309182245754e-16, +# 5.01740226071808946011e-16, 5.04509543081872748637e-16, +# 5.07327691573354207058e-16, 5.10197073234156184149e-16, +# 5.13120268630678373200e-16, 5.16100055774322824569e-16, +# 5.19139431175769859873e-16, 5.22241633800023428760e-16, +# 5.25410172417759732697e-16, 5.28648856950494511482e-16, +# 5.31961834533840037535e-16, 5.35353631181649688145e-16, +# 5.38829200133405320160e-16, 5.42393978220171234073e-16, +# 5.46053951907478041166e-16, 5.49815735089281410703e-16, +# 5.53686661246787600374e-16, 5.57674893292657647836e-16, +# 5.61789555355541665830e-16, 5.66040892008242216739e-16, +# 5.70440462129138908417e-16, 5.75001376891989523684e-16, +# 5.79738594572459365014e-16, 5.84669289345547900201e-16, +# 5.89813317647789942685e-16, 5.95193814964144415532e-16, +# 6.00837969627190832234e-16, 6.06778040933344851394e-16, +# 6.13052720872528159123e-16, 6.19708989458162555387e-16, +# 6.26804696330128439415e-16, 6.34412240712750598627e-16, +# 6.42623965954805540945e-16, 6.51560331734499356881e-16, +# 6.61382788509766415145e-16, 6.72315046250558662913e-16, +# 6.84680341756425875856e-16, 6.98971833638761995415e-16, +# 7.15999493483066421560e-16, 7.37242430179879890722e-16, +# 7.65893637080557275482e-16, 8.11384933765648418565e-16}; +# +# static const double fi_double[] = { +# 1.00000000000000000000e+00, 9.77101701267671596263e-01, +# 9.59879091800106665211e-01, 9.45198953442299649730e-01, +# 9.32060075959230460718e-01, 9.19991505039347012840e-01, +# 9.08726440052130879366e-01, 8.98095921898343418910e-01, +# 8.87984660755833377088e-01, 8.78309655808917399966e-01, +# 8.69008688036857046555e-01, 8.60033621196331532488e-01, +# 8.51346258458677951353e-01, 8.42915653112204177333e-01, +# 8.34716292986883434679e-01, 8.26726833946221373317e-01, +# 8.18929191603702366642e-01, 8.11307874312656274185e-01, +# 8.03849483170964274059e-01, 7.96542330422958966274e-01, +# 7.89376143566024590648e-01, 7.82341832654802504798e-01, +# 7.75431304981187174974e-01, 7.68637315798486264740e-01, +# 7.61953346836795386565e-01, 7.55373506507096115214e-01, +# 7.48892447219156820459e-01, 7.42505296340151055290e-01, +# 7.36207598126862650112e-01, 7.29995264561476231435e-01, +# 7.23864533468630222401e-01, 7.17811932630721960535e-01, +# 7.11834248878248421200e-01, 7.05928501332754310127e-01, +# 7.00091918136511615067e-01, 6.94321916126116711609e-01, +# 6.88616083004671808432e-01, 6.82972161644994857355e-01, +# 6.77388036218773526009e-01, 6.71861719897082099173e-01, +# 6.66391343908750100056e-01, 6.60975147776663107813e-01, +# 6.55611470579697264149e-01, 6.50298743110816701574e-01, +# 6.45035480820822293424e-01, 6.39820277453056585060e-01, +# 6.34651799287623608059e-01, 6.29528779924836690007e-01, +# 6.24450015547026504592e-01, 6.19414360605834324325e-01, +# 6.14420723888913888899e-01, 6.09468064925773433949e-01, +# 6.04555390697467776029e-01, 5.99681752619125263415e-01, +# 5.94846243767987448159e-01, 5.90047996332826008015e-01, +# 5.85286179263371453274e-01, 5.80559996100790898232e-01, +# 5.75868682972353718164e-01, 5.71211506735253227163e-01, +# 5.66587763256164445025e-01, 5.61996775814524340831e-01, +# 5.57437893618765945014e-01, 5.52910490425832290562e-01, +# 5.48413963255265812791e-01, 5.43947731190026262382e-01, +# 5.39511234256952132426e-01, 5.35103932380457614215e-01, +# 5.30725304403662057062e-01, 5.26374847171684479008e-01, +# 5.22052074672321841931e-01, 5.17756517229756352272e-01, +# 5.13487720747326958914e-01, 5.09245245995747941592e-01, +# 5.05028667943468123624e-01, 5.00837575126148681903e-01, +# 4.96671569052489714213e-01, 4.92530263643868537748e-01, +# 4.88413284705458028423e-01, 4.84320269426683325253e-01, +# 4.80250865909046753544e-01, 4.76204732719505863248e-01, +# 4.72181538467730199660e-01, 4.68180961405693596422e-01, +# 4.64202689048174355069e-01, 4.60246417812842867345e-01, +# 4.56311852678716434184e-01, 4.52398706861848520777e-01, +# 4.48506701507203064949e-01, 4.44635565395739396077e-01, +# 4.40785034665803987508e-01, 4.36954852547985550526e-01, +# 4.33144769112652261445e-01, 4.29354541029441427735e-01, +# 4.25583931338021970170e-01, 4.21832709229495894654e-01, +# 4.18100649837848226120e-01, 4.14387534040891125642e-01, +# 4.10693148270188157500e-01, 4.07017284329473372217e-01, +# 4.03359739221114510510e-01, 3.99720314980197222177e-01, +# 3.96098818515832451492e-01, 3.92495061459315619512e-01, +# 3.88908860018788715696e-01, 3.85340034840077283462e-01, +# 3.81788410873393657674e-01, 3.78253817245619183840e-01, +# 3.74736087137891138443e-01, 3.71235057668239498696e-01, +# 3.67750569779032587814e-01, 3.64282468129004055601e-01, +# 3.60830600989648031529e-01, 3.57394820145780500731e-01, +# 3.53974980800076777232e-01, 3.50570941481406106455e-01, +# 3.47182563956793643900e-01, 3.43809713146850715049e-01, +# 3.40452257044521866547e-01, 3.37110066637006045021e-01, +# 3.33783015830718454708e-01, 3.30470981379163586400e-01, +# 3.27173842813601400970e-01, 3.23891482376391093290e-01, +# 3.20623784956905355514e-01, 3.17370638029913609834e-01, +# 3.14131931596337177215e-01, 3.10907558126286509559e-01, +# 3.07697412504292056035e-01, 3.04501391976649993243e-01, +# 3.01319396100803049698e-01, 2.98151326696685481377e-01, +# 2.94997087799961810184e-01, 2.91856585617095209972e-01, +# 2.88729728482182923521e-01, 2.85616426815501756042e-01, +# 2.82516593083707578948e-01, 2.79430141761637940157e-01, +# 2.76356989295668320494e-01, 2.73297054068577072172e-01, +# 2.70250256365875463072e-01, 2.67216518343561471038e-01, +# 2.64195763997261190426e-01, 2.61187919132721213522e-01, +# 2.58192911337619235290e-01, 2.55210669954661961700e-01, +# 2.52241126055942177508e-01, 2.49284212418528522415e-01, +# 2.46339863501263828249e-01, 2.43408015422750312329e-01, +# 2.40488605940500588254e-01, 2.37581574431238090606e-01, +# 2.34686861872330010392e-01, 2.31804410824338724684e-01, +# 2.28934165414680340644e-01, 2.26076071322380278694e-01, +# 2.23230075763917484855e-01, 2.20396127480151998723e-01, +# 2.17574176724331130872e-01, 2.14764175251173583536e-01, +# 2.11966076307030182324e-01, 2.09179834621125076977e-01, +# 2.06405406397880797353e-01, 2.03642749310334908452e-01, +# 2.00891822494656591136e-01, 1.98152586545775138971e-01, +# 1.95425003514134304483e-01, 1.92709036903589175926e-01, +# 1.90004651670464985713e-01, 1.87311814223800304768e-01, +# 1.84630492426799269756e-01, 1.81960655599522513892e-01, +# 1.79302274522847582272e-01, 1.76655321443734858455e-01, +# 1.74019770081838553999e-01, 1.71395595637505754327e-01, +# 1.68782774801211288285e-01, 1.66181285764481906364e-01, +# 1.63591108232365584074e-01, 1.61012223437511009516e-01, +# 1.58444614155924284882e-01, 1.55888264724479197465e-01, +# 1.53343161060262855866e-01, 1.50809290681845675763e-01, +# 1.48286642732574552861e-01, 1.45775208005994028060e-01, +# 1.43274978973513461566e-01, 1.40785949814444699690e-01, +# 1.38308116448550733057e-01, 1.35841476571253755301e-01, +# 1.33386029691669155683e-01, 1.30941777173644358090e-01, +# 1.28508722279999570981e-01, 1.26086870220185887081e-01, +# 1.23676228201596571932e-01, 1.21276805484790306533e-01, +# 1.18888613442910059947e-01, 1.16511665625610869035e-01, +# 1.14145977827838487895e-01, 1.11791568163838089811e-01, +# 1.09448457146811797824e-01, 1.07116667774683801961e-01, +# 1.04796225622487068629e-01, 1.02487158941935246892e-01, +# 1.00189498768810017482e-01, 9.79032790388624646338e-02, +# 9.56285367130089991594e-02, 9.33653119126910124859e-02, +# 9.11136480663737591268e-02, 8.88735920682758862021e-02, +# 8.66451944505580717859e-02, 8.44285095703534715916e-02, +# 8.22235958132029043366e-02, 8.00305158146630696292e-02, +# 7.78493367020961224423e-02, 7.56801303589271778804e-02, +# 7.35229737139813238622e-02, 7.13779490588904025339e-02, +# 6.92451443970067553879e-02, 6.71246538277884968737e-02, +# 6.50165779712428976156e-02, 6.29210244377581412456e-02, +# 6.08381083495398780614e-02, 5.87679529209337372930e-02, +# 5.67106901062029017391e-02, 5.46664613248889208474e-02, +# 5.26354182767921896513e-02, 5.06177238609477817000e-02, +# 4.86135532158685421122e-02, 4.66230949019303814174e-02, +# 4.46465522512944634759e-02, 4.26841449164744590750e-02, +# 4.07361106559409394401e-02, 3.88027074045261474722e-02, +# 3.68842156885673053135e-02, 3.49809414617161251737e-02, +# 3.30932194585785779961e-02, 3.12214171919203004046e-02, +# 2.93659397581333588001e-02, 2.75272356696031131329e-02, +# 2.57058040085489103443e-02, 2.39022033057958785407e-02, +# 2.21170627073088502113e-02, 2.03510962300445102935e-02, +# 1.86051212757246224594e-02, 1.68800831525431419000e-02, +# 1.51770883079353092332e-02, 1.34974506017398673818e-02, +# 1.18427578579078790488e-02, 1.02149714397014590439e-02, +# 8.61658276939872638800e-03, 7.05087547137322242369e-03, +# 5.52240329925099155545e-03, 4.03797259336302356153e-03, +# 2.60907274610215926189e-03, 1.26028593049859797236e-03}; +# +# static const uint32_t ki_float[] = { +# 0x007799ECUL, 0x00000000UL, 0x006045F5UL, 0x006D1AA8UL, 0x00728FB4UL, +# 0x007592AFUL, 0x00777A5CUL, 0x0078CA38UL, 0x0079BF6BUL, 0x007A7A35UL, +# 0x007B0D2FUL, 0x007B83D4UL, 0x007BE597UL, 0x007C3788UL, 0x007C7D33UL, +# 0x007CB926UL, 0x007CED48UL, 0x007D1B08UL, 0x007D437FUL, 0x007D678BUL, +# 0x007D87DBUL, 0x007DA4FCUL, 0x007DBF61UL, 0x007DD767UL, 0x007DED5DUL, +# 0x007E0183UL, 0x007E1411UL, 0x007E2534UL, 0x007E3515UL, 0x007E43D5UL, +# 0x007E5193UL, 0x007E5E67UL, 0x007E6A69UL, 0x007E75AAUL, 0x007E803EUL, +# 0x007E8A32UL, 0x007E9395UL, 0x007E9C72UL, 0x007EA4D5UL, 0x007EACC6UL, +# 0x007EB44EUL, 0x007EBB75UL, 0x007EC243UL, 0x007EC8BCUL, 0x007ECEE8UL, +# 0x007ED4CCUL, 0x007EDA6BUL, 0x007EDFCBUL, 0x007EE4EFUL, 0x007EE9DCUL, +# 0x007EEE94UL, 0x007EF31BUL, 0x007EF774UL, 0x007EFBA0UL, 0x007EFFA3UL, +# 0x007F037FUL, 0x007F0736UL, 0x007F0ACAUL, 0x007F0E3CUL, 0x007F118FUL, +# 0x007F14C4UL, 0x007F17DCUL, 0x007F1ADAUL, 0x007F1DBDUL, 0x007F2087UL, +# 0x007F233AUL, 0x007F25D7UL, 0x007F285DUL, 0x007F2AD0UL, 0x007F2D2EUL, +# 0x007F2F7AUL, 0x007F31B3UL, 0x007F33DCUL, 0x007F35F3UL, 0x007F37FBUL, +# 0x007F39F3UL, 0x007F3BDCUL, 0x007F3DB7UL, 0x007F3F84UL, 0x007F4145UL, +# 0x007F42F8UL, 0x007F449FUL, 0x007F463AUL, 0x007F47CAUL, 0x007F494EUL, +# 0x007F4AC8UL, 0x007F4C38UL, 0x007F4D9DUL, 0x007F4EF9UL, 0x007F504CUL, +# 0x007F5195UL, 0x007F52D5UL, 0x007F540DUL, 0x007F553DUL, 0x007F5664UL, +# 0x007F5784UL, 0x007F589CUL, 0x007F59ACUL, 0x007F5AB5UL, 0x007F5BB8UL, +# 0x007F5CB3UL, 0x007F5DA8UL, 0x007F5E96UL, 0x007F5F7EUL, 0x007F605FUL, +# 0x007F613BUL, 0x007F6210UL, 0x007F62E0UL, 0x007F63AAUL, 0x007F646FUL, +# 0x007F652EUL, 0x007F65E8UL, 0x007F669CUL, 0x007F674CUL, 0x007F67F6UL, +# 0x007F689CUL, 0x007F693CUL, 0x007F69D9UL, 0x007F6A70UL, 0x007F6B03UL, +# 0x007F6B91UL, 0x007F6C1BUL, 0x007F6CA0UL, 0x007F6D21UL, 0x007F6D9EUL, +# 0x007F6E17UL, 0x007F6E8CUL, 0x007F6EFCUL, 0x007F6F68UL, 0x007F6FD1UL, +# 0x007F7035UL, 0x007F7096UL, 0x007F70F3UL, 0x007F714CUL, 0x007F71A1UL, +# 0x007F71F2UL, 0x007F723FUL, 0x007F7289UL, 0x007F72CFUL, 0x007F7312UL, +# 0x007F7350UL, 0x007F738BUL, 0x007F73C3UL, 0x007F73F6UL, 0x007F7427UL, +# 0x007F7453UL, 0x007F747CUL, 0x007F74A1UL, 0x007F74C3UL, 0x007F74E0UL, +# 0x007F74FBUL, 0x007F7511UL, 0x007F7524UL, 0x007F7533UL, 0x007F753FUL, +# 0x007F7546UL, 0x007F754AUL, 0x007F754BUL, 0x007F7547UL, 0x007F753FUL, +# 0x007F7534UL, 0x007F7524UL, 0x007F7511UL, 0x007F74F9UL, 0x007F74DEUL, +# 0x007F74BEUL, 0x007F749AUL, 0x007F7472UL, 0x007F7445UL, 0x007F7414UL, +# 0x007F73DFUL, 0x007F73A5UL, 0x007F7366UL, 0x007F7323UL, 0x007F72DAUL, +# 0x007F728DUL, 0x007F723AUL, 0x007F71E3UL, 0x007F7186UL, 0x007F7123UL, +# 0x007F70BBUL, 0x007F704DUL, 0x007F6FD9UL, 0x007F6F5FUL, 0x007F6EDFUL, +# 0x007F6E58UL, 0x007F6DCBUL, 0x007F6D37UL, 0x007F6C9CUL, 0x007F6BF9UL, +# 0x007F6B4FUL, 0x007F6A9CUL, 0x007F69E2UL, 0x007F691FUL, 0x007F6854UL, +# 0x007F677FUL, 0x007F66A1UL, 0x007F65B8UL, 0x007F64C6UL, 0x007F63C8UL, +# 0x007F62C0UL, 0x007F61ABUL, 0x007F608AUL, 0x007F5F5DUL, 0x007F5E21UL, +# 0x007F5CD8UL, 0x007F5B7FUL, 0x007F5A17UL, 0x007F589EUL, 0x007F5713UL, +# 0x007F5575UL, 0x007F53C4UL, 0x007F51FEUL, 0x007F5022UL, 0x007F4E2FUL, +# 0x007F4C22UL, 0x007F49FAUL, 0x007F47B6UL, 0x007F4553UL, 0x007F42CFUL, +# 0x007F4028UL, 0x007F3D5AUL, 0x007F3A64UL, 0x007F3741UL, 0x007F33EDUL, +# 0x007F3065UL, 0x007F2CA4UL, 0x007F28A4UL, 0x007F245FUL, 0x007F1FCEUL, +# 0x007F1AEAUL, 0x007F15A9UL, 0x007F1000UL, 0x007F09E4UL, 0x007F0346UL, +# 0x007EFC16UL, 0x007EF43EUL, 0x007EEBA8UL, 0x007EE237UL, 0x007ED7C8UL, +# 0x007ECC2FUL, 0x007EBF37UL, 0x007EB09DUL, 0x007EA00AUL, 0x007E8D0DUL, +# 0x007E7710UL, 0x007E5D47UL, 0x007E3E93UL, 0x007E1959UL, 0x007DEB2CUL, +# 0x007DB036UL, 0x007D6203UL, 0x007CF4B9UL, 0x007C4FD2UL, 0x007B3630UL, +# 0x0078D2D2UL}; +# +# static const float wi_float[] = { +# 4.66198677960027669255e-07f, 2.56588335019207033255e-08f, +# 3.41146697750176784592e-08f, 4.00230311410932959821e-08f, +# 4.47179475877737745459e-08f, 4.86837785973537366722e-08f, +# 5.21562578925932412861e-08f, 5.52695199001886257153e-08f, +# 5.81078488992733116465e-08f, 6.07279932024587421409e-08f, +# 6.31701613261172047795e-08f, 6.54639842900233842742e-08f, +# 6.76319905583641815324e-08f, 6.96917493470166688656e-08f, +# 7.16572544283857476692e-08f, 7.35398519048393832969e-08f, +# 7.53488822443557479279e-08f, 7.70921367281667127885e-08f, +# 7.87761895947956022626e-08f, 8.04066446825615346857e-08f, +# 8.19883218760237408659e-08f, 8.35254002936857088917e-08f, +# 8.50215298165053411740e-08f, 8.64799190652369040985e-08f, +# 8.79034055989140110861e-08f, 8.92945125124233511541e-08f, +# 9.06554945027956262312e-08f, 9.19883756905278607229e-08f, +# 9.32949809202232869780e-08f, 9.45769618559625849039e-08f, +# 9.58358188855612866442e-08f, 9.70729196232813152662e-08f, +# 9.82895146313061088986e-08f, 9.94867508514382224721e-08f, +# 1.00665683139461669691e-07f, 1.01827284217853923044e-07f, +# 1.02972453302539369464e-07f, 1.04102023612124921572e-07f, +# 1.05216768930574060431e-07f, 1.06317409364335657741e-07f, +# 1.07404616410877866490e-07f, 1.08479017436113134283e-07f, +# 1.09541199642370962438e-07f, 1.10591713595628691212e-07f, +# 1.11631076370069356306e-07f, 1.12659774359245895023e-07f, +# 1.13678265795837113569e-07f, 1.14686983015899673063e-07f, +# 1.15686334498432158725e-07f, 1.16676706706789039179e-07f, +# 1.17658465754873988919e-07f, 1.18631958917986203582e-07f, +# 1.19597516005596215528e-07f, 1.20555450611113917226e-07f, +# 1.21506061251817163689e-07f, 1.22449632410483948386e-07f, +# 1.23386435488872536840e-07f, 1.24316729681986364321e-07f, +# 1.25240762781015530062e-07f, 1.26158771911939892267e-07f, +# 1.27070984215989333455e-07f, 1.27977617477468922011e-07f, +# 1.28878880703854958297e-07f, 1.29774974662539874521e-07f, +# 1.30666092378141980504e-07f, 1.31552419593887221722e-07f, +# 1.32434135200211397569e-07f, 1.33311411633413359243e-07f, +# 1.34184415246907777059e-07f, 1.35053306657377859830e-07f, +# 1.35918241067904315860e-07f, 1.36779368569952053923e-07f, +# 1.37636834425917531047e-07f, 1.38490779333783508675e-07f, +# 1.39341339675287344817e-07f, 1.40188647748881762555e-07f, +# 1.41032831988654882776e-07f, 1.41874017170273235693e-07f, +# 1.42712324604921442006e-07f, 1.43547872322127921816e-07f, +# 1.44380775242292721080e-07f, 1.45211145339665544509e-07f, +# 1.46039091796461362146e-07f, 1.46864721148745476208e-07f, +# 1.47688137424670065700e-07f, 1.48509442275598857119e-07f, +# 1.49328735100614641423e-07f, 1.50146113164867617390e-07f, +# 1.50961671712187416111e-07f, 1.51775504072350982845e-07f, +# 1.52587701763369746341e-07f, 1.53398354589133671168e-07f, +# 1.54207550732725568797e-07f, 1.55015376845697999657e-07f, +# 1.55821918133584372604e-07f, 1.56627258437898192833e-07f, +# 1.57431480314857468671e-07f, 1.58234665111056041043e-07f, +# 1.59036893036289199880e-07f, 1.59838243233728855017e-07f, +# 1.60638793847630850137e-07f, 1.61438622088746393909e-07f, +# 1.62237804297600106296e-07f, 1.63036416005787357730e-07f, +# 1.63834531995435479082e-07f, 1.64632226356965902954e-07f, +# 1.65429572545287097020e-07f, 1.66226643434541294491e-07f, +# 1.67023511371523209274e-07f, 1.67820248227882200051e-07f, +# 1.68616925451215588827e-07f, 1.69413614115155757272e-07f, +# 1.70210384968549673733e-07f, 1.71007308483826142122e-07f, +# 1.71804454904642543391e-07f, 1.72601894292900061024e-07f, +# 1.73399696575213681990e-07f, 1.74197931588920988271e-07f, +# 1.74996669127712165834e-07f, 1.75795978986961275677e-07f, +# 1.76595931008838063924e-07f, 1.77396595127278238022e-07f, +# 1.78198041412889183130e-07f, 1.79000340117867431104e-07f, +# 1.79803561721004406185e-07f, 1.80607776972855859813e-07f, +# 1.81413056941151359868e-07f, 1.82219473056520464354e-07f, +# 1.83027097158612474240e-07f, 1.83836001542687613069e-07f, +# 1.84646259006759307383e-07f, 1.85457942899367347876e-07f, +# 1.86271127168064649331e-07f, 1.87085886408701333260e-07f, +# 1.87902295915592424729e-07f, 1.88720431732658022414e-07f, +# 1.89540370705627262627e-07f, 1.90362190535400839128e-07f, +# 1.91185969832669990437e-07f, 1.92011788173893651535e-07f, +# 1.92839726158739913768e-07f, 1.93669865469102145482e-07f, +# 1.94502288929804890433e-07f, 1.95337080571120616772e-07f, +# 1.96174325693223683314e-07f, 1.97014110932714374919e-07f, +# 1.97856524331352952716e-07f, 1.98701655407150388211e-07f, +# 1.99549595227971635348e-07f, 2.00400436487814600236e-07f, +# 2.01254273585938820883e-07f, 2.02111202709026498408e-07f, +# 2.02971321916571014951e-07f, 2.03834731229698846698e-07f, +# 2.04701532723644121196e-07f, 2.05571830624108885378e-07f, +# 2.06445731407757185541e-07f, 2.07323343907107312957e-07f, +# 2.08204779420104330037e-07f, 2.09090151824673600213e-07f, +# 2.09979577698577670508e-07f, 2.10873176444920111011e-07f, +# 2.11771070423665379388e-07f, 2.12673385089569268965e-07f, +# 2.13580249136944118603e-07f, 2.14491794651713402832e-07f, +# 2.15408157271244625533e-07f, 2.16329476352486921685e-07f, +# 2.17255895148978920488e-07f, 2.18187560997337924713e-07f, +# 2.19124625513888206785e-07f, 2.20067244802139479285e-07f, +# 2.21015579671883851683e-07f, 2.21969795870742159701e-07f, +# 2.22930064329060010376e-07f, 2.23896561419128954210e-07f, +# 2.24869469229791575583e-07f, 2.25848975857580322189e-07f, +# 2.26835275715640744118e-07f, 2.27828569861799901001e-07f, +# 2.28829066347263833069e-07f, 2.29836980587561823183e-07f, +# 2.30852535757505260518e-07f, 2.31875963212094114516e-07f, +# 2.32907502935486642699e-07f, 2.33947404020352726160e-07f, +# 2.34995925180156140289e-07f, 2.36053335297164516378e-07f, +# 2.37119914009265667728e-07f, 2.38195952338983970691e-07f, +# 2.39281753368440712742e-07f, 2.40377632964396957621e-07f, +# 2.41483920557958384709e-07f, 2.42600959984018662258e-07f, +# 2.43729110386077326413e-07f, 2.44868747192698939290e-07f, +# 2.46020263172594533433e-07f, 2.47184069576113545901e-07f, +# 2.48360597371852893654e-07f, 2.49550298588131851232e-07f, +# 2.50753647770270890721e-07f, 2.51971143565970967140e-07f, +# 2.53203310452642767375e-07f, 2.54450700622322097890e-07f, +# 2.55713896041856770961e-07f, 2.56993510708419870887e-07f, +# 2.58290193123138874550e-07f, 2.59604629008804833146e-07f, +# 2.60937544301314385690e-07f, 2.62289708448800566945e-07f, +# 2.63661938057441759882e-07f, 2.65055100928844238758e-07f, +# 2.66470120540847889467e-07f, 2.67907981031821866252e-07f, +# 2.69369732758258246335e-07f, 2.70856498507068313229e-07f, +# 2.72369480457841388042e-07f, 2.73909968006952220135e-07f, +# 2.75479346585437289399e-07f, 2.77079107626811561009e-07f, +# 2.78710859870496796972e-07f, 2.80376342222588603820e-07f, +# 2.82077438439999912690e-07f, 2.83816193958769527230e-07f, +# 2.85594835255375795814e-07f, 2.87415792215003905739e-07f, +# 2.89281724087851835900e-07f, 2.91195549750371467233e-07f, +# 2.93160483161771875581e-07f, 2.95180075129332912389e-07f, +# 2.97258262785797916083e-07f, 2.99399428561531794298e-07f, +# 3.01608470935804138388e-07f, 3.03890889921758510417e-07f, +# 3.06252891144972267537e-07f, 3.08701513613258141075e-07f, +# 3.11244787989714509378e-07f, 3.13891934589336184321e-07f, +# 3.16653613755314681314e-07f, 3.19542246256559459667e-07f, +# 3.22572428717978242099e-07f, 3.25761480217458181578e-07f, +# 3.29130173358915628534e-07f, 3.32703730345002116955e-07f, +# 3.36513208964639108346e-07f, 3.40597478255417943913e-07f, +# 3.45006114675213401550e-07f, 3.49803789521323211592e-07f, +# 3.55077180848341416206e-07f, 3.60946392031859609868e-07f, +# 3.67584959507244041831e-07f, 3.75257645787954431030e-07f, +# 3.84399301057791926300e-07f, 3.95804015855768440983e-07f, +# 4.11186015434435801956e-07f, 4.35608969373823260746e-07f}; +# +# static const float fi_float[] = { +# 1.00000000000000000000e+00f, 9.77101701267671596263e-01f, +# 9.59879091800106665211e-01f, 9.45198953442299649730e-01f, +# 9.32060075959230460718e-01f, 9.19991505039347012840e-01f, +# 9.08726440052130879366e-01f, 8.98095921898343418910e-01f, +# 8.87984660755833377088e-01f, 8.78309655808917399966e-01f, +# 8.69008688036857046555e-01f, 8.60033621196331532488e-01f, +# 8.51346258458677951353e-01f, 8.42915653112204177333e-01f, +# 8.34716292986883434679e-01f, 8.26726833946221373317e-01f, +# 8.18929191603702366642e-01f, 8.11307874312656274185e-01f, +# 8.03849483170964274059e-01f, 7.96542330422958966274e-01f, +# 7.89376143566024590648e-01f, 7.82341832654802504798e-01f, +# 7.75431304981187174974e-01f, 7.68637315798486264740e-01f, +# 7.61953346836795386565e-01f, 7.55373506507096115214e-01f, +# 7.48892447219156820459e-01f, 7.42505296340151055290e-01f, +# 7.36207598126862650112e-01f, 7.29995264561476231435e-01f, +# 7.23864533468630222401e-01f, 7.17811932630721960535e-01f, +# 7.11834248878248421200e-01f, 7.05928501332754310127e-01f, +# 7.00091918136511615067e-01f, 6.94321916126116711609e-01f, +# 6.88616083004671808432e-01f, 6.82972161644994857355e-01f, +# 6.77388036218773526009e-01f, 6.71861719897082099173e-01f, +# 6.66391343908750100056e-01f, 6.60975147776663107813e-01f, +# 6.55611470579697264149e-01f, 6.50298743110816701574e-01f, +# 6.45035480820822293424e-01f, 6.39820277453056585060e-01f, +# 6.34651799287623608059e-01f, 6.29528779924836690007e-01f, +# 6.24450015547026504592e-01f, 6.19414360605834324325e-01f, +# 6.14420723888913888899e-01f, 6.09468064925773433949e-01f, +# 6.04555390697467776029e-01f, 5.99681752619125263415e-01f, +# 5.94846243767987448159e-01f, 5.90047996332826008015e-01f, +# 5.85286179263371453274e-01f, 5.80559996100790898232e-01f, +# 5.75868682972353718164e-01f, 5.71211506735253227163e-01f, +# 5.66587763256164445025e-01f, 5.61996775814524340831e-01f, +# 5.57437893618765945014e-01f, 5.52910490425832290562e-01f, +# 5.48413963255265812791e-01f, 5.43947731190026262382e-01f, +# 5.39511234256952132426e-01f, 5.35103932380457614215e-01f, +# 5.30725304403662057062e-01f, 5.26374847171684479008e-01f, +# 5.22052074672321841931e-01f, 5.17756517229756352272e-01f, +# 5.13487720747326958914e-01f, 5.09245245995747941592e-01f, +# 5.05028667943468123624e-01f, 5.00837575126148681903e-01f, +# 4.96671569052489714213e-01f, 4.92530263643868537748e-01f, +# 4.88413284705458028423e-01f, 4.84320269426683325253e-01f, +# 4.80250865909046753544e-01f, 4.76204732719505863248e-01f, +# 4.72181538467730199660e-01f, 4.68180961405693596422e-01f, +# 4.64202689048174355069e-01f, 4.60246417812842867345e-01f, +# 4.56311852678716434184e-01f, 4.52398706861848520777e-01f, +# 4.48506701507203064949e-01f, 4.44635565395739396077e-01f, +# 4.40785034665803987508e-01f, 4.36954852547985550526e-01f, +# 4.33144769112652261445e-01f, 4.29354541029441427735e-01f, +# 4.25583931338021970170e-01f, 4.21832709229495894654e-01f, +# 4.18100649837848226120e-01f, 4.14387534040891125642e-01f, +# 4.10693148270188157500e-01f, 4.07017284329473372217e-01f, +# 4.03359739221114510510e-01f, 3.99720314980197222177e-01f, +# 3.96098818515832451492e-01f, 3.92495061459315619512e-01f, +# 3.88908860018788715696e-01f, 3.85340034840077283462e-01f, +# 3.81788410873393657674e-01f, 3.78253817245619183840e-01f, +# 3.74736087137891138443e-01f, 3.71235057668239498696e-01f, +# 3.67750569779032587814e-01f, 3.64282468129004055601e-01f, +# 3.60830600989648031529e-01f, 3.57394820145780500731e-01f, +# 3.53974980800076777232e-01f, 3.50570941481406106455e-01f, +# 3.47182563956793643900e-01f, 3.43809713146850715049e-01f, +# 3.40452257044521866547e-01f, 3.37110066637006045021e-01f, +# 3.33783015830718454708e-01f, 3.30470981379163586400e-01f, +# 3.27173842813601400970e-01f, 3.23891482376391093290e-01f, +# 3.20623784956905355514e-01f, 3.17370638029913609834e-01f, +# 3.14131931596337177215e-01f, 3.10907558126286509559e-01f, +# 3.07697412504292056035e-01f, 3.04501391976649993243e-01f, +# 3.01319396100803049698e-01f, 2.98151326696685481377e-01f, +# 2.94997087799961810184e-01f, 2.91856585617095209972e-01f, +# 2.88729728482182923521e-01f, 2.85616426815501756042e-01f, +# 2.82516593083707578948e-01f, 2.79430141761637940157e-01f, +# 2.76356989295668320494e-01f, 2.73297054068577072172e-01f, +# 2.70250256365875463072e-01f, 2.67216518343561471038e-01f, +# 2.64195763997261190426e-01f, 2.61187919132721213522e-01f, +# 2.58192911337619235290e-01f, 2.55210669954661961700e-01f, +# 2.52241126055942177508e-01f, 2.49284212418528522415e-01f, +# 2.46339863501263828249e-01f, 2.43408015422750312329e-01f, +# 2.40488605940500588254e-01f, 2.37581574431238090606e-01f, +# 2.34686861872330010392e-01f, 2.31804410824338724684e-01f, +# 2.28934165414680340644e-01f, 2.26076071322380278694e-01f, +# 2.23230075763917484855e-01f, 2.20396127480151998723e-01f, +# 2.17574176724331130872e-01f, 2.14764175251173583536e-01f, +# 2.11966076307030182324e-01f, 2.09179834621125076977e-01f, +# 2.06405406397880797353e-01f, 2.03642749310334908452e-01f, +# 2.00891822494656591136e-01f, 1.98152586545775138971e-01f, +# 1.95425003514134304483e-01f, 1.92709036903589175926e-01f, +# 1.90004651670464985713e-01f, 1.87311814223800304768e-01f, +# 1.84630492426799269756e-01f, 1.81960655599522513892e-01f, +# 1.79302274522847582272e-01f, 1.76655321443734858455e-01f, +# 1.74019770081838553999e-01f, 1.71395595637505754327e-01f, +# 1.68782774801211288285e-01f, 1.66181285764481906364e-01f, +# 1.63591108232365584074e-01f, 1.61012223437511009516e-01f, +# 1.58444614155924284882e-01f, 1.55888264724479197465e-01f, +# 1.53343161060262855866e-01f, 1.50809290681845675763e-01f, +# 1.48286642732574552861e-01f, 1.45775208005994028060e-01f, +# 1.43274978973513461566e-01f, 1.40785949814444699690e-01f, +# 1.38308116448550733057e-01f, 1.35841476571253755301e-01f, +# 1.33386029691669155683e-01f, 1.30941777173644358090e-01f, +# 1.28508722279999570981e-01f, 1.26086870220185887081e-01f, +# 1.23676228201596571932e-01f, 1.21276805484790306533e-01f, +# 1.18888613442910059947e-01f, 1.16511665625610869035e-01f, +# 1.14145977827838487895e-01f, 1.11791568163838089811e-01f, +# 1.09448457146811797824e-01f, 1.07116667774683801961e-01f, +# 1.04796225622487068629e-01f, 1.02487158941935246892e-01f, +# 1.00189498768810017482e-01f, 9.79032790388624646338e-02f, +# 9.56285367130089991594e-02f, 9.33653119126910124859e-02f, +# 9.11136480663737591268e-02f, 8.88735920682758862021e-02f, +# 8.66451944505580717859e-02f, 8.44285095703534715916e-02f, +# 8.22235958132029043366e-02f, 8.00305158146630696292e-02f, +# 7.78493367020961224423e-02f, 7.56801303589271778804e-02f, +# 7.35229737139813238622e-02f, 7.13779490588904025339e-02f, +# 6.92451443970067553879e-02f, 6.71246538277884968737e-02f, +# 6.50165779712428976156e-02f, 6.29210244377581412456e-02f, +# 6.08381083495398780614e-02f, 5.87679529209337372930e-02f, +# 5.67106901062029017391e-02f, 5.46664613248889208474e-02f, +# 5.26354182767921896513e-02f, 5.06177238609477817000e-02f, +# 4.86135532158685421122e-02f, 4.66230949019303814174e-02f, +# 4.46465522512944634759e-02f, 4.26841449164744590750e-02f, +# 4.07361106559409394401e-02f, 3.88027074045261474722e-02f, +# 3.68842156885673053135e-02f, 3.49809414617161251737e-02f, +# 3.30932194585785779961e-02f, 3.12214171919203004046e-02f, +# 2.93659397581333588001e-02f, 2.75272356696031131329e-02f, +# 2.57058040085489103443e-02f, 2.39022033057958785407e-02f, +# 2.21170627073088502113e-02f, 2.03510962300445102935e-02f, +# 1.86051212757246224594e-02f, 1.68800831525431419000e-02f, +# 1.51770883079353092332e-02f, 1.34974506017398673818e-02f, +# 1.18427578579078790488e-02f, 1.02149714397014590439e-02f, +# 8.61658276939872638800e-03f, 7.05087547137322242369e-03f, +# 5.52240329925099155545e-03f, 4.03797259336302356153e-03f, +# 2.60907274610215926189e-03f, 1.26028593049859797236e-03f}; +# +# static const uint64_t ke_double[] = { +# 0x001C5214272497C6, 0x0000000000000000, 0x00137D5BD79C317E, +# 0x00186EF58E3F3C10, 0x001A9BB7320EB0AE, 0x001BD127F719447C, +# 0x001C951D0F88651A, 0x001D1BFE2D5C3972, 0x001D7E5BD56B18B2, +# 0x001DC934DD172C70, 0x001E0409DFAC9DC8, 0x001E337B71D47836, +# 0x001E5A8B177CB7A2, 0x001E7B42096F046C, 0x001E970DAF08AE3E, +# 0x001EAEF5B14EF09E, 0x001EC3BD07B46556, 0x001ED5F6F08799CE, +# 0x001EE614AE6E5688, 0x001EF46ECA361CD0, 0x001F014B76DDD4A4, +# 0x001F0CE313A796B6, 0x001F176369F1F77A, 0x001F20F20C452570, +# 0x001F29AE1951A874, 0x001F31B18FB95532, 0x001F39125157C106, +# 0x001F3FE2EB6E694C, 0x001F463332D788FA, 0x001F4C10BF1D3A0E, +# 0x001F51874C5C3322, 0x001F56A109C3ECC0, 0x001F5B66D9099996, +# 0x001F5FE08210D08C, 0x001F6414DD445772, 0x001F6809F6859678, +# 0x001F6BC52A2B02E6, 0x001F6F4B3D32E4F4, 0x001F72A07190F13A, +# 0x001F75C8974D09D6, 0x001F78C71B045CC0, 0x001F7B9F12413FF4, +# 0x001F7E5346079F8A, 0x001F80E63BE21138, 0x001F835A3DAD9162, +# 0x001F85B16056B912, 0x001F87ED89B24262, 0x001F8A10759374FA, +# 0x001F8C1BBA3D39AC, 0x001F8E10CC45D04A, 0x001F8FF102013E16, +# 0x001F91BD968358E0, 0x001F9377AC47AFD8, 0x001F95204F8B64DA, +# 0x001F96B878633892, 0x001F98410C968892, 0x001F99BAE146BA80, +# 0x001F9B26BC697F00, 0x001F9C85561B717A, 0x001F9DD759CFD802, +# 0x001F9F1D6761A1CE, 0x001FA058140936C0, 0x001FA187EB3A3338, +# 0x001FA2AD6F6BC4FC, 0x001FA3C91ACE0682, 0x001FA4DB5FEE6AA2, +# 0x001FA5E4AA4D097C, 0x001FA6E55EE46782, 0x001FA7DDDCA51EC4, +# 0x001FA8CE7CE6A874, 0x001FA9B793CE5FEE, 0x001FAA9970ADB858, +# 0x001FAB745E588232, 0x001FAC48A3740584, 0x001FAD1682BF9FE8, +# 0x001FADDE3B5782C0, 0x001FAEA008F21D6C, 0x001FAF5C2418B07E, +# 0x001FB012C25B7A12, 0x001FB0C41681DFF4, 0x001FB17050B6F1FA, +# 0x001FB2179EB2963A, 0x001FB2BA2BDFA84A, 0x001FB358217F4E18, +# 0x001FB3F1A6C9BE0C, 0x001FB486E10CACD6, 0x001FB517F3C793FC, +# 0x001FB5A500C5FDAA, 0x001FB62E2837FE58, 0x001FB6B388C9010A, +# 0x001FB7353FB50798, 0x001FB7B368DC7DA8, 0x001FB82E1ED6BA08, +# 0x001FB8A57B0347F6, 0x001FB919959A0F74, 0x001FB98A85BA7204, +# 0x001FB9F861796F26, 0x001FBA633DEEE286, 0x001FBACB2F41EC16, +# 0x001FBB3048B49144, 0x001FBB929CAEA4E2, 0x001FBBF23CC8029E, +# 0x001FBC4F39D22994, 0x001FBCA9A3E140D4, 0x001FBD018A548F9E, +# 0x001FBD56FBDE729C, 0x001FBDAA068BD66A, 0x001FBDFAB7CB3F40, +# 0x001FBE491C7364DE, 0x001FBE9540C9695E, 0x001FBEDF3086B128, +# 0x001FBF26F6DE6174, 0x001FBF6C9E828AE2, 0x001FBFB031A904C4, +# 0x001FBFF1BA0FFDB0, 0x001FC03141024588, 0x001FC06ECF5B54B2, +# 0x001FC0AA6D8B1426, 0x001FC0E42399698A, 0x001FC11BF9298A64, +# 0x001FC151F57D1942, 0x001FC1861F770F4A, 0x001FC1B87D9E74B4, +# 0x001FC1E91620EA42, 0x001FC217EED505DE, 0x001FC2450D3C83FE, +# 0x001FC27076864FC2, 0x001FC29A2F90630E, 0x001FC2C23CE98046, +# 0x001FC2E8A2D2C6B4, 0x001FC30D654122EC, 0x001FC33087DE9C0E, +# 0x001FC3520E0B7EC6, 0x001FC371FADF66F8, 0x001FC390512A2886, +# 0x001FC3AD137497FA, 0x001FC3C844013348, 0x001FC3E1E4CCAB40, +# 0x001FC3F9F78E4DA8, 0x001FC4107DB85060, 0x001FC4257877FD68, +# 0x001FC438E8B5BFC6, 0x001FC44ACF15112A, 0x001FC45B2BF447E8, +# 0x001FC469FF6C4504, 0x001FC477495001B2, 0x001FC483092BFBB8, +# 0x001FC48D3E457FF6, 0x001FC495E799D21A, 0x001FC49D03DD30B0, +# 0x001FC4A29179B432, 0x001FC4A68E8E07FC, 0x001FC4A8F8EBFB8C, +# 0x001FC4A9CE16EA9E, 0x001FC4A90B41FA34, 0x001FC4A6AD4E28A0, +# 0x001FC4A2B0C82E74, 0x001FC49D11E62DE2, 0x001FC495CC852DF4, +# 0x001FC48CDC265EC0, 0x001FC4823BEC237A, 0x001FC475E696DEE6, +# 0x001FC467D6817E82, 0x001FC458059DC036, 0x001FC4466D702E20, +# 0x001FC433070BCB98, 0x001FC41DCB0D6E0E, 0x001FC406B196BBF6, +# 0x001FC3EDB248CB62, 0x001FC3D2C43E593C, 0x001FC3B5DE0591B4, +# 0x001FC396F599614C, 0x001FC376005A4592, 0x001FC352F3069370, +# 0x001FC32DC1B22818, 0x001FC3065FBD7888, 0x001FC2DCBFCBF262, +# 0x001FC2B0D3B99F9E, 0x001FC2828C8FFCF0, 0x001FC251DA79F164, +# 0x001FC21EACB6D39E, 0x001FC1E8F18C6756, 0x001FC1B09637BB3C, +# 0x001FC17586DCCD10, 0x001FC137AE74D6B6, 0x001FC0F6F6BB2414, +# 0x001FC0B348184DA4, 0x001FC06C898BAFF0, 0x001FC022A092F364, +# 0x001FBFD5710F72B8, 0x001FBF84DD29488E, 0x001FBF30C52FC60A, +# 0x001FBED907770CC6, 0x001FBE7D80327DDA, 0x001FBE1E094BA614, +# 0x001FBDBA7A354408, 0x001FBD52A7B9F826, 0x001FBCE663C6201A, +# 0x001FBC757D2C4DE4, 0x001FBBFFBF63B7AA, 0x001FBB84F23FE6A2, +# 0x001FBB04D9A0D18C, 0x001FBA7F351A70AC, 0x001FB9F3BF92B618, +# 0x001FB9622ED4ABFC, 0x001FB8CA33174A16, 0x001FB82B76765B54, +# 0x001FB7859C5B895C, 0x001FB6D840D55594, 0x001FB622F7D96942, +# 0x001FB5654C6F37E0, 0x001FB49EBFBF69D2, 0x001FB3CEC803E746, +# 0x001FB2F4CF539C3E, 0x001FB21032442852, 0x001FB1203E5A9604, +# 0x001FB0243042E1C2, 0x001FAF1B31C479A6, 0x001FAE045767E104, +# 0x001FACDE9DBF2D72, 0x001FABA8E640060A, 0x001FAA61F399FF28, +# 0x001FA908656F66A2, 0x001FA79AB3508D3C, 0x001FA61726D1F214, +# 0x001FA47BD48BEA00, 0x001FA2C693C5C094, 0x001FA0F4F47DF314, +# 0x001F9F04336BBE0A, 0x001F9CF12B79F9BC, 0x001F9AB84415ABC4, +# 0x001F98555B782FB8, 0x001F95C3ABD03F78, 0x001F92FDA9CEF1F2, +# 0x001F8FFCDA9AE41C, 0x001F8CB99E7385F8, 0x001F892AEC479606, +# 0x001F8545F904DB8E, 0x001F80FDC336039A, 0x001F7C427839E926, +# 0x001F7700A3582ACC, 0x001F71200F1A241C, 0x001F6A8234B7352A, +# 0x001F630000A8E266, 0x001F5A66904FE3C4, 0x001F50724ECE1172, +# 0x001F44C7665C6FDA, 0x001F36E5A38A59A2, 0x001F26143450340A, +# 0x001F113E047B0414, 0x001EF6AEFA57CBE6, 0x001ED38CA188151E, +# 0x001EA2A61E122DB0, 0x001E5961C78B267C, 0x001DDDF62BAC0BB0, +# 0x001CDB4DD9E4E8C0}; +# +# static const double we_double[] = { +# 9.655740063209182975e-16, 7.089014243955414331e-18, +# 1.163941249669122378e-17, 1.524391512353216015e-17, +# 1.833284885723743916e-17, 2.108965109464486630e-17, +# 2.361128077843138196e-17, 2.595595772310893952e-17, +# 2.816173554197752338e-17, 3.025504130321382330e-17, +# 3.225508254836375280e-17, 3.417632340185027033e-17, +# 3.602996978734452488e-17, 3.782490776869649048e-17, +# 3.956832198097553231e-17, 4.126611778175946428e-17, +# 4.292321808442525631e-17, 4.454377743282371417e-17, +# 4.613133981483185932e-17, 4.768895725264635940e-17, +# 4.921928043727962847e-17, 5.072462904503147014e-17, +# 5.220704702792671737e-17, 5.366834661718192181e-17, +# 5.511014372835094717e-17, 5.653388673239667134e-17, +# 5.794088004852766616e-17, 5.933230365208943081e-17, +# 6.070922932847179572e-17, 6.207263431163193485e-17, +# 6.342341280303076511e-17, 6.476238575956142121e-17, +# 6.609030925769405241e-17, 6.740788167872722244e-17, +# 6.871574991183812442e-17, 7.001451473403929616e-17, +# 7.130473549660643409e-17, 7.258693422414648352e-17, +# 7.386159921381791997e-17, 7.512918820723728089e-17, +# 7.639013119550825792e-17, 7.764483290797848102e-17, +# 7.889367502729790548e-17, 8.013701816675454434e-17, +# 8.137520364041762206e-17, 8.260855505210038174e-17, +# 8.383737972539139383e-17, 8.506196999385323132e-17, +# 8.628260436784112996e-17, 8.749954859216182511e-17, +# 8.871305660690252281e-17, 8.992337142215357066e-17, +# 9.113072591597909173e-17, 9.233534356381788123e-17, +# 9.353743910649128938e-17, 9.473721916312949566e-17, +# 9.593488279457997317e-17, 9.713062202221521206e-17, +# 9.832462230649511362e-17, 9.951706298915071878e-17, +# 1.007081177024294931e-16, 1.018979547484694078e-16, +# 1.030867374515421954e-16, 1.042746244856188556e-16, +# 1.054617701794576406e-16, 1.066483248011914702e-16, +# 1.078344348241948498e-16, 1.090202431758350473e-16, +# 1.102058894705578110e-16, 1.113915102286197502e-16, +# 1.125772390816567488e-16, 1.137632069661684705e-16, +# 1.149495423059009298e-16, 1.161363711840218308e-16, +# 1.173238175059045788e-16, 1.185120031532669434e-16, +# 1.197010481303465158e-16, 1.208910707027385520e-16, +# 1.220821875294706151e-16, 1.232745137888415193e-16, +# 1.244681632985112523e-16, 1.256632486302898513e-16, +# 1.268598812200397542e-16, 1.280581714730749379e-16, +# 1.292582288654119552e-16, 1.304601620412028847e-16, +# 1.316640789066572582e-16, 1.328700867207380889e-16, +# 1.340782921828999433e-16, 1.352888015181175458e-16, +# 1.365017205594397770e-16, 1.377171548282880964e-16, +# 1.389352096127063919e-16, 1.401559900437571538e-16, +# 1.413796011702485188e-16, 1.426061480319665444e-16, +# 1.438357357315790180e-16, 1.450684695053687684e-16, +# 1.463044547929475721e-16, 1.475437973060951633e-16, +# 1.487866030968626066e-16, 1.500329786250736949e-16, +# 1.512830308253539427e-16, 1.525368671738125550e-16, +# 1.537945957544996933e-16, 1.550563253257577148e-16, +# 1.563221653865837505e-16, 1.575922262431176140e-16, +# 1.588666190753684151e-16, 1.601454560042916733e-16, +# 1.614288501593278662e-16, 1.627169157465130500e-16, +# 1.640097681172717950e-16, 1.653075238380036909e-16, +# 1.666103007605742067e-16, 1.679182180938228863e-16, +# 1.692313964762022267e-16, 1.705499580496629830e-16, +# 1.718740265349031656e-16, 1.732037273081008369e-16, +# 1.745391874792533975e-16, 1.758805359722491379e-16, +# 1.772279036068006489e-16, 1.785814231823732619e-16, +# 1.799412295642463721e-16, 1.813074597718501559e-16, +# 1.826802530695252266e-16, 1.840597510598587828e-16, +# 1.854460977797569461e-16, 1.868394397994192684e-16, +# 1.882399263243892051e-16, 1.896477093008616722e-16, +# 1.910629435244376536e-16, 1.924857867525243818e-16, +# 1.939163998205899420e-16, 1.953549467624909132e-16, +# 1.968015949351037382e-16, 1.982565151475019047e-16, +# 1.997198817949342081e-16, 2.011918729978734671e-16, +# 2.026726707464198289e-16, 2.041624610503588774e-16, +# 2.056614340951917875e-16, 2.071697844044737034e-16, +# 2.086877110088159721e-16, 2.102154176219292789e-16, +# 2.117531128241075913e-16, 2.133010102535779087e-16, +# 2.148593288061663316e-16, 2.164282928437604723e-16, +# 2.180081324120784027e-16, 2.195990834682870728e-16, +# 2.212013881190495942e-16, 2.228152948696180545e-16, +# 2.244410588846308588e-16, 2.260789422613173739e-16, +# 2.277292143158621037e-16, 2.293921518837311354e-16, +# 2.310680396348213318e-16, 2.327571704043534613e-16, +# 2.344598455404957859e-16, 2.361763752697773994e-16, +# 2.379070790814276700e-16, 2.396522861318623520e-16, +# 2.414123356706293277e-16, 2.431875774892255956e-16, +# 2.449783723943070217e-16, 2.467850927069288738e-16, +# 2.486081227895851719e-16, 2.504478596029557040e-16, +# 2.523047132944217013e-16, 2.541791078205812227e-16, +# 2.560714816061770759e-16, 2.579822882420530896e-16, +# 2.599119972249746917e-16, 2.618610947423924219e-16, +# 2.638300845054942823e-16, 2.658194886341845120e-16, +# 2.678298485979525166e-16, 2.698617262169488933e-16, +# 2.719157047279818500e-16, 2.739923899205814823e-16, +# 2.760924113487617126e-16, 2.782164236246436081e-16, +# 2.803651078006983464e-16, 2.825391728480253184e-16, +# 2.847393572388174091e-16, 2.869664306419817679e-16, +# 2.892211957417995598e-16, 2.915044901905293183e-16, +# 2.938171887070028633e-16, 2.961602053345465687e-16, +# 2.985344958730045276e-16, 3.009410605012618141e-16, +# 3.033809466085003416e-16, 3.058552518544860874e-16, +# 3.083651274815310004e-16, 3.109117819034266344e-16, +# 3.134964845996663118e-16, 3.161205703467105734e-16, +# 3.187854438219713117e-16, 3.214925846206797361e-16, +# 3.242435527309451638e-16, 3.270399945182240440e-16, +# 3.298836492772283149e-16, 3.327763564171671408e-16, +# 3.357200633553244075e-16, 3.387168342045505162e-16, +# 3.417688593525636996e-16, 3.448784660453423890e-16, +# 3.480481301037442286e-16, 3.512804889222979418e-16, +# 3.545783559224791863e-16, 3.579447366604276541e-16, +# 3.613828468219060593e-16, 3.648961323764542545e-16, +# 3.684882922095621322e-16, 3.721633036080207290e-16, +# 3.759254510416256532e-16, 3.797793587668874387e-16, +# 3.837300278789213687e-16, 3.877828785607895292e-16, +# 3.919437984311428867e-16, 3.962191980786774996e-16, +# 4.006160751056541688e-16, 4.051420882956573177e-16, +# 4.098056438903062509e-16, 4.146159964290904582e-16, +# 4.195833672073398926e-16, 4.247190841824385048e-16, +# 4.300357481667470702e-16, 4.355474314693952008e-16, +# 4.412699169036069903e-16, 4.472209874259932285e-16, +# 4.534207798565834480e-16, 4.598922204905932469e-16, +# 4.666615664711475780e-16, 4.737590853262492027e-16, +# 4.812199172829237933e-16, 4.890851827392209900e-16, +# 4.974034236191939753e-16, 5.062325072144159699e-16, +# 5.156421828878082953e-16, 5.257175802022274839e-16, +# 5.365640977112021618e-16, 5.483144034258703912e-16, +# 5.611387454675159622e-16, 5.752606481503331688e-16, +# 5.909817641652102998e-16, 6.087231416180907671e-16, +# 6.290979034877557049e-16, 6.530492053564040799e-16, +# 6.821393079028928626e-16, 7.192444966089361564e-16, +# 7.706095350032096755e-16, 8.545517038584027421e-16}; +# +# static const double fe_double[] = { +# 1.000000000000000000e+00, 9.381436808621747003e-01, +# 9.004699299257464817e-01, 8.717043323812035949e-01, +# 8.477855006239896074e-01, 8.269932966430503241e-01, +# 8.084216515230083777e-01, 7.915276369724956185e-01, +# 7.759568520401155522e-01, 7.614633888498962833e-01, +# 7.478686219851951034e-01, 7.350380924314234843e-01, +# 7.228676595935720206e-01, 7.112747608050760117e-01, +# 7.001926550827881623e-01, 6.895664961170779872e-01, +# 6.793505722647653622e-01, 6.695063167319247333e-01, +# 6.600008410789997004e-01, 6.508058334145710999e-01, +# 6.418967164272660897e-01, 6.332519942143660652e-01, +# 6.248527387036659775e-01, 6.166821809152076561e-01, +# 6.087253820796220127e-01, 6.009689663652322267e-01, +# 5.934009016917334289e-01, 5.860103184772680329e-01, +# 5.787873586028450257e-01, 5.717230486648258170e-01, +# 5.648091929124001709e-01, 5.580382822625874484e-01, +# 5.514034165406412891e-01, 5.448982376724396115e-01, +# 5.385168720028619127e-01, 5.322538802630433219e-01, +# 5.261042139836197284e-01, 5.200631773682335979e-01, +# 5.141263938147485613e-01, 5.082897764106428795e-01, +# 5.025495018413477233e-01, 4.969019872415495476e-01, +# 4.913438695940325340e-01, 4.858719873418849144e-01, +# 4.804833639304542103e-01, 4.751751930373773747e-01, +# 4.699448252839599771e-01, 4.647897562504261781e-01, +# 4.597076156421376902e-01, 4.546961574746155033e-01, +# 4.497532511627549967e-01, 4.448768734145485126e-01, +# 4.400651008423538957e-01, 4.353161032156365740e-01, +# 4.306281372884588343e-01, 4.259995411430343437e-01, +# 4.214287289976165751e-01, 4.169141864330028757e-01, +# 4.124544659971611793e-01, 4.080481831520323954e-01, +# 4.036940125305302773e-01, 3.993906844752310725e-01, +# 3.951369818332901573e-01, 3.909317369847971069e-01, +# 3.867738290841376547e-01, 3.826621814960098344e-01, +# 3.785957594095807899e-01, 3.745735676159021588e-01, +# 3.705946484351460013e-01, 3.666580797815141568e-01, +# 3.627629733548177748e-01, 3.589084729487497794e-01, +# 3.550937528667874599e-01, 3.513180164374833381e-01, +# 3.475804946216369817e-01, 3.438804447045024082e-01, +# 3.402171490667800224e-01, 3.365899140286776059e-01, +# 3.329980687618089852e-01, 3.294409642641363267e-01, +# 3.259179723935561879e-01, 3.224284849560891675e-01, +# 3.189719128449572394e-01, 3.155476852271289490e-01, +# 3.121552487741795501e-01, 3.087940669345601852e-01, +# 3.054636192445902565e-01, 3.021634006756935276e-01, +# 2.988929210155817917e-01, 2.956517042812611962e-01, +# 2.924392881618925744e-01, 2.892552234896777485e-01, +# 2.860990737370768255e-01, 2.829704145387807457e-01, +# 2.798688332369729248e-01, 2.767939284485173568e-01, +# 2.737453096528029706e-01, 2.707225967990600224e-01, +# 2.677254199320447947e-01, 2.647534188350622042e-01, +# 2.618062426893629779e-01, 2.588835497490162285e-01, +# 2.559850070304153791e-01, 2.531102900156294577e-01, +# 2.502590823688622956e-01, 2.474310756653276266e-01, +# 2.446259691318921070e-01, 2.418434693988772144e-01, +# 2.390832902624491774e-01, 2.363451524570596429e-01, +# 2.336287834374333461e-01, 2.309339171696274118e-01, +# 2.282602939307167011e-01, 2.256076601166840667e-01, +# 2.229757680581201940e-01, 2.203643758433594946e-01, +# 2.177732471487005272e-01, 2.152021510753786837e-01, +# 2.126508619929782795e-01, 2.101191593889882581e-01, +# 2.076068277242220372e-01, 2.051136562938377095e-01, +# 2.026394390937090173e-01, 2.001839746919112650e-01, +# 1.977470661050988732e-01, 1.953285206795632167e-01, +# 1.929281499767713515e-01, 1.905457696631953912e-01, +# 1.881811994042543179e-01, 1.858342627621971110e-01, +# 1.835047870977674633e-01, 1.811926034754962889e-01, +# 1.788975465724783054e-01, 1.766194545904948843e-01, +# 1.743581691713534942e-01, 1.721135353153200598e-01, +# 1.698854013025276610e-01, 1.676736186172501919e-01, +# 1.654780418749360049e-01, 1.632985287519018169e-01, +# 1.611349399175920349e-01, 1.589871389693142123e-01, +# 1.568549923693652315e-01, 1.547383693844680830e-01, +# 1.526371420274428570e-01, 1.505511850010398944e-01, +# 1.484803756438667910e-01, 1.464245938783449441e-01, +# 1.443837221606347754e-01, 1.423576454324722018e-01, +# 1.403462510748624548e-01, 1.383494288635802039e-01, +# 1.363670709264288572e-01, 1.343990717022136294e-01, +# 1.324453279013875218e-01, 1.305057384683307731e-01, +# 1.285802045452281717e-01, 1.266686294375106714e-01, +# 1.247709185808309612e-01, 1.228869795095451356e-01, +# 1.210167218266748335e-01, 1.191600571753276827e-01, +# 1.173168992115555670e-01, 1.154871635786335338e-01, +# 1.136707678827443141e-01, 1.118676316700562973e-01, +# 1.100776764051853845e-01, 1.083008254510337970e-01, +# 1.065370040500016602e-01, 1.047861393065701724e-01, +# 1.030481601712577161e-01, 1.013229974259536315e-01, +# 9.961058367063713170e-02, 9.791085331149219917e-02, +# 9.622374255043279756e-02, 9.454918937605585882e-02, +# 9.288713355604354127e-02, 9.123751663104015530e-02, +# 8.960028191003285847e-02, 8.797537446727021759e-02, +# 8.636274114075691288e-02, 8.476233053236811865e-02, +# 8.317409300963238272e-02, 8.159798070923741931e-02, +# 8.003394754231990538e-02, 7.848194920160642130e-02, +# 7.694194317048050347e-02, 7.541388873405840965e-02, +# 7.389774699236474620e-02, 7.239348087570873780e-02, +# 7.090105516237182881e-02, 6.942043649872875477e-02, +# 6.795159342193660135e-02, 6.649449638533977414e-02, +# 6.504911778675374900e-02, 6.361543199980733421e-02, +# 6.219341540854099459e-02, 6.078304644547963265e-02, +# 5.938430563342026597e-02, 5.799717563120065922e-02, +# 5.662164128374287675e-02, 5.525768967669703741e-02, +# 5.390531019604608703e-02, 5.256449459307169225e-02, +# 5.123523705512628146e-02, 4.991753428270637172e-02, +# 4.861138557337949667e-02, 4.731679291318154762e-02, +# 4.603376107617516977e-02, 4.476229773294328196e-02, +# 4.350241356888818328e-02, 4.225412241331623353e-02, +# 4.101744138041481941e-02, 3.979239102337412542e-02, +# 3.857899550307485742e-02, 3.737728277295936097e-02, +# 3.618728478193142251e-02, 3.500903769739741045e-02, +# 3.384258215087432992e-02, 3.268796350895953468e-02, +# 3.154523217289360859e-02, 3.041444391046660423e-02, +# 2.929566022463739317e-02, 2.818894876397863569e-02, +# 2.709438378095579969e-02, 2.601204664513421735e-02, +# 2.494202641973178314e-02, 2.388442051155817078e-02, +# 2.283933540638524023e-02, 2.180688750428358066e-02, +# 2.078720407257811723e-02, 1.978042433800974303e-02, +# 1.878670074469603046e-02, 1.780620041091136169e-02, +# 1.683910682603994777e-02, 1.588562183997316302e-02, +# 1.494596801169114850e-02, 1.402039140318193759e-02, +# 1.310916493125499106e-02, 1.221259242625538123e-02, +# 1.133101359783459695e-02, 1.046481018102997894e-02, +# 9.614413642502209895e-03, 8.780314985808975251e-03, +# 7.963077438017040002e-03, 7.163353183634983863e-03, +# 6.381905937319179087e-03, 5.619642207205483020e-03, +# 4.877655983542392333e-03, 4.157295120833795314e-03, +# 3.460264777836904049e-03, 2.788798793574076128e-03, +# 2.145967743718906265e-03, 1.536299780301572356e-03, +# 9.672692823271745359e-04, 4.541343538414967652e-04}; +# +# static const uint32_t ke_float[] = { +# 0x00714851UL, 0x00000000UL, 0x004DF56FUL, 0x0061BBD6UL, 0x006A6EDDUL, +# 0x006F44A0UL, 0x00725474UL, 0x00746FF9UL, 0x0075F96FUL, 0x007724D3UL, +# 0x00781027UL, 0x0078CDEEUL, 0x00796A2CUL, 0x0079ED08UL, 0x007A5C37UL, +# 0x007ABBD7UL, 0x007B0EF4UL, 0x007B57DCUL, 0x007B9853UL, 0x007BD1BBUL, +# 0x007C052EUL, 0x007C338CUL, 0x007C5D8EUL, 0x007C83C8UL, 0x007CA6B8UL, +# 0x007CC6C6UL, 0x007CE449UL, 0x007CFF8CUL, 0x007D18CDUL, 0x007D3043UL, +# 0x007D461DUL, 0x007D5A84UL, 0x007D6D9BUL, 0x007D7F82UL, 0x007D9053UL, +# 0x007DA028UL, 0x007DAF15UL, 0x007DBD2DUL, 0x007DCA82UL, 0x007DD722UL, +# 0x007DE31CUL, 0x007DEE7CUL, 0x007DF94DUL, 0x007E0399UL, 0x007E0D69UL, +# 0x007E16C6UL, 0x007E1FB6UL, 0x007E2842UL, 0x007E306FUL, 0x007E3843UL, +# 0x007E3FC4UL, 0x007E46F6UL, 0x007E4DDFUL, 0x007E5481UL, 0x007E5AE2UL, +# 0x007E6104UL, 0x007E66ECUL, 0x007E6C9BUL, 0x007E7215UL, 0x007E775DUL, +# 0x007E7C76UL, 0x007E8160UL, 0x007E8620UL, 0x007E8AB6UL, 0x007E8F24UL, +# 0x007E936DUL, 0x007E9793UL, 0x007E9B95UL, 0x007E9F77UL, 0x007EA33AUL, +# 0x007EA6DEUL, 0x007EAA66UL, 0x007EADD1UL, 0x007EB123UL, 0x007EB45AUL, +# 0x007EB779UL, 0x007EBA80UL, 0x007EBD71UL, 0x007EC04BUL, 0x007EC310UL, +# 0x007EC5C1UL, 0x007EC85EUL, 0x007ECAE9UL, 0x007ECD61UL, 0x007ECFC7UL, +# 0x007ED21CUL, 0x007ED460UL, 0x007ED694UL, 0x007ED8B9UL, 0x007EDACEUL, +# 0x007EDCD5UL, 0x007EDECEUL, 0x007EE0B8UL, 0x007EE296UL, 0x007EE466UL, +# 0x007EE62AUL, 0x007EE7E2UL, 0x007EE98DUL, 0x007EEB2DUL, 0x007EECC1UL, +# 0x007EEE4AUL, 0x007EEFC9UL, 0x007EF13DUL, 0x007EF2A7UL, 0x007EF406UL, +# 0x007EF55CUL, 0x007EF6A8UL, 0x007EF7EBUL, 0x007EF924UL, 0x007EFA55UL, +# 0x007EFB7DUL, 0x007EFC9CUL, 0x007EFDB2UL, 0x007EFEC1UL, 0x007EFFC7UL, +# 0x007F00C5UL, 0x007F01BBUL, 0x007F02AAUL, 0x007F0391UL, 0x007F0470UL, +# 0x007F0548UL, 0x007F0618UL, 0x007F06E2UL, 0x007F07A4UL, 0x007F0860UL, +# 0x007F0914UL, 0x007F09C2UL, 0x007F0A69UL, 0x007F0B09UL, 0x007F0BA3UL, +# 0x007F0C36UL, 0x007F0CC2UL, 0x007F0D48UL, 0x007F0DC8UL, 0x007F0E41UL, +# 0x007F0EB4UL, 0x007F0F21UL, 0x007F0F88UL, 0x007F0FE8UL, 0x007F1042UL, +# 0x007F1096UL, 0x007F10E4UL, 0x007F112BUL, 0x007F116DUL, 0x007F11A8UL, +# 0x007F11DDUL, 0x007F120CUL, 0x007F1235UL, 0x007F1258UL, 0x007F1274UL, +# 0x007F128AUL, 0x007F129AUL, 0x007F12A4UL, 0x007F12A7UL, 0x007F12A4UL, +# 0x007F129BUL, 0x007F128BUL, 0x007F1274UL, 0x007F1257UL, 0x007F1233UL, +# 0x007F1209UL, 0x007F11D8UL, 0x007F119FUL, 0x007F1160UL, 0x007F111AUL, +# 0x007F10CCUL, 0x007F1077UL, 0x007F101BUL, 0x007F0FB7UL, 0x007F0F4BUL, +# 0x007F0ED7UL, 0x007F0E5CUL, 0x007F0DD8UL, 0x007F0D4CUL, 0x007F0CB7UL, +# 0x007F0C19UL, 0x007F0B73UL, 0x007F0AC3UL, 0x007F0A0AUL, 0x007F0947UL, +# 0x007F087BUL, 0x007F07A4UL, 0x007F06C2UL, 0x007F05D6UL, 0x007F04DFUL, +# 0x007F03DCUL, 0x007F02CDUL, 0x007F01B2UL, 0x007F008BUL, 0x007EFF56UL, +# 0x007EFE13UL, 0x007EFCC3UL, 0x007EFB64UL, 0x007EF9F6UL, 0x007EF878UL, +# 0x007EF6EAUL, 0x007EF54BUL, 0x007EF39AUL, 0x007EF1D6UL, 0x007EEFFFUL, +# 0x007EEE14UL, 0x007EEC13UL, 0x007EE9FDUL, 0x007EE7CFUL, 0x007EE589UL, +# 0x007EE329UL, 0x007EE0AEUL, 0x007EDE16UL, 0x007EDB61UL, 0x007ED88CUL, +# 0x007ED595UL, 0x007ED27BUL, 0x007ECF3BUL, 0x007ECBD3UL, 0x007EC841UL, +# 0x007EC481UL, 0x007EC091UL, 0x007EBC6DUL, 0x007EB811UL, 0x007EB37AUL, +# 0x007EAEA4UL, 0x007EA988UL, 0x007EA422UL, 0x007E9E6BUL, 0x007E985DUL, +# 0x007E91EFUL, 0x007E8B1AUL, 0x007E83D4UL, 0x007E7C11UL, 0x007E73C5UL, +# 0x007E6AE1UL, 0x007E6155UL, 0x007E570FUL, 0x007E4BF7UL, 0x007E3FF3UL, +# 0x007E32E6UL, 0x007E24ACUL, 0x007E1518UL, 0x007E03F7UL, 0x007DF10AUL, +# 0x007DDC03UL, 0x007DC480UL, 0x007DAA09UL, 0x007D8C00UL, 0x007D699AUL, +# 0x007D41C9UL, 0x007D131EUL, 0x007CDB97UL, 0x007C9851UL, 0x007C44F8UL, +# 0x007BDABCUL, 0x007B4E33UL, 0x007A8A98UL, 0x00796587UL, 0x007777D9UL, +# 0x00736D37UL, +# }; +# static const float we_float[] = { +# 1.03677719e-06F, 7.61177108e-09F, 1.24977240e-08F, 1.63680292e-08F, +# 1.96847466e-08F, 2.26448404e-08F, 2.53524197e-08F, 2.78699974e-08F, +# 3.02384333e-08F, 3.24861032e-08F, 3.46336312e-08F, 3.66965478e-08F, +# 3.86868855e-08F, 4.06141855e-08F, 4.24861622e-08F, 4.43091566e-08F, +# 4.60884545e-08F, 4.78285168e-08F, 4.95331490e-08F, 5.12056279e-08F, +# 5.28488000e-08F, 5.44651557e-08F, 5.60568899e-08F, 5.76259484e-08F, +# 5.91740662e-08F, 6.07027987e-08F, 6.22135462e-08F, 6.37075759e-08F, +# 6.51860386e-08F, 6.66499836e-08F, 6.81003709e-08F, 6.95380822e-08F, +# 7.09639292e-08F, 7.23786618e-08F, 7.37829746e-08F, 7.51775128e-08F, +# 7.65628768e-08F, 7.79396272e-08F, 7.93082883e-08F, 8.06693516e-08F, +# 8.20232788e-08F, 8.33705045e-08F, 8.47114385e-08F, 8.60464681e-08F, +# 8.73759596e-08F, 8.87002606e-08F, 9.00197010e-08F, 9.13345948e-08F, +# 9.26452410e-08F, 9.39519249e-08F, 9.52549192e-08F, 9.65544849e-08F, +# 9.78508719e-08F, 9.91443202e-08F, 1.00435060e-07F, 1.01723315e-07F, +# 1.03009296e-07F, 1.04293211e-07F, 1.05575259e-07F, 1.06855633e-07F, +# 1.08134518e-07F, 1.09412096e-07F, 1.10688542e-07F, 1.11964025e-07F, +# 1.13238713e-07F, 1.14512767e-07F, 1.15786343e-07F, 1.17059595e-07F, +# 1.18332673e-07F, 1.19605723e-07F, 1.20878890e-07F, 1.22152313e-07F, +# 1.23426131e-07F, 1.24700479e-07F, 1.25975490e-07F, 1.27251294e-07F, +# 1.28528022e-07F, 1.29805799e-07F, 1.31084751e-07F, 1.32365001e-07F, +# 1.33646673e-07F, 1.34929886e-07F, 1.36214760e-07F, 1.37501415e-07F, +# 1.38789966e-07F, 1.40080532e-07F, 1.41373228e-07F, 1.42668169e-07F, +# 1.43965470e-07F, 1.45265245e-07F, 1.46567606e-07F, 1.47872669e-07F, +# 1.49180545e-07F, 1.50491348e-07F, 1.51805191e-07F, 1.53122186e-07F, +# 1.54442445e-07F, 1.55766083e-07F, 1.57093212e-07F, 1.58423946e-07F, +# 1.59758399e-07F, 1.61096684e-07F, 1.62438917e-07F, 1.63785214e-07F, +# 1.65135690e-07F, 1.66490462e-07F, 1.67849647e-07F, 1.69213364e-07F, +# 1.70581733e-07F, 1.71954874e-07F, 1.73332908e-07F, 1.74715958e-07F, +# 1.76104148e-07F, 1.77497602e-07F, 1.78896448e-07F, 1.80300814e-07F, +# 1.81710828e-07F, 1.83126623e-07F, 1.84548331e-07F, 1.85976086e-07F, +# 1.87410026e-07F, 1.88850288e-07F, 1.90297012e-07F, 1.91750343e-07F, +# 1.93210424e-07F, 1.94677403e-07F, 1.96151428e-07F, 1.97632653e-07F, +# 1.99121231e-07F, 2.00617321e-07F, 2.02121082e-07F, 2.03632677e-07F, +# 2.05152273e-07F, 2.06680040e-07F, 2.08216149e-07F, 2.09760777e-07F, +# 2.11314104e-07F, 2.12876312e-07F, 2.14447590e-07F, 2.16028129e-07F, +# 2.17618123e-07F, 2.19217773e-07F, 2.20827283e-07F, 2.22446862e-07F, +# 2.24076723e-07F, 2.25717086e-07F, 2.27368174e-07F, 2.29030216e-07F, +# 2.30703448e-07F, 2.32388110e-07F, 2.34084450e-07F, 2.35792720e-07F, +# 2.37513182e-07F, 2.39246101e-07F, 2.40991752e-07F, 2.42750416e-07F, +# 2.44522382e-07F, 2.46307948e-07F, 2.48107418e-07F, 2.49921109e-07F, +# 2.51749342e-07F, 2.53592452e-07F, 2.55450781e-07F, 2.57324683e-07F, +# 2.59214522e-07F, 2.61120673e-07F, 2.63043524e-07F, 2.64983476e-07F, +# 2.66940939e-07F, 2.68916342e-07F, 2.70910123e-07F, 2.72922739e-07F, +# 2.74954660e-07F, 2.77006373e-07F, 2.79078382e-07F, 2.81171210e-07F, +# 2.83285396e-07F, 2.85421503e-07F, 2.87580110e-07F, 2.89761822e-07F, +# 2.91967265e-07F, 2.94197089e-07F, 2.96451969e-07F, 2.98732610e-07F, +# 3.01039742e-07F, 3.03374127e-07F, 3.05736557e-07F, 3.08127859e-07F, +# 3.10548894e-07F, 3.13000563e-07F, 3.15483804e-07F, 3.17999599e-07F, +# 3.20548974e-07F, 3.23133003e-07F, 3.25752811e-07F, 3.28409576e-07F, +# 3.31104534e-07F, 3.33838984e-07F, 3.36614287e-07F, 3.39431878e-07F, +# 3.42293264e-07F, 3.45200034e-07F, 3.48153864e-07F, 3.51156520e-07F, +# 3.54209871e-07F, 3.57315892e-07F, 3.60476673e-07F, 3.63694431e-07F, +# 3.66971518e-07F, 3.70310433e-07F, 3.73713834e-07F, 3.77184553e-07F, +# 3.80725611e-07F, 3.84340234e-07F, 3.88031877e-07F, 3.91804239e-07F, +# 3.95661291e-07F, 3.99607304e-07F, 4.03646879e-07F, 4.07784981e-07F, +# 4.12026980e-07F, 4.16378695e-07F, 4.20846449e-07F, 4.25437124e-07F, +# 4.30158235e-07F, 4.35018005e-07F, 4.40025460e-07F, 4.45190536e-07F, +# 4.50524210e-07F, 4.56038644e-07F, 4.61747369e-07F, 4.67665494e-07F, +# 4.73809965e-07F, 4.80199879e-07F, 4.86856855e-07F, 4.93805512e-07F, +# 5.01074042e-07F, 5.08694944e-07F, 5.16705952e-07F, 5.25151216e-07F, +# 5.34082859e-07F, 5.43563016e-07F, 5.53666578e-07F, 5.64484953e-07F, +# 5.76131313e-07F, 5.88748108e-07F, 6.02518140e-07F, 6.17681418e-07F, +# 6.34561837e-07F, 6.53611496e-07F, 6.75488730e-07F, 7.01206245e-07F, +# 7.32441505e-07F, 7.72282898e-07F, 8.27435688e-07F, 9.17567905e-07F, +# }; +# static const float fe_float[] = { +# 1.00000000e+00F, 9.38143681e-01F, 9.00469930e-01F, 8.71704332e-01F, +# 8.47785501e-01F, 8.26993297e-01F, 8.08421652e-01F, 7.91527637e-01F, +# 7.75956852e-01F, 7.61463389e-01F, 7.47868622e-01F, 7.35038092e-01F, +# 7.22867660e-01F, 7.11274761e-01F, 7.00192655e-01F, 6.89566496e-01F, +# 6.79350572e-01F, 6.69506317e-01F, 6.60000841e-01F, 6.50805833e-01F, +# 6.41896716e-01F, 6.33251994e-01F, 6.24852739e-01F, 6.16682181e-01F, +# 6.08725382e-01F, 6.00968966e-01F, 5.93400902e-01F, 5.86010318e-01F, +# 5.78787359e-01F, 5.71723049e-01F, 5.64809193e-01F, 5.58038282e-01F, +# 5.51403417e-01F, 5.44898238e-01F, 5.38516872e-01F, 5.32253880e-01F, +# 5.26104214e-01F, 5.20063177e-01F, 5.14126394e-01F, 5.08289776e-01F, +# 5.02549502e-01F, 4.96901987e-01F, 4.91343870e-01F, 4.85871987e-01F, +# 4.80483364e-01F, 4.75175193e-01F, 4.69944825e-01F, 4.64789756e-01F, +# 4.59707616e-01F, 4.54696157e-01F, 4.49753251e-01F, 4.44876873e-01F, +# 4.40065101e-01F, 4.35316103e-01F, 4.30628137e-01F, 4.25999541e-01F, +# 4.21428729e-01F, 4.16914186e-01F, 4.12454466e-01F, 4.08048183e-01F, +# 4.03694013e-01F, 3.99390684e-01F, 3.95136982e-01F, 3.90931737e-01F, +# 3.86773829e-01F, 3.82662181e-01F, 3.78595759e-01F, 3.74573568e-01F, +# 3.70594648e-01F, 3.66658080e-01F, 3.62762973e-01F, 3.58908473e-01F, +# 3.55093753e-01F, 3.51318016e-01F, 3.47580495e-01F, 3.43880445e-01F, +# 3.40217149e-01F, 3.36589914e-01F, 3.32998069e-01F, 3.29440964e-01F, +# 3.25917972e-01F, 3.22428485e-01F, 3.18971913e-01F, 3.15547685e-01F, +# 3.12155249e-01F, 3.08794067e-01F, 3.05463619e-01F, 3.02163401e-01F, +# 2.98892921e-01F, 2.95651704e-01F, 2.92439288e-01F, 2.89255223e-01F, +# 2.86099074e-01F, 2.82970415e-01F, 2.79868833e-01F, 2.76793928e-01F, +# 2.73745310e-01F, 2.70722597e-01F, 2.67725420e-01F, 2.64753419e-01F, +# 2.61806243e-01F, 2.58883550e-01F, 2.55985007e-01F, 2.53110290e-01F, +# 2.50259082e-01F, 2.47431076e-01F, 2.44625969e-01F, 2.41843469e-01F, +# 2.39083290e-01F, 2.36345152e-01F, 2.33628783e-01F, 2.30933917e-01F, +# 2.28260294e-01F, 2.25607660e-01F, 2.22975768e-01F, 2.20364376e-01F, +# 2.17773247e-01F, 2.15202151e-01F, 2.12650862e-01F, 2.10119159e-01F, +# 2.07606828e-01F, 2.05113656e-01F, 2.02639439e-01F, 2.00183975e-01F, +# 1.97747066e-01F, 1.95328521e-01F, 1.92928150e-01F, 1.90545770e-01F, +# 1.88181199e-01F, 1.85834263e-01F, 1.83504787e-01F, 1.81192603e-01F, +# 1.78897547e-01F, 1.76619455e-01F, 1.74358169e-01F, 1.72113535e-01F, +# 1.69885401e-01F, 1.67673619e-01F, 1.65478042e-01F, 1.63298529e-01F, +# 1.61134940e-01F, 1.58987139e-01F, 1.56854992e-01F, 1.54738369e-01F, +# 1.52637142e-01F, 1.50551185e-01F, 1.48480376e-01F, 1.46424594e-01F, +# 1.44383722e-01F, 1.42357645e-01F, 1.40346251e-01F, 1.38349429e-01F, +# 1.36367071e-01F, 1.34399072e-01F, 1.32445328e-01F, 1.30505738e-01F, +# 1.28580205e-01F, 1.26668629e-01F, 1.24770919e-01F, 1.22886980e-01F, +# 1.21016722e-01F, 1.19160057e-01F, 1.17316899e-01F, 1.15487164e-01F, +# 1.13670768e-01F, 1.11867632e-01F, 1.10077676e-01F, 1.08300825e-01F, +# 1.06537004e-01F, 1.04786139e-01F, 1.03048160e-01F, 1.01322997e-01F, +# 9.96105837e-02F, 9.79108533e-02F, 9.62237426e-02F, 9.45491894e-02F, +# 9.28871336e-02F, 9.12375166e-02F, 8.96002819e-02F, 8.79753745e-02F, +# 8.63627411e-02F, 8.47623305e-02F, 8.31740930e-02F, 8.15979807e-02F, +# 8.00339475e-02F, 7.84819492e-02F, 7.69419432e-02F, 7.54138887e-02F, +# 7.38977470e-02F, 7.23934809e-02F, 7.09010552e-02F, 6.94204365e-02F, +# 6.79515934e-02F, 6.64944964e-02F, 6.50491178e-02F, 6.36154320e-02F, +# 6.21934154e-02F, 6.07830464e-02F, 5.93843056e-02F, 5.79971756e-02F, +# 5.66216413e-02F, 5.52576897e-02F, 5.39053102e-02F, 5.25644946e-02F, +# 5.12352371e-02F, 4.99175343e-02F, 4.86113856e-02F, 4.73167929e-02F, +# 4.60337611e-02F, 4.47622977e-02F, 4.35024136e-02F, 4.22541224e-02F, +# 4.10174414e-02F, 3.97923910e-02F, 3.85789955e-02F, 3.73772828e-02F, +# 3.61872848e-02F, 3.50090377e-02F, 3.38425822e-02F, 3.26879635e-02F, +# 3.15452322e-02F, 3.04144439e-02F, 2.92956602e-02F, 2.81889488e-02F, +# 2.70943838e-02F, 2.60120466e-02F, 2.49420264e-02F, 2.38844205e-02F, +# 2.28393354e-02F, 2.18068875e-02F, 2.07872041e-02F, 1.97804243e-02F, +# 1.87867007e-02F, 1.78062004e-02F, 1.68391068e-02F, 1.58856218e-02F, +# 1.49459680e-02F, 1.40203914e-02F, 1.31091649e-02F, 1.22125924e-02F, +# 1.13310136e-02F, 1.04648102e-02F, 9.61441364e-03F, 8.78031499e-03F, +# 7.96307744e-03F, 7.16335318e-03F, 6.38190594e-03F, 5.61964221e-03F, +# 4.87765598e-03F, 4.15729512e-03F, 3.46026478e-03F, 2.78879879e-03F, +# 2.14596774e-03F, 1.53629978e-03F, 9.67269282e-04F, 4.54134354e-04F, +# }; +# +# +# static const double ziggurat_nor_r = 3.6541528853610087963519472518; +# static const double ziggurat_nor_inv_r = +# 0.27366123732975827203338247596; // 1.0 / ziggurat_nor_r; +# static const double ziggurat_exp_r = 7.6971174701310497140446280481; +# +# static const float ziggurat_nor_r_f = 3.6541528853610087963519472518f; +# static const float ziggurat_nor_inv_r_f = 0.27366123732975827203338247596f; +# static const float ziggurat_exp_r_f = 7.6971174701310497140446280481f; +# +# +# double random_standard_normal(bitgen_t *bitgen_state) { +# uint64_t r; +# int sign; +# uint64_t rabs; +# int idx; +# double x, xx, yy; +# for (;;) { +# /* r = e3n52sb8 */ +# r = next_uint64(bitgen_state); +# idx = r & 0xff; +# r >>= 8; +# sign = r & 0x1; +# rabs = (r >> 1) & 0x000fffffffffffff; +# x = rabs * wi_double[idx]; +# if (sign & 0x1) +# x = -x; +# if (rabs < ki_double[idx]) +# return x; /* 99.3% of the time return here */ +# if (idx == 0) { +# for (;;) { +# /* Switch to 1.0 - U to avoid log(0.0), see GH 13361 */ +# xx = -ziggurat_nor_inv_r * npy_log1p(-next_double(bitgen_state)); +# yy = -npy_log1p(-next_double(bitgen_state)); +# if (yy + yy > xx * xx) +# return ((rabs >> 8) & 0x1) ? -(ziggurat_nor_r + xx) +# : ziggurat_nor_r + xx; +# } +# } else { +# if (((fi_double[idx - 1] - fi_double[idx]) * next_double(bitgen_state) + +# fi_double[idx]) < exp(-0.5 * x * x)) +# return x; +# } +# } +# } + +from __future__ import annotations + +# --------------------------------------------------------------------------- +# Ziggurat tables (parsed from the C source comments at the top of this file) +# --------------------------------------------------------------------------- +import math as math +import re as re + +import cffi +import numba as nb +import numpy as np + +_FFI = cffi.FFI() + +_ZIGGURAT_NOR_R = 3.6541528853610087963519472518 +_ZIGGURAT_NOR_INV_R = 0.27366123732975827203338247596 # 1.0 / ziggurat_nor_r + +_KI_DOUBLE = None +_WI_DOUBLE = None +_FI_DOUBLE = None + +# _BIT_GENERATOR = np.random.PCG64(seed=42) +# _next_uint64 = _BIT_GENERATOR.cffi.next_uint64 +# _next_double = _BIT_GENERATOR.cffi.next_double +# _state_address = _BIT_GENERATOR.cffi.state_address +# _state_ptr = _FFI.cast("uint8_t(*)[128]", _state_address) +# _state_bytes = np.frombuffer(_FFI.buffer(_state_ptr), dtype=np.uint8) +# _state_uint64 = _state_bytes.view(np.uint64)[3:7] + + +def _load_ziggurat_tables(): + """Parse the C-comment Ziggurat tables embedded at the top of this module.""" + global _KI_DOUBLE, _WI_DOUBLE, _FI_DOUBLE + with open(__file__) as fh: + text = fh.read() + + def _extract_block(name): + m = re.search( + r"#\s*static const \w+\s+" + name + r"\[\]\s*=\s*\{(.*?)\};", + text, + re.S, + ) + if m is None: + raise RuntimeError(f"Could not locate Ziggurat table {name!r}") + # strip leading '#' on each line + return re.sub(r"^#\s?", "", m.group(1), flags=re.M) + + ki_body = _extract_block("ki_double").replace("ULL", "").replace("LL", "") + ki_tokens = re.findall(r"0x[0-9A-Fa-f]+", ki_body) + _KI_DOUBLE = np.array([int(t, 16) for t in ki_tokens], dtype=np.uint64) + + float_re = r"[-+]?\d+\.\d+(?:[eE][-+]?\d+)?|[-+]?\d+(?:[eE][-+]?\d+)?" + wi_tokens = re.findall(float_re, _extract_block("wi_double")) + _WI_DOUBLE = np.array([float(t) for t in wi_tokens], dtype=np.float64) + fi_tokens = re.findall(float_re, _extract_block("fi_double")) + _FI_DOUBLE = np.array([float(t) for t in fi_tokens], dtype=np.float64) + + if not (len(_KI_DOUBLE) == len(_WI_DOUBLE) == len(_FI_DOUBLE) == 256): + raise RuntimeError( + "Unexpected Ziggurat table sizes: " + f"ki={len(_KI_DOUBLE)} wi={len(_WI_DOUBLE)} fi={len(_FI_DOUBLE)}" + ) + + +_load_ziggurat_tables() + + +@nb.njit +def _single_random_standard_normal(_next_double, _next_uint64, _state_address) -> float: + """ + Return one sample from the standard normal distribution, :math:`\\mathcal{N}(0, 1)`. + + Uses the module-level PCG64 bit-generator directly via its CFFI pointer + (``_state_address``). Advances the global RNG state in-place. + + Returns + ------- + float + A single draw from :math:`\\mathcal{N}(0, 1)`. + + Notes + ----- + Direct Python port of NumPy's ``random_standard_normal`` C implementation + (see the C source reproduced in the comment block at the top of this + module). The Ziggurat algorithm divides the probability density function + into 256 horizontal slabs and uses the precomputed ``ki_double``, + ``wi_double`` and ``fi_double`` tables (also reproduced in the top-of-file + C comments) to quickly accept the common case (~99.3%). When the fast + accept fails, the function falls back to either a tail sample (when + ``idx == 0``) or a wedge-rejection test against ``exp(-x^2/2)``. + """ + ki = _KI_DOUBLE + wi = _WI_DOUBLE + fi = _FI_DOUBLE + nor_r = _ZIGGURAT_NOR_R + nor_inv_r = _ZIGGURAT_NOR_INV_R + log1p = math.log1p + exp = math.exp + + while True: + r = _next_uint64(_state_address) + idx = r & 0xFF + r >>= 8 + sign = r & 0x1 + rabs = (r >> 1) & 0x000FFFFFFFFFFFFF # low 52 bits + x = rabs * float(wi[idx]) + if sign & 0x1: + x = -x + if rabs < int(ki[idx]): + # 99.3% of the time return here + return x + if idx == 0: + # Tail of the distribution: sample using the marsaglia tail method. + while True: + # Switch to 1.0 - U to avoid log(0.0) + xx = -nor_inv_r * log1p(-_next_double(_state_address)) + yy = -log1p(-_next_double(_state_address)) + if yy + yy > xx * xx: + if (rabs >> 8) & 0x1: + return -(nor_r + xx) + return nor_r + xx + else: + # Wedge: accept x if it lies under exp(-x^2/2) + fi_im1 = float(fi[idx - 1]) + fi_i = float(fi[idx]) + if (fi_im1 - fi_i) * _next_double(_state_address) + fi_i < exp( + -0.5 * x * x + ): + return x + + +@nb.njit +def _several_random_standard_normal( + state_array: np.ndarray, + state_bytes: np.ndarray, + size: int, + _next_double, + _next_uint64, + _state_address, + _slice_start: int, + _slice_end: int, +) -> np.ndarray: + """ + Draw ``size`` standard-normal samples for a single RNG state row. + + Loads the RNG state from *state_array* into the shared bit-generator, + fills a 1-D array of length *size* with Ziggurat normal samples, then + writes the updated state back to *state_array*. + + Parameters + ---------- + state_array : np.ndarray + 1-D ``uint64`` array of length ≥ 4 holding the PCG64 state for one + agent/row. Mutated in-place with the post-draw state. + state_bytes : np.ndarray + Byte view (``uint8``) of the shared ``_BIT_GENERATOR`` state buffer. + Used as the staging area when injecting and extracting PCG64 state. + size : int, optional + Number of samples to draw. Defaults to 1. + + Returns + ------- + np.ndarray + 1-D ``float64`` array of shape ``(size,)`` containing draws from + :math:`\\mathcal{N}(0, 1)`. + + Notes + ----- + Direct Python port of NumPy's ``random_standard_normal`` C implementation + (see the C source reproduced in the comment block at the top of this + module). The Ziggurat algorithm divides the probability density function + into 256 horizontal slabs and uses the precomputed ``ki_double``, + ``wi_double`` and ``fi_double`` tables (also reproduced in the top-of-file + C comments) to quickly accept the common case (~99.3%). When the fast + accept fails, the function falls back to either a tail sample (when + ``idx == 0``) or a wedge-rejection test against ``exp(-x^2/2)``. + """ + # write into the BIT_GENERATOR's state from the given state_array + _state_uint64_ = state_bytes.view(np.uint64)[_slice_start:_slice_end] + _state_uint64_[:] = state_array[:4] + + try: + result = np.empty(size, dtype=np.float64) + for n in range(size): + result[n] = _single_random_standard_normal( + _next_double, _next_uint64, _state_address + ) + return result + finally: + # write back the updated state into the given state_array + state_array[:4] = _state_uint64_[:4] + + +@nb.njit +def _vector_random_standard_normal( + state_array: np.ndarray, + state_bytes: np.ndarray, + shape: tuple[int, ...], + _next_double, + _next_uint64, + _state_address, + _slice_start: int, + _slice_end: int, +) -> np.ndarray: + """ + Draw standard-normal samples for every row of *state_array*. + + Parameters + ---------- + state_array : np.ndarray + 2-D ``uint64`` array of shape ``(n_agents, ≥4)`` where each row holds + the PCG64 state for one agent. Mutated in-place. + state_bytes : np.ndarray + Byte view (``uint8``) of the shared ``_BIT_GENERATOR`` state buffer. + shape : tuple of int, optional + Trailing dimensions of the per-agent sample block. The total number + of samples per agent equals ``prod(shape)``. Defaults to ``(1,)``. + + Returns + ------- + np.ndarray + 2-D ``float64`` array of shape ``(n_agents, prod(shape))``. Callers + that want the trailing dimensions split out should call + ``.reshape(n_agents, *shape)`` on the result. + """ + flat_size = 1 + for s in shape: + flat_size *= s + + result = np.empty((state_array.shape[0], flat_size), dtype=np.float64) + for idx in range(state_array.shape[0]): + result[idx] = _several_random_standard_normal( + state_array[idx], + state_bytes, + size=flat_size, + _next_double=_next_double, + _next_uint64=_next_uint64, + _state_address=_state_address, + _slice_start=_slice_start, + _slice_end=_slice_end, + ) + return result + + +@nb.njit +def _selected_vector_random_standard_normal( + selected_positions: np.ndarray, + state_array: np.ndarray, + state_bytes: np.ndarray, + shape: tuple[int, ...], + _next_double, + _next_uint64, + _state_address, + _slice_start: int, + _slice_end: int, +) -> np.ndarray: + """ + Draw standard-normal samples for a subset of rows of *state_array*. + + Parameters + ---------- + selected_positions : np.ndarray + 1-D integer array whose values are row indices into *state_array*. + Only the indicated rows are advanced and sampled. + state_array : np.ndarray + 2-D ``uint64`` array of shape ``(n_agents, ≥4)`` where each row holds + the PCG64 state for one agent. Mutated in-place for the selected rows. + state_bytes : np.ndarray + Byte view (``uint8``) of the shared ``_BIT_GENERATOR`` state buffer. + shape : tuple of int, optional + Trailing dimensions of the per-agent sample block. The total number + of samples per selected agent equals ``prod(shape)``. Defaults to + ``(1,)``. + + Returns + ------- + np.ndarray + 2-D ``float64`` array of shape ``(len(selected_positions), prod(shape))``. + Callers that want the trailing dimensions split out should call + ``.reshape(len(selected_positions), *shape)`` on the result. + """ + # compute the size for the unfolded (flat) trailing dimension. + flat_size = 1 + for s in shape: + flat_size *= s + + # Allocate a 2-D buffer (n_selected, flat_size) so the row assignment + # below matches the 1-D array returned by _several_random_standard_normal + # without needing a per-iteration reshape. + n = selected_positions.shape[0] + flat = np.empty((n, flat_size), dtype=np.float64) + + for idx in range(n): + flat[idx] = _several_random_standard_normal( + state_array[selected_positions[idx]], + state_bytes, + size=flat_size, + _next_double=_next_double, + _next_uint64=_next_uint64, + _state_address=_state_address, + _slice_start=_slice_start, + _slice_end=_slice_end, + ) + + # Reshape so the trailing dimensions match the caller-provided `shape`. + # Tuple concatenation keeps the result shape a homogeneous int tuple, + # which numba requires for `reshape` in nopython mode. + return flat + + +### UNIFORM + + +@nb.njit +def _several_random_standard_uniform( + state_array: np.ndarray, + state_bytes: np.ndarray, + size: int, + _next_double, + _state_address, + _slice_start: int, + _slice_end: int, +) -> np.ndarray: + """ + Draw ``size`` standard-uniform samples for a single RNG state row. + + Loads the RNG state from *state_array* into the shared bit-generator, + fills a 1-D array of length *size* with U[0, 1) samples, then writes the + updated state back to *state_array*. + + Parameters + ---------- + state_array : np.ndarray + 1-D ``uint64`` array of length ≥ 4 holding the PCG64 state for one + agent/row. Mutated in-place with the post-draw state. + state_bytes : np.ndarray + Byte view (``uint8``) of the shared ``_BIT_GENERATOR`` state buffer. + Used as the staging area when injecting and extracting PCG64 state. + size : int, optional + Number of samples to draw. Defaults to 1. + + Returns + ------- + np.ndarray + 1-D ``float64`` array of shape ``(size,)`` containing draws from + U[0, 1). + """ + + # write into the BIT_GENERATOR's state from the given state_array + _state_uint64_ = state_bytes.view(np.uint64)[_slice_start:_slice_end] + _state_uint64_[:] = state_array[:4] + try: + result = np.empty(size, dtype=np.float64) + for n in range(size): + result[n] = _next_double(_state_address) + return result + finally: + # write back the updated state into the given state_array + state_array[:4] = _state_uint64_[:4] + + +@nb.njit +def _vector_random_standard_uniform( + state_array: np.ndarray, + state_bytes: np.ndarray, + shape: tuple[int, ...], + _next_double, + _state_address, + _slice_start: int, + _slice_end: int, +) -> np.ndarray: + """ + Draw standard-uniform samples for every row of *state_array*. + + Parameters + ---------- + state_array : np.ndarray + 2-D ``uint64`` array of shape ``(n_agents, ≥4)`` where each row holds + the PCG64 state for one agent. Mutated in-place. + state_bytes : np.ndarray + Byte view (``uint8``) of the shared ``_BIT_GENERATOR`` state buffer. + shape : tuple of int, optional + Trailing dimensions of the per-agent sample block. The total number + of samples per agent equals ``prod(shape)``. Defaults to ``(1,)``. + + Returns + ------- + np.ndarray + 2-D ``float64`` array of shape ``(n_agents, prod(shape))``. Callers + that want trailing dimensions split out should call + ``.reshape(n_agents, *shape)`` on the result. + """ + # compute the size for the unfolded (flat) trailing dimension. + flat_size = 1 + for s in shape: + flat_size *= s + + result = np.empty((state_array.shape[0], flat_size), dtype=np.float64) + for idx in range(state_array.shape[0]): + result[idx] = _several_random_standard_uniform( + state_array[idx], + state_bytes, + flat_size, + _next_double, + _state_address, + _slice_start=_slice_start, + _slice_end=_slice_end, + ) + return result + + +@nb.njit +def _selected_vector_random_standard_uniform( + selected_positions: np.ndarray, + state_array: np.ndarray, + state_bytes: np.ndarray, + shape: tuple[int, ...], + _next_double, + _state_address, + _slice_start: int, + _slice_end: int, +) -> np.ndarray: + """ + Draw standard-uniform samples for a subset of rows of *state_array*. + + Parameters + ---------- + selected_positions : np.ndarray + 1-D integer array whose values are row indices into *state_array*. + Only the indicated rows are advanced and sampled. + state_array : np.ndarray + 2-D ``uint64`` array of shape ``(n_agents, ≥4)`` where each row holds + the PCG64 state for one agent. Mutated in-place for the selected rows. + state_bytes : np.ndarray + Byte view (``uint8``) of the shared ``_BIT_GENERATOR`` state buffer. + shape : tuple of int, optional + Trailing dimensions of the per-agent sample block. The total number + of samples per selected agent equals ``prod(shape)``. Defaults to + ``(1,)``. + + Returns + ------- + np.ndarray + 2-D ``float64`` array of shape ``(len(selected_positions), prod(shape))``. + Callers that want the trailing dimensions split out should call + ``.reshape(len(selected_positions), *shape)`` on the result. + """ + # compute the size for the unfolded (flat) trailing dimension. + flat_size = 1 + for s in shape: + flat_size *= s + + result = np.empty((selected_positions.shape[0], flat_size), dtype=np.float64) + for idx in range(selected_positions.shape[0]): + result[idx] = _several_random_standard_uniform( + state_array[selected_positions[idx]], + state_bytes, + flat_size, + _next_double, + _state_address, + _slice_start=_slice_start, + _slice_end=_slice_end, + ) + return result + + +class FastGenerator: + def __init__(self, seed: int = 42) -> None: + self._bit_generator = np.random.PCG64(seed=seed) + self._next_uint64 = self._bit_generator.cffi.next_uint64 + self._next_double = self._bit_generator.cffi.next_double + self._state_address = self._bit_generator.cffi.state_address + self._state_ptr = _FFI.cast("uint8_t(*)[128]", self._state_address) + self._state_bytes = np.frombuffer(_FFI.buffer(self._state_ptr), dtype=np.uint8) + + # Determine the values currently in the `state` + state_array = np.empty(shape=[4], dtype=np.uint64) + _state = self._bit_generator.state["state"] + state_array[0] = _state["state"] & 0xFFFFFFFFFFFFFFFF + state_array[1] = _state["state"] >> 64 + state_array[2] = _state["inc"] & 0xFFFFFFFFFFFFFFFF + state_array[3] = _state["inc"] >> 64 + + # There are 4 values in state_array, which correspond to 4 values + # within _state_bytes.view(np.uint64), in a contiguous block but + # not necessarily in the same order. Find them + viewer = self._state_bytes.view(np.uint64) + positions = [] + for j in range(4): + target = state_array[j] + for k in range(16): + if viewer[k] == target: + break + if k >= 15: + raise ValueError("the state array is not found in raw memory") + positions.append(k) + + # Determine where the block of state values is, and verify that it is contiguous + self._slice_start = min(positions) + self._slice_end = max(positions) + 1 + if self._slice_start + 4 != self._slice_end: + raise ValueError("the state array is not contiguous") + + self._slice_positions = np.asarray(positions) - self._slice_start + + def get_state_array(self, seed) -> np.ndarray: + """Get the state array for a given seed, in the order expected by the numba functions.""" + bit_generator = np.random.PCG64(seed=seed) + _state = bit_generator.state["state"] + state_array = np.empty(shape=[4], dtype=np.uint64) + state_array[self._slice_positions[0]] = _state["state"] & 0xFFFFFFFFFFFFFFFF + state_array[self._slice_positions[1]] = _state["state"] >> 64 + state_array[self._slice_positions[2]] = _state["inc"] & 0xFFFFFFFFFFFFFFFF + state_array[self._slice_positions[3]] = _state["inc"] >> 64 + return state_array + + def vector_random_standard_normal( + self, + state_array: np.ndarray, + selected_positions: np.ndarray | None = None, + shape: int | tuple[int, ...] = (1,), + ) -> np.ndarray: + """ + Draw standard-normal samples for agents, optionally restricted to a subset. + + Parameters + ---------- + state_array : np.ndarray + 2-D ``uint64`` array of shape ``(n_agents, ≥4)`` where each row holds + the PCG64 state for one agent. Mutated in-place. + selected_positions : np.ndarray or None, optional + 1-D integer array of row indices into *state_array*. When provided, + only these agents are sampled; otherwise all agents are sampled. + shape : int or tuple of int, optional + Shape of the per-agent sample block. A plain ``int`` is treated as a + 1-element tuple, e.g. ``3`` → ``(3,)``. Defaults to ``(1,)``. + + Returns + ------- + np.ndarray + ``float64`` array of shape + ``(n_selected, *shape)`` when *selected_positions* is given, or + ``(n_agents, *shape)`` otherwise. + """ + if isinstance(shape, int): + shape = (shape,) + if selected_positions is not None: + return _selected_vector_random_standard_normal( + selected_positions, + state_array, + state_bytes=self._state_bytes, + shape=shape, + _next_double=self._next_double, + _next_uint64=self._next_uint64, + _state_address=self._state_address, + _slice_start=self._slice_start, + _slice_end=self._slice_end, + ).reshape(-1, *shape) + else: + return _vector_random_standard_normal( + state_array, + state_bytes=self._state_bytes, + shape=shape, + _next_double=self._next_double, + _next_uint64=self._next_uint64, + _state_address=self._state_address, + _slice_start=self._slice_start, + _slice_end=self._slice_end, + ).reshape(-1, *shape) + + def vector_random_standard_uniform( + self, + state_array: np.ndarray, + selected_positions: np.ndarray | None = None, + shape: int | tuple[int, ...] = 1, + ) -> np.ndarray: + """ + Draw standard-uniform samples for agents, optionally restricted to a subset. + + Parameters + ---------- + state_array : np.ndarray + 2-D ``uint64`` array of shape ``(n_agents, ≥4)`` where each row holds + the PCG64 state for one agent. Mutated in-place. + selected_positions : np.ndarray or None, optional + 1-D integer array of row indices into *state_array*. When provided, + only these agents are sampled; otherwise all agents are sampled. + shape : int or tuple of int, optional + Shape of the per-agent sample block. A plain ``int`` is treated as a + 1-element tuple, e.g. ``3`` → ``(3,)``. Defaults to ``1``, i.e. one + scalar draw per agent. + + Returns + ------- + np.ndarray + ``float64`` array of shape + ``(n_selected, *shape)`` when *selected_positions* is given, or + ``(n_agents, *shape)`` otherwise; values drawn from U[0, 1). + """ + if isinstance(shape, int): + shape = (shape,) + if selected_positions is not None: + return _selected_vector_random_standard_uniform( + selected_positions, + state_array, + state_bytes=self._state_bytes, + shape=shape, + _next_double=self._next_double, + _state_address=self._state_address, + _slice_start=self._slice_start, + _slice_end=self._slice_end, + ).reshape(-1, *shape) + else: + return _vector_random_standard_uniform( + state_array, + state_bytes=self._state_bytes, + shape=shape, + _next_double=self._next_double, + _state_address=self._state_address, + _slice_start=self._slice_start, + _slice_end=self._slice_end, + ).reshape(-1, *shape) diff --git a/activitysim/core/random.py b/activitysim/core/random.py index 37b1976403..b99c0615e3 100644 --- a/activitysim/core/random.py +++ b/activitysim/core/random.py @@ -9,9 +9,10 @@ import numpy as np import pandas as pd -from activitysim.core.util import reindex from activitysim.core.exceptions import DuplicateLoadableObjectError, TableIndexError +from activitysim.core.util import reindex +from .fast_random import FastChannel from .tracing import print_elapsed_time logger = logging.getLogger(__name__) @@ -450,7 +451,7 @@ def end_step(self, step_name): # channel management - def add_channel(self, channel_name, domain_df): + def add_channel(self, channel_name, domain_df, fast: bool = True): """ Create or extend a channel for generating random number streams for domain_df. @@ -484,7 +485,8 @@ def add_channel(self, channel_name, domain_df): "Adding channel '%s' %s ids" % (channel_name, len(domain_df.index)) ) - channel = SimpleChannel( + channel_class = FastChannel if fast else SimpleChannel + channel = channel_class( channel_name, self.base_seed, domain_df, self.step_name ) diff --git a/activitysim/core/test/test_fast_channel.py b/activitysim/core/test/test_fast_channel.py new file mode 100644 index 0000000000..1d45f1b3a0 --- /dev/null +++ b/activitysim/core/test/test_fast_channel.py @@ -0,0 +1,501 @@ +# ActivitySim +# See full license in LICENSE.txt. +"""Unit tests for activitysim.core.random.FastChannel.""" + +from __future__ import annotations + +import numpy as np +import numpy.testing as npt +import pandas as pd +import pytest + +from activitysim.core.fast_random import FastChannel + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_df(index_values, name="household_id"): + """Return a minimal DataFrame with a named index.""" + df = pd.DataFrame({"x": 0}, index=pd.Index(index_values, name=name)) + return df + + +def _make_channel( + index_values=None, base_seed=0, channel_name="households", step_name="" +): + if index_values is None: + index_values = [1, 2, 3, 4, 5] + domain_df = _make_df(index_values) + return FastChannel(channel_name, base_seed, domain_df, step_name=step_name) + + +# --------------------------------------------------------------------------- +# __init__ / construction +# --------------------------------------------------------------------------- + + +class TestInit: + def test_attributes_set(self): + domain_df = _make_df([10, 20, 30]) + ch = FastChannel("persons", 42, domain_df) + assert ch.channel_name == "persons" + assert ch.base_seed == 42 + assert len(ch.domain_index) == 3 + assert ch.step_name is None + assert ch._state_array is None + + def test_domain_index_is_copy(self): + domain_df = _make_df([1, 2, 3]) + ch = FastChannel("h", 0, domain_df) + # mutating original index should not affect stored domain_index + original_index = domain_df.index.copy() + assert list(ch.domain_index) == list(original_index) + + def test_step_started_when_step_name_given(self): + ch = _make_channel(step_name="my_step") + assert ch.step_name == "my_step" + assert ch._state_array is not None + assert ch._state_array.shape == (5, 4) + + def test_no_step_when_step_name_empty(self): + ch = _make_channel(step_name="") + assert ch.step_name is None + assert ch._state_array is None + + def test_channel_seed_differs_by_name(self): + d = _make_df([1, 2, 3]) + ch_a = FastChannel("alpha", 0, d) + ch_b = FastChannel("beta", 0, d) + assert ch_a.channel_seed != ch_b.channel_seed + + +# --------------------------------------------------------------------------- +# begin_step / end_step +# --------------------------------------------------------------------------- + + +class TestBeginEndStep: + def test_begin_step_populates_state(self): + ch = _make_channel() + ch.begin_step("step1") + assert ch.step_name == "step1" + assert ch._state_array is not None + assert ch._state_array.shape == (5, 4) + assert ch._state_array.dtype == np.uint64 + + def test_begin_step_raises_if_already_active(self): + ch = _make_channel() + ch.begin_step("step1") + with pytest.raises(AssertionError): + ch.begin_step("step2") + + def test_end_step_clears_state(self): + ch = _make_channel() + ch.begin_step("step1") + ch.end_step("step1") + assert ch.step_name is None + assert ch.step_seed is None + assert ch._state_array is None + + def test_end_step_consistency_check_passes(self): + ch = _make_channel() + ch.begin_step("step1") + # should not raise + ch.end_step("step1") + + def test_end_step_consistency_check_fails(self): + ch = _make_channel() + ch.begin_step("step1") + with pytest.raises(AssertionError): + ch.end_step("wrong_step") + + def test_end_step_no_name_skips_check(self): + ch = _make_channel() + ch.begin_step("step1") + ch.end_step() # no name → no assertion + assert ch.step_name is None + + def test_begin_step_same_name_reproducible(self): + """Re-running the same step name must reproduce the same state array.""" + ch = _make_channel() + ch.begin_step("step1") + state1 = ch._state_array.copy() + ch.end_step() + ch.begin_step("step1") + state2 = ch._state_array.copy() + npt.assert_array_equal(state1, state2) + + def test_begin_step_different_names_give_different_states(self): + ch = _make_channel() + ch.begin_step("step_a") + state_a = ch._state_array.copy() + ch.end_step() + ch.begin_step("step_b") + state_b = ch._state_array.copy() + assert not np.array_equal(state_a, state_b) + + +# --------------------------------------------------------------------------- +# _check_valid_df +# --------------------------------------------------------------------------- + + +class TestCheckValidDf: + def test_returns_positions_for_full_domain(self): + ch = _make_channel(index_values=[10, 20, 30]) + ch.begin_step("s") + df = _make_df([10, 20, 30]) + pos = ch._check_valid_df(df) + npt.assert_array_equal(pos, [0, 1, 2]) + + def test_returns_positions_for_subset(self): + ch = _make_channel(index_values=[10, 20, 30]) + ch.begin_step("s") + df = _make_df([30, 10]) + pos = ch._check_valid_df(df) + npt.assert_array_equal(pos, [2, 0]) + + def test_raises_on_duplicate_index(self): + ch = _make_channel(index_values=[1, 2, 3]) + ch.begin_step("s") + df = _make_df([1, 1, 2]) + with pytest.raises(ValueError, match="unique index"): + ch._check_valid_df(df) + + def test_raises_on_index_not_in_domain(self): + ch = _make_channel(index_values=[1, 2, 3]) + ch.begin_step("s") + df = _make_df([1, 99]) + with pytest.raises(ValueError, match="not found in the domain"): + ch._check_valid_df(df) + + def test_raises_outside_step(self): + ch = _make_channel(index_values=[1, 2, 3]) + # no begin_step called + df = _make_df([1, 2]) + with pytest.raises(ValueError, match="outside of a defined step"): + ch._check_valid_df(df) + + +# --------------------------------------------------------------------------- +# random_for_df +# --------------------------------------------------------------------------- + + +class TestRandomForDf: + def test_output_shape_single_draw(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3, 4, 5]) + rands = ch.random_for_df(df, "s", n=1) + assert rands.shape == (5, 1) + + def test_output_shape_multiple_draws(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3, 4, 5]) + rands = ch.random_for_df(df, "s", n=3) + assert rands.shape == (5, 3) + + def test_values_in_unit_interval(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3, 4, 5]) + rands = ch.random_for_df(df, "s", n=10) + assert np.all(rands >= 0.0) + assert np.all(rands < 1.0) + + def test_successive_calls_advance_stream(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3, 4, 5]) + r1 = ch.random_for_df(df, "s") + r2 = ch.random_for_df(df, "s") + assert not np.array_equal(r1, r2) + + def test_reproducible_across_steps(self): + ch = _make_channel() + df = _make_df([1, 2, 3, 4, 5]) + ch.begin_step("s") + r1 = ch.random_for_df(df, "s").copy() + ch.end_step() + ch.begin_step("s") + r2 = ch.random_for_df(df, "s").copy() + npt.assert_array_equal(r1, r2) + + def test_different_steps_give_different_streams(self): + ch = _make_channel() + df = _make_df([1, 2, 3, 4, 5]) + ch.begin_step("step_a") + r_a = ch.random_for_df(df, "step_a").copy() + ch.end_step() + ch.begin_step("step_b") + r_b = ch.random_for_df(df, "step_b").copy() + assert not np.array_equal(r_a, r_b) + + def test_different_base_seeds_give_different_streams(self): + df = _make_df([1, 2, 3]) + ch0 = FastChannel("h", 0, df) + ch1 = FastChannel("h", 1, df) + ch0.begin_step("s") + ch1.begin_step("s") + r0 = ch0.random_for_df(df, "s") + r1 = ch1.random_for_df(df, "s") + assert not np.array_equal(r0, r1) + + def test_different_channel_names_give_different_streams(self): + df = _make_df([1, 2, 3]) + ch_a = FastChannel("alpha", 0, df) + ch_b = FastChannel("beta", 0, df) + ch_a.begin_step("s") + ch_b.begin_step("s") + r_a = ch_a.random_for_df(df, "s") + r_b = ch_b.random_for_df(df, "s") + assert not np.array_equal(r_a, r_b) + + def test_subset_matches_full_domain_rows(self): + """Draws for a subset of rows should equal draws for those rows from the full domain.""" + ch_full = _make_channel(index_values=[1, 2, 3, 4, 5]) + ch_sub = _make_channel(index_values=[2, 4]) + + ch_full.begin_step("s") + ch_sub.begin_step("s") + + df_full = _make_df([1, 2, 3, 4, 5]) + df_sub = _make_df([2, 4]) + + r_full = ch_full.random_for_df(df_full, "s") + r_sub = ch_sub.random_for_df(df_sub, "s") + + # Rows for index 2 and 4 should match regardless of which other rows exist + npt.assert_array_equal(r_full[[1, 3]], r_sub) + + def test_wrong_step_name_raises(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2]) + with pytest.raises(AssertionError): + ch.random_for_df(df, "wrong_step") + + +# --------------------------------------------------------------------------- +# normal_for_df +# --------------------------------------------------------------------------- + + +class TestNormalForDf: + def test_output_shape(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3]) + result = ch.normal_for_df(df, "s") + assert result.shape == (3, 1) + + def test_output_shape_with_size(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3]) + result = ch.normal_for_df(df, "s", size=4) + assert result.shape == (3, 4) + + def test_mu_sigma_applied(self): + """With large sigma, values should spread away from mu.""" + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3, 4, 5]) + result = ch.normal_for_df(df, "s", mu=100.0, sigma=0.0001) + # all values should be very close to mu + npt.assert_allclose(result.flatten(), 100.0, atol=0.01) + + def test_lognormal_flag_positive(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3, 4, 5]) + result = ch.normal_for_df(df, "s", lognormal=True) + assert np.all(result > 0), "lognormal values must be positive" + + def test_successive_calls_advance_stream(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3]) + r1 = ch.normal_for_df(df, "s").copy() + r2 = ch.normal_for_df(df, "s").copy() + assert not np.array_equal(r1, r2) + + def test_reproducible_across_steps(self): + ch = _make_channel() + df = _make_df([1, 2, 3]) + ch.begin_step("s") + r1 = ch.normal_for_df(df, "s").copy() + ch.end_step() + ch.begin_step("s") + r2 = ch.normal_for_df(df, "s").copy() + npt.assert_array_equal(r1, r2) + + def test_scalar_mu_sigma_broadcast(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3, 4, 5]) + # should not raise; shape should still be (5, 1) + result = ch.normal_for_df(df, "s", mu=5.0, sigma=2.0) + assert result.shape == (5, 1) + + def test_lognormal_vs_normal_exp_relationship(self): + """exp(normal_for_df) should equal lognormal_for_df for same step.""" + domain_df = _make_df([1, 2, 3, 4, 5]) + ch_n = FastChannel("h", 7, domain_df) + ch_l = FastChannel("h", 7, domain_df) + ch_n.begin_step("s") + ch_l.begin_step("s") + df = _make_df([1, 2, 3, 4, 5]) + r_normal = ch_n.normal_for_df(df, "s", mu=0, sigma=1, lognormal=False) + r_lognormal = ch_l.normal_for_df(df, "s", mu=0, sigma=1, lognormal=True) + npt.assert_allclose(np.exp(r_normal), r_lognormal) + + +# --------------------------------------------------------------------------- +# choice_for_df +# --------------------------------------------------------------------------- + + +class TestChoiceForDf: + def test_output_length_with_replace(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3]) + choices = ch.choice_for_df(df, "s", a=10, size=4, replace=True) + assert choices.shape == (12,) # 3 rows × 4 + + def test_output_length_without_replace(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3]) + choices = ch.choice_for_df(df, "s", a=np.arange(10), size=3, replace=False) + assert choices.shape == (9,) # 3 rows × 3 + + def test_int_population_values_in_range(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3, 4, 5]) + choices = ch.choice_for_df(df, "s", a=7, size=10, replace=True) + assert np.all(choices >= 0) + assert np.all(choices < 7) + + def test_array_population_values_from_array(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3]) + a = np.array([10, 20, 30, 40]) + choices = ch.choice_for_df(df, "s", a=a, size=5, replace=True) + assert set(choices).issubset({10, 20, 30, 40}) + + def test_without_replace_no_duplicates_per_row(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3]) + n_pop = 8 + size = n_pop # draw all elements → no duplicates allowed + choices = ch.choice_for_df(df, "s", a=n_pop, size=size, replace=False) + for row_choices in choices.reshape(len(df), size): + assert len(set(row_choices)) == size + + def test_without_replace_raises_when_size_exceeds_population(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2]) + with pytest.raises(ValueError, match="replace=False"): + ch.choice_for_df(df, "s", a=3, size=5, replace=False) + + def test_successive_calls_advance_stream(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3]) + c1 = ch.choice_for_df(df, "s", a=100, size=5, replace=True).copy() + c2 = ch.choice_for_df(df, "s", a=100, size=5, replace=True).copy() + assert not np.array_equal(c1, c2) + + def test_reproducible_across_steps(self): + ch = _make_channel() + df = _make_df([1, 2, 3, 4, 5]) + ch.begin_step("s") + c1 = ch.choice_for_df(df, "s", a=20, size=4, replace=True).copy() + ch.end_step() + ch.begin_step("s") + c2 = ch.choice_for_df(df, "s", a=20, size=4, replace=True).copy() + npt.assert_array_equal(c1, c2) + + def test_different_steps_give_different_choices(self): + ch = _make_channel() + df = _make_df([1, 2, 3, 4, 5]) + ch.begin_step("step_a") + c_a = ch.choice_for_df(df, "step_a", a=50, size=10, replace=True).copy() + ch.end_step() + ch.begin_step("step_b") + c_b = ch.choice_for_df(df, "step_b", a=50, size=10, replace=True).copy() + assert not np.array_equal(c_a, c_b) + + def test_output_dtype_int(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3]) + choices = ch.choice_for_df(df, "s", a=10, size=3, replace=True) + assert np.issubdtype(choices.dtype, np.integer) + + def test_array_population_without_replace_values_in_array(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3]) + a = np.array([100, 200, 300, 400, 500]) + choices = ch.choice_for_df(df, "s", a=a, size=3, replace=False) + assert set(choices).issubset(set(a)) + + def test_wrong_step_name_raises(self): + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2]) + with pytest.raises(AssertionError): + ch.choice_for_df(df, "wrong", a=5, size=2, replace=True) + + +# --------------------------------------------------------------------------- +# Cross-method stream independence +# --------------------------------------------------------------------------- + + +class TestStreamIndependence: + def test_random_and_normal_advance_independently(self): + """random_for_df and normal_for_df on the same channel should each + advance the underlying streams, so their second calls differ from first.""" + ch = _make_channel() + ch.begin_step("s") + df = _make_df([1, 2, 3]) + r1 = ch.random_for_df(df, "s").copy() + n1 = ch.normal_for_df(df, "s").copy() + r2 = ch.random_for_df(df, "s").copy() + n2 = ch.normal_for_df(df, "s").copy() + assert not np.array_equal(r1, r2) + assert not np.array_equal(n1, n2) + + def test_two_channels_same_seed_independent(self): + """Two FastChannels with the same base_seed but different names should + produce independent (different) streams for the same index values.""" + df = _make_df([1, 2, 3, 4, 5]) + ch_a = FastChannel("channel_a", 0, df) + ch_b = FastChannel("channel_b", 0, df) + ch_a.begin_step("s") + ch_b.begin_step("s") + r_a = ch_a.random_for_df(df, "s") + r_b = ch_b.random_for_df(df, "s") + assert not np.array_equal(r_a, r_b) + + def test_row_streams_are_independent(self): + """Each row should have its own stream; draws for one row should not + correlate trivially with another row.""" + ch = _make_channel(index_values=list(range(100))) + ch.begin_step("s") + df = _make_df(list(range(100))) + rands = ch.random_for_df(df, "s", n=10) + # Check that not all rows are identical + assert not np.all(rands == rands[0]) diff --git a/activitysim/core/test/test_fast_random.py b/activitysim/core/test/test_fast_random.py new file mode 100644 index 0000000000..63cfa3dfec --- /dev/null +++ b/activitysim/core/test/test_fast_random.py @@ -0,0 +1,306 @@ +"""Tests for vector_random_standard_normal and vector_random_standard_uniform.""" + +from __future__ import annotations + +import numpy as np + +from activitysim.core.fast_random._fast_random import FastGenerator + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def make_fast_generator(): + return FastGenerator() + + +def make_state_array( + fg: FastGenerator, n_agents: int, base_seed: int = 0 +) -> np.ndarray: + """Return a (n_agents, 4) uint64 state array seeded from PCG64 generators. + + Each row is a valid PCG64 state obtained by seeding a fresh generator with + ``base_seed + i`` and reading the current raw CFFI state buffer. This + mirrors exactly the layout consumed by the private ``_several_random_*`` + functions. + """ + uint64_view = fg._state_bytes.view(np.uint64) + state = np.zeros((n_agents, 4), dtype=np.uint64) + for i in range(n_agents): + fg._bit_generator.state = np.random.PCG64(seed=base_seed + i).state + state[i] = uint64_view[fg._slice_start : fg._slice_end].copy() + return state + + +# --------------------------------------------------------------------------- +# vector_random_standard_uniform +# --------------------------------------------------------------------------- + + +class TestVectorRandomStandardUniform: + """Tests for vector_random_standard_uniform.""" + + # --- output dtype and basic shape --- + + def test_returns_float64(self): + fg = make_fast_generator() + state = make_state_array(fg, 5) + result = fg.vector_random_standard_uniform(state) + assert result.dtype == np.float64 + + def test_default_shape_all_agents(self): + n = 7 + fg = make_fast_generator() + state = make_state_array(fg, n) + result = fg.vector_random_standard_uniform(state) + assert result.shape == (n, 1) + + def test_int_shape(self): + n = 6 + fg = make_fast_generator() + state = make_state_array(fg, n) + result = fg.vector_random_standard_uniform(state, shape=4) + assert result.shape == (n, 4) + + def test_tuple_shape_1d(self): + n = 5 + fg = make_fast_generator() + state = make_state_array(fg, n) + result = fg.vector_random_standard_uniform(state, shape=(3,)) + assert result.shape == (n, 3) + + def test_tuple_shape_2d(self): + n = 4 + fg = make_fast_generator() + state = make_state_array(fg, n) + result = fg.vector_random_standard_uniform(state, shape=(2, 3)) + assert result.shape == (n, 2, 3) + + # --- values in [0, 1) --- + + def test_values_in_unit_interval(self): + fg = make_fast_generator() + state = make_state_array(fg, 50) + result = fg.vector_random_standard_uniform(state, shape=100) + assert np.all(result >= 0.0) + assert np.all(result < 1.0) + + # --- selected_positions --- + + def test_selected_positions_shape(self): + n = 10 + fg = make_fast_generator() + state = make_state_array(fg, n) + sel = np.array([0, 2, 5], dtype=np.intp) + result = fg.vector_random_standard_uniform( + state, selected_positions=sel, shape=4 + ) + assert result.shape == (3, 4) + + def test_selected_positions_values_in_unit_interval(self): + n = 10 + fg = make_fast_generator() + state = make_state_array(fg, n) + sel = np.array([1, 4, 7], dtype=np.intp) + result = fg.vector_random_standard_uniform( + state, selected_positions=sel, shape=20 + ) + assert np.all(result >= 0.0) + assert np.all(result < 1.0) + + def test_selected_positions_only_selected_rows_mutated(self): + n = 5 + fg = make_fast_generator() + state = make_state_array(fg, n) + state_copy = state.copy() + sel = np.array([1, 3], dtype=np.intp) + fg.vector_random_standard_uniform(state, selected_positions=sel, shape=1) + # Rows NOT in sel must be unchanged. + for i in range(n): + if i not in sel: + np.testing.assert_array_equal( + state[i], + state_copy[i], + err_msg=f"Row {i} should not have been mutated", + ) + # Rows IN sel must have changed. + for i in sel: + assert not np.array_equal( + state[i], state_copy[i] + ), f"Row {i} should have been mutated" + + # --- state mutation --- + + def test_state_mutated_in_place(self): + fg = make_fast_generator() + state = make_state_array(fg, 4) + state_before = state.copy() + fg.vector_random_standard_uniform(state, shape=3) + assert not np.array_equal(state, state_before) + + # --- reproducibility --- + + def test_reproducibility(self): + """Identical initial state must produce identical output.""" + fg = make_fast_generator() + state_a = make_state_array(fg, 8, base_seed=99) + state_b = state_a.copy() + out_a = fg.vector_random_standard_uniform(state_a, shape=10) + out_b = fg.vector_random_standard_uniform(state_b, shape=10) + np.testing.assert_array_equal(out_a, out_b) + + def test_different_seeds_differ(self): + fg = make_fast_generator() + state_a = make_state_array(fg, 5, base_seed=0) + state_b = make_state_array(fg, 5, base_seed=1000) + out_a = fg.vector_random_standard_uniform(state_a, shape=20) + out_b = fg.vector_random_standard_uniform(state_b, shape=20) + assert not np.array_equal(out_a, out_b) + + # --- statistical sanity (large sample) --- + + def test_mean_close_to_half(self): + fg = make_fast_generator() + state = make_state_array(fg, 200) + result = fg.vector_random_standard_uniform(state, shape=500) + assert abs(result.mean() - 0.5) < 0.01 + + def test_no_exact_ones(self): + """U[0, 1) must never produce exactly 1.0.""" + fg = make_fast_generator() + state = make_state_array(fg, 100) + result = fg.vector_random_standard_uniform(state, shape=1000) + assert not np.any(result == 1.0) + + +# --------------------------------------------------------------------------- +# vector_random_standard_normal +# --------------------------------------------------------------------------- + + +class TestVectorRandomStandardNormal: + """Tests for vector_random_standard_normal.""" + + # --- output dtype and basic shape --- + + def test_returns_float64(self): + fg = make_fast_generator() + state = make_state_array(fg, 5) + result = fg.vector_random_standard_normal(state) + assert result.dtype == np.float64 + + def test_default_shape_all_agents(self): + n = 7 + fg = make_fast_generator() + state = make_state_array(fg, n) + result = fg.vector_random_standard_normal(state) + assert result.shape == (n, 1) + + def test_int_shape(self): + n = 6 + fg = make_fast_generator() + state = make_state_array(fg, n) + result = fg.vector_random_standard_normal(state, shape=4) + assert result.shape == (n, 4) + + def test_tuple_shape_1d(self): + n = 5 + fg = make_fast_generator() + state = make_state_array(fg, n) + result = fg.vector_random_standard_normal(state, shape=(3,)) + assert result.shape == (n, 3) + + def test_tuple_shape_2d(self): + n = 4 + fg = make_fast_generator() + state = make_state_array(fg, n) + result = fg.vector_random_standard_normal(state, shape=(2, 3)) + assert result.shape == (n, 2, 3) + + # --- selected_positions --- + + def test_selected_positions_shape(self): + n = 10 + fg = make_fast_generator() + state = make_state_array(fg, n) + sel = np.array([0, 2, 5], dtype=np.intp) + result = fg.vector_random_standard_normal( + state, selected_positions=sel, shape=4 + ) + assert result.shape == (3, 4) + + def test_selected_positions_only_selected_rows_mutated(self): + n = 5 + fg = make_fast_generator() + state = make_state_array(fg, n) + state_copy = state.copy() + sel = np.array([0, 4], dtype=np.intp) + fg.vector_random_standard_normal(state, selected_positions=sel, shape=1) + for i in range(n): + if i not in sel: + np.testing.assert_array_equal( + state[i], + state_copy[i], + err_msg=f"Row {i} should not have been mutated", + ) + for i in sel: + assert not np.array_equal( + state[i], state_copy[i] + ), f"Row {i} should have been mutated" + + # --- state mutation --- + + def test_state_mutated_in_place(self): + fg = make_fast_generator() + state = make_state_array(fg, 4) + state_before = state.copy() + fg.vector_random_standard_normal(state, shape=3) + assert not np.array_equal(state, state_before) + + # --- reproducibility --- + + def test_reproducibility(self): + """Identical initial state must produce identical output.""" + fg = make_fast_generator() + state_a = make_state_array(fg, 8, base_seed=77) + state_b = state_a.copy() + out_a = fg.vector_random_standard_normal(state_a, shape=10) + out_b = fg.vector_random_standard_normal(state_b, shape=10) + np.testing.assert_array_equal(out_a, out_b) + + def test_different_seeds_differ(self): + fg = make_fast_generator() + state_a = make_state_array(fg, 5, base_seed=0) + state_b = make_state_array(fg, 5, base_seed=1000) + out_a = fg.vector_random_standard_normal(state_a, shape=20) + out_b = fg.vector_random_standard_normal(state_b, shape=20) + assert not np.array_equal(out_a, out_b) + + # --- statistical sanity (large sample) --- + + def test_mean_close_to_zero(self): + fg = make_fast_generator() + state = make_state_array(fg, 200) + result = fg.vector_random_standard_normal(state, shape=500) + assert abs(result.mean()) < 0.05 + + def test_std_close_to_one(self): + fg = make_fast_generator() + state = make_state_array(fg, 200) + result = fg.vector_random_standard_normal(state, shape=500) + assert abs(result.std() - 1.0) < 0.05 + + def test_values_are_finite(self): + fg = make_fast_generator() + state = make_state_array(fg, 50) + result = fg.vector_random_standard_normal(state, shape=200) + assert np.all(np.isfinite(result)) + + def test_distribution_symmetry(self): + """Mean of absolute values should be close to sqrt(2/pi) ≈ 0.7979.""" + fg = make_fast_generator() + expected_mean_abs = np.sqrt(2.0 / np.pi) + state = make_state_array(fg, 200) + result = fg.vector_random_standard_normal(state, shape=500) + assert abs(np.abs(result).mean() - expected_mean_abs) < 0.05 From a7a4e2740e252bbc37b15412dbf10cf405c1be65 Mon Sep 17 00:00:00 2001 From: Jeffrey Newman Date: Thu, 30 Apr 2026 16:54:47 -0500 Subject: [PATCH 02/21] add fast channel test --- activitysim/core/fast_random/_fast_channel.py | 2 + activitysim/core/test/test_random.py | 70 +++++++++++++------ 2 files changed, 52 insertions(+), 20 deletions(-) diff --git a/activitysim/core/fast_random/_fast_channel.py b/activitysim/core/fast_random/_fast_channel.py index 5d6f29ee75..1757852592 100644 --- a/activitysim/core/fast_random/_fast_channel.py +++ b/activitysim/core/fast_random/_fast_channel.py @@ -241,6 +241,8 @@ def normal_for_df( assert step_name is not None assert step_name == self.step_name selected_positions = self._check_valid_df(df) + if size is None: + size = 1 mu = np.asarray(mu) sigma = np.asarray(sigma) diff --git a/activitysim/core/test/test_random.py b/activitysim/core/test/test_random.py index bcbc602685..83e2e4dce5 100644 --- a/activitysim/core/test/test_random.py +++ b/activitysim/core/test/test_random.py @@ -33,7 +33,8 @@ def test_basic(): assert "call set_base_seed before the first step" in str(excinfo.value) -def test_channel(): +@pytest.mark.parametrize("fast", [False, True]) +def test_channel(fast: bool): channels = { "households": "household_id", "persons": "person_id", @@ -58,20 +59,26 @@ def test_channel(): rng.begin_step("test_step") - rng.add_channel("persons", persons) - rng.add_channel("households", households) + rng.add_channel("persons", persons, fast=fast) + rng.add_channel("households", households, fast=fast) rands = rng.random_for_df(persons) print("rands", np.asanyarray(rands).flatten()) assert rands.shape == (5, 1) - test1_expected_rands = [0.1733218, 0.1255693, 0.7384256, 0.3485183, 0.9012387] + if fast: + test1_expected_rands = [0.4072658, 0.5591271, 0.0297283, 0.6235138, 0.6921163] + else: + test1_expected_rands = [0.1733218, 0.1255693, 0.7384256, 0.3485183, 0.9012387] npt.assert_almost_equal(np.asanyarray(rands).flatten(), test1_expected_rands) # second call should return something different rands = rng.random_for_df(persons) - test1_expected_rands2 = [0.9105223, 0.5718418, 0.7222742, 0.9062284, 0.3929369] + if fast: + test1_expected_rands2 = [0.336963, 0.5420581, 0.4396565, 0.9702927, 0.0251327] + else: + test1_expected_rands2 = [0.9105223, 0.5718418, 0.7222742, 0.9062284, 0.3929369] npt.assert_almost_equal(np.asanyarray(rands).flatten(), test1_expected_rands2) rng.end_step("test_step") @@ -79,16 +86,25 @@ def test_channel(): rng.begin_step("test_step2") rands = rng.random_for_df(households) - expected_rands = [0.417278, 0.2994774, 0.8653719, 0.4429748, 0.5101697] + if fast: + expected_rands = [0.1571023, 0.2709219, 0.2515827, 0.9444831, 0.6816792] + else: + expected_rands = [0.417278, 0.2994774, 0.8653719, 0.4429748, 0.5101697] npt.assert_almost_equal(np.asanyarray(rands).flatten(), expected_rands) choices = rng.choice_for_df(households, [1, 2, 3, 4], 2, replace=True) - expected_choices = [2, 1, 3, 3, 4, 2, 4, 1, 4, 1] + if fast: + expected_choices = [4, 1, 4, 3, 2, 1, 3, 1, 1, 4] + else: + expected_choices = [2, 1, 3, 3, 4, 2, 4, 1, 4, 1] npt.assert_almost_equal(choices, expected_choices) # should be DIFFERENT the second time choices = rng.choice_for_df(households, [1, 2, 3, 4], 2, replace=True) - expected_choices = [3, 1, 4, 3, 3, 2, 2, 1, 4, 2] + if fast: + expected_choices = [1, 4, 2, 1, 2, 3, 1, 2, 2, 4] + else: + expected_choices = [3, 1, 4, 3, 3, 2, 2, 1, 4, 2] npt.assert_almost_equal(choices, expected_choices) rng.end_step("test_step2") @@ -97,18 +113,32 @@ def test_channel(): rands = rng.random_for_df(households, n=2) - expected_rands = [ - 0.3157928, - 0.3321823, - 0.5194067, - 0.9340083, - 0.9002048, - 0.8754209, - 0.3898816, - 0.4101094, - 0.7351484, - 0.1741092, - ] + if fast: + expected_rands = [ + 0.0728735, + 0.9764697, + 0.6611142, + 0.8802973, + 0.0122184, + 0.8770089, + 0.9944639, + 0.2064867, + 0.6051138, + 0.1666114, + ] + else: + expected_rands = [ + 0.3157928, + 0.3321823, + 0.5194067, + 0.9340083, + 0.9002048, + 0.8754209, + 0.3898816, + 0.4101094, + 0.7351484, + 0.1741092, + ] npt.assert_almost_equal(np.asanyarray(rands).flatten(), expected_rands) From 3b65bb3693cb5e5af57b920fe5d60a73a2734563 Mon Sep 17 00:00:00 2001 From: Jeffrey Newman Date: Thu, 30 Apr 2026 19:30:21 -0500 Subject: [PATCH 03/21] configurable RNG channel type Introduce a configurable RNG channel type and exercise both implementations in tests. Add Settings.rng_channel_type to choose between the new FastChannel (PCG64 vectorised) and legacy SimpleChannel for reproducibility. Random now accepts a channel_type on init and add_channel accepts fast=None to default to the global channel_type; existing code will pick up settings.rng_channel_type via State initialization and rng access. Implement FastChannel.extend_domain to allow adding new domain rows (initialising per-row PCG64 state when a step is active) and tighten index handling. Update many pipeline tests to parametrize over channel types, isolate per-channel output dirs, and include per-channel expected regression values and checks. --- .../abm/test/test_pipeline/test_pipeline.py | 225 ++++++++++++------ activitysim/core/configuration/top.py | 20 +- activitysim/core/fast_random/_fast_channel.py | 62 ++++- activitysim/core/random.py | 17 +- activitysim/core/workflow/state.py | 31 ++- 5 files changed, 268 insertions(+), 87 deletions(-) diff --git a/activitysim/abm/test/test_pipeline/test_pipeline.py b/activitysim/abm/test/test_pipeline/test_pipeline.py index 2d837258a5..15c225d4f9 100644 --- a/activitysim/abm/test/test_pipeline/test_pipeline.py +++ b/activitysim/abm/test/test_pipeline/test_pipeline.py @@ -37,7 +37,21 @@ def example_path(dirname): return str(importlib.resources.files("activitysim").joinpath(resource)) -def setup_dirs(ancillary_configs_dir=None, data_dir=None): +# Run every parametrized test against both the legacy SimpleChannel and the +# new FastChannel implementations. Tests use ``channel_type`` to (a) inform +# the workflow which channel implementation to use, (b) isolate per-type +# output directories so checkpoint stores written by one channel type cannot +# be read by the other, and (c) look up the appropriate regression-expected +# values from per-channel-type tables defined below. +CHANNEL_TYPES = ("simple", "fast") + + +@pytest.fixture(params=CHANNEL_TYPES) +def channel_type(request): + return request.param + + +def setup_dirs(ancillary_configs_dir=None, data_dir=None, channel_type="fast"): # ancillary_configs_dir is used by run_mp to test multiprocess test_pipeline_configs_dir = os.path.join(os.path.dirname(__file__), "configs") @@ -47,7 +61,13 @@ def setup_dirs(ancillary_configs_dir=None, data_dir=None): if ancillary_configs_dir is not None: configs_dir = [ancillary_configs_dir] + configs_dir - output_dir = os.path.join(os.path.dirname(__file__), "output") + # Use a per-channel-type output subdirectory so checkpoint stores and skim + # caches written under one channel implementation do not interfere with + # those written under the other. + output_dir = os.path.join( + os.path.dirname(__file__), "output", f"channel_{channel_type}" + ) + os.makedirs(output_dir, exist_ok=True) if not data_dir: data_dir = example_path("data") @@ -58,6 +78,9 @@ def setup_dirs(ancillary_configs_dir=None, data_dir=None): data_dir=data_dir, ) + # Select the channel implementation requested by the test parameter. + state.settings.rng_channel_type = channel_type + state.logging.config_logger() state.tracing.delete_output_files("csv") @@ -77,8 +100,8 @@ def close_handlers(): logger.setLevel(logging.NOTSET) -def test_rng_access(): - state = setup_dirs() +def test_rng_access(channel_type): + state = setup_dirs(channel_type=channel_type) state.settings.rng_base_seed = 0 state.checkpoint.restore() @@ -86,15 +109,37 @@ def test_rng_access(): rng = state.get_rn_generator() assert isinstance(rng, random.Random) + assert rng.channel_type == channel_type state.checkpoint.close_store() -def regress_mini_auto(state: workflow.State): +# Per-channel-type regression-expected values for the mini-pipeline tests. +# The "simple" values are the long-standing reference values; the "fast" +# values were captured from a clean run with the FastChannel implementation +# and are checked here to guard against unintended drift in the new +# vectorised PCG64 streams. +_MINI_AUTO_HH_IDS = [1099626, 1173905, 1196298, 1286259] +_MINI_AUTO_EXPECTED = { + "simple": [1, 1, 0, 0], + "fast": [0, 0, 1, 0], +} + +_MINI_MTF_PER_IDS = { + "simple": [2566701, 2566702, 3061895], + "fast": [3188482, 3188483, 3188484], +} +_MINI_MTF_EXPECTED = { + "simple": ["school1", "school1", "work1"], + "fast": ["work1", "work1", "work_and_school"], +} + + +def regress_mini_auto(state: workflow.State, channel_type: str = "simple"): # regression test: these are among the middle households in households table # should be the same results as in run_mp (multiprocessing) test case - hh_ids = [1099626, 1173905, 1196298, 1286259] - choices = [1, 1, 0, 0] + hh_ids = _MINI_AUTO_HH_IDS + choices = _MINI_AUTO_EXPECTED[channel_type] expected_choice = pd.Series( choices, index=pd.Index(hh_ids, name="household_id"), name="auto_ownership" ) @@ -106,12 +151,15 @@ def regress_mini_auto(state: workflow.State): offset = ( HOUSEHOLDS_SAMPLE_SIZE // 2 ) # choose something midway as hh_id ordered by hh size - print("auto_choice\n%s" % auto_choice.head(offset).tail(4)) + print( + "auto_choice (channel=%s)\n%s" + % (channel_type, auto_choice.head(offset).tail(4)) + ) auto_choice = auto_choice.reindex(hh_ids) """ - auto_choice + auto_choice (simple channel) household_id 1099626 1 1173905 1 @@ -122,14 +170,14 @@ def regress_mini_auto(state: workflow.State): pdt.assert_series_equal(auto_choice, expected_choice, check_dtype=False) -def regress_mini_mtf(state: workflow.State): +def regress_mini_mtf(state: workflow.State, channel_type: str = "simple"): mtf_choice = ( state.checkpoint.load_dataframe("persons").sort_index().mandatory_tour_frequency ) # these choices are for pure regression - their appropriateness has not been checked - per_ids = [2566701, 2566702, 3061895] - choices = ["school1", "school1", "work1"] + per_ids = _MINI_MTF_PER_IDS[channel_type] + choices = _MINI_MTF_EXPECTED[channel_type] expected_choice = pd.Series( choices, index=pd.Index(per_ids, name="person_id"), @@ -139,10 +187,16 @@ def regress_mini_mtf(state: workflow.State): mtf_choice = mtf_choice[mtf_choice != ""] # drop null (empty string) choices offset = len(mtf_choice) // 2 # choose something midway as hh_id ordered by hh size - print("mtf_choice\n%s" % mtf_choice.head(offset).tail(3)) + print( + "mtf_choice (channel=%s)\n%s" % (channel_type, mtf_choice.head(offset).tail(3)) + ) + print( + "mtf_choice for regression per_ids (channel=%s):\n%s" + % (channel_type, mtf_choice.astype(str).reindex(per_ids)) + ) """ - mtf_choice + mtf_choice (simple channel) person_id 2566701 school1 2566702 school1 @@ -165,10 +219,10 @@ def regress_mini_location_choice_logsums(state: workflow.State): assert "workplace_location_logsum" not in persons -def test_mini_pipeline_run(): +def test_mini_pipeline_run(channel_type): from activitysim.abm.tables.skims import network_los_preload - state = setup_dirs() + state = setup_dirs(channel_type=channel_type) state.get(network_los_preload) state.settings.households_sample_size = HOUSEHOLDS_SAMPLE_SIZE @@ -185,12 +239,12 @@ def test_mini_pipeline_run(): state.run(models=_MODELS, resume_after=None) - regress_mini_auto(state) + regress_mini_auto(state, channel_type=channel_type) state.run.by_name("cdap_simulate") state.run.by_name("mandatory_tour_frequency") - regress_mini_mtf(state) + regress_mini_mtf(state, channel_type=channel_type) regress_mini_location_choice_logsums(state) # try to get a non-existant table @@ -213,12 +267,12 @@ def test_mini_pipeline_run(): close_handlers() -def test_mini_pipeline_run2(): +def test_mini_pipeline_run2(channel_type): # the important thing here is that we should get # exactly the same results as for test_mini_pipeline_run # when we restart pipeline - state = setup_dirs() + state = setup_dirs(channel_type=channel_type) from activitysim.abm.tables.skims import network_los_preload state.get(network_los_preload) @@ -236,7 +290,7 @@ def test_mini_pipeline_run2(): state.checkpoint.restore("auto_ownership_simulate") - regress_mini_auto(state) + regress_mini_auto(state, channel_type=channel_type) # try to run a model already in pipeline with pytest.raises(DuplicateWorkflowNameError) as excinfo: @@ -247,7 +301,7 @@ def test_mini_pipeline_run2(): state.run.by_name("cdap_simulate") state.run.by_name("mandatory_tour_frequency") - regress_mini_mtf(state) + regress_mini_mtf(state, channel_type=channel_type) # should be able to get this before pipeline is closed (from existing open store) checkpoints_df = state.checkpoint.get_inventory() @@ -265,10 +319,10 @@ def test_mini_pipeline_run2(): close_handlers() -def test_mini_pipeline_run3(): +def test_mini_pipeline_run3(channel_type): # test that hh_ids setting overrides household sampling - state = setup_dirs() + state = setup_dirs(channel_type=channel_type) state.settings.hh_ids = "override_hh_ids.csv" households = state.get_dataframe("households") @@ -294,8 +348,9 @@ def full_run( trace_hh_id=None, trace_od=None, check_for_variability=False, + channel_type="fast", ): - state = setup_dirs() + state = setup_dirs(channel_type=channel_type) state.settings.households_sample_size = households_sample_size state.settings.chunk_size = chunk_size @@ -318,10 +373,32 @@ def full_run( return state, tour_count -EXPECT_TOUR_COUNT = 121 +EXPECT_TOUR_COUNT = { + "simple": 121, + "fast": 108, +} + + +# Per-channel-type expected per-tour values for HH_ID, sorted by +# (person_id, tour_category, tour_num). These were captured from clean +# reference runs and are checked here to guard against unintended drift. +_EXPECT_PERSON_IDS = { + "simple": [325051, 325051, 325051, 325052, 325052, 325052], + "fast": [325051, 325051, 325051, 325052], +} + +_EXPECT_TOUR_TYPES = { + "simple": ["othdiscr", "work", "work", "business", "work", "othmaint"], + "fast": ["work", "escort", "eatout", "work"], +} +_EXPECT_MODES = { + "simple": ["WALK", "WALK", "SHARED3FREE", "WALK", "WALK_LOC", "WALK"], + "fast": ["WALK", "TNC_SHARED", "WALK", "WALK"], +} -def regress_tour_modes(tours_df): + +def regress_tour_modes(tours_df, channel_type: str = "simple"): mode_cols = ["tour_mode", "person_id", "tour_type", "tour_num", "tour_category"] tours_df = tours_df[tours_df.household_id == HH_ID] @@ -329,9 +406,10 @@ def regress_tour_modes(tours_df): tours_df.tour_category = tours_df.tour_category.astype(str) tours_df = tours_df.sort_values(by=["person_id", "tour_category", "tour_num"]) - print("mode_df\n%s" % tours_df[mode_cols]) + print("mode_df (channel=%s)\n%s" % (channel_type, tours_df[mode_cols])) """ + simple channel: tour_mode person_id tour_type tour_num tour_category tour_id 13327106 WALK 325051 othdiscr 1 joint @@ -340,27 +418,19 @@ def regress_tour_modes(tours_df): 13327132 WALK 325052 business 1 atwork 13327171 WALK_LOC 325052 work 1 mandatory 13327160 WALK 325052 othmaint 1 non_mandatory - """ - - EXPECT_PERSON_IDS = [ - 325051, - 325051, - 325051, - 325052, - 325052, - 325052, - ] - EXPECT_TOUR_TYPES = ["othdiscr", "work", "work", "business", "work", "othmaint"] + fast channel: + tour_mode person_id tour_type tour_num tour_category + tour_id + 13327130 WALK 325051 work 1 mandatory + 13327100 TNC_SHARED 325051 escort 1 non_mandatory + 13327097 WALK 325051 eatout 2 non_mandatory + 13327171 WALK 325052 work 1 mandatory + """ - EXPECT_MODES = [ - "WALK", - "WALK", - "SHARED3FREE", - "WALK", - "WALK_LOC", - "WALK", - ] + EXPECT_PERSON_IDS = _EXPECT_PERSON_IDS[channel_type] + EXPECT_TOUR_TYPES = _EXPECT_TOUR_TYPES[channel_type] + EXPECT_MODES = _EXPECT_MODES[channel_type] assert len(tours_df) == len(EXPECT_PERSON_IDS) assert (tours_df.person_id.values == EXPECT_PERSON_IDS).all() @@ -368,7 +438,7 @@ def regress_tour_modes(tours_df): assert (tours_df.tour_mode.astype(str).values == EXPECT_MODES).all() -def regress(state: workflow.State): +def regress(state: workflow.State, channel_type: str = "simple"): persons_df = state.checkpoint.load_dataframe("persons") persons_df = persons_df[persons_df.household_id == HH_ID] print("persons_df\n%s" % persons_df[["value_of_time", "distance_to_work"]]) @@ -383,7 +453,7 @@ def regress(state: workflow.State): tours_df = state.checkpoint.load_dataframe("tours") - regress_tour_modes(tours_df) + regress_tour_modes(tours_df, channel_type=channel_type) assert tours_df.shape[0] > 0 assert not tours_df.tour_mode.isnull().any() @@ -434,7 +504,15 @@ def regress(state: workflow.State): trip_matrices.close() -def test_full_run1(): +def _check_tour_count(channel_type, tour_count): + expect = EXPECT_TOUR_COUNT[channel_type] + assert tour_count == expect, "EXPECT_TOUR_COUNT %s but got tour_count %s" % ( + expect, + tour_count, + ) + + +def test_full_run1(channel_type): if SKIP_FULL_RUN: return @@ -442,39 +520,38 @@ def test_full_run1(): trace_hh_id=HH_ID, check_for_variability=True, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE, + channel_type=channel_type, ) print("tour_count", tour_count) - assert ( - tour_count == EXPECT_TOUR_COUNT - ), "EXPECT_TOUR_COUNT %s but got tour_count %s" % (EXPECT_TOUR_COUNT, tour_count) + _check_tour_count(channel_type, tour_count) - regress(state) + regress(state, channel_type=channel_type) state.checkpoint.close_store() -def test_full_run2(): +def test_full_run2(channel_type): # resume_after should successfully load tours table and replicate results if SKIP_FULL_RUN: return state, tour_count = full_run( - resume_after="non_mandatory_tour_scheduling", trace_hh_id=HH_ID + resume_after="non_mandatory_tour_scheduling", + trace_hh_id=HH_ID, + channel_type=channel_type, ) - assert ( - tour_count == EXPECT_TOUR_COUNT - ), "EXPECT_TOUR_COUNT %s but got tour_count %s" % (EXPECT_TOUR_COUNT, tour_count) + _check_tour_count(channel_type, tour_count) - regress(state) + regress(state, channel_type=channel_type) state.checkpoint.close_store() -def test_full_run3_with_chunks(): +def test_full_run3_with_chunks(channel_type): # should get the same result with different chunk size if SKIP_FULL_RUN: @@ -484,33 +561,34 @@ def test_full_run3_with_chunks(): trace_hh_id=HH_ID, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE, chunk_size=500000, + channel_type=channel_type, ) - assert ( - tour_count == EXPECT_TOUR_COUNT - ), "EXPECT_TOUR_COUNT %s but got tour_count %s" % (EXPECT_TOUR_COUNT, tour_count) + _check_tour_count(channel_type, tour_count) - regress(state) + regress(state, channel_type=channel_type) state.checkpoint.close_store() -def test_full_run4_stability(): +def test_full_run4_stability(channel_type): # hh should get the same result with different sample size if SKIP_FULL_RUN: return state, tour_count = full_run( - trace_hh_id=HH_ID, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE - 10 + trace_hh_id=HH_ID, + households_sample_size=HOUSEHOLDS_SAMPLE_SIZE - 10, + channel_type=channel_type, ) - regress(state) + regress(state, channel_type=channel_type) state.checkpoint.close_store() -def test_full_run5_singleton(): +def test_full_run5_singleton(channel_type): # should work with only one hh # run with minimum chunk size to drive potential chunking errors in models # where choosers has multiple rows that all have to be included in the same chunk @@ -519,15 +597,18 @@ def test_full_run5_singleton(): return state, tour_count = full_run( - trace_hh_id=HH_ID, households_sample_size=1, chunk_size=1 + trace_hh_id=HH_ID, + households_sample_size=1, + chunk_size=1, + channel_type=channel_type, ) - regress(state) + regress(state, channel_type=channel_type) state.checkpoint.close_store() if __name__ == "__main__": print("running test_full_run1") - test_full_run1() + test_full_run1("simple") # teardown_function(None) diff --git a/activitysim/core/configuration/top.py b/activitysim/core/configuration/top.py index 825283126c..9dbb61820c 100644 --- a/activitysim/core/configuration/top.py +++ b/activitysim/core/configuration/top.py @@ -1,9 +1,9 @@ from __future__ import annotations -from pathlib import Path -from typing import Any, Literal import struct import time +from pathlib import Path +from typing import Any, Literal from pydantic import model_validator, validator @@ -743,6 +743,20 @@ def _check_store_skims_in_shm(self): rng_base_seed: Union[int, None] = 0 """Base seed for pseudo-random number generator.""" + rng_channel_type: Literal["fast", "simple"] = "fast" + """ + Which random-channel implementation to use for per-row RNG streams. + + * ``"fast"`` (default) + Use :class:`activitysim.core.fast_random.FastChannel`, a vectorised + PCG64-based implementation. + * ``"simple"`` + Use the legacy :class:`activitysim.core.random.SimpleChannel`, + which seeds a per-row :class:`numpy.random.RandomState` on the fly. + Slower but provided for backward compatibility / reproducibility + with results generated by older ActivitySim versions. + """ + duplicate_step_execution: Literal["error", "allow"] = "error" """ How activitysim should handle attempts to re-run a step with the same name. @@ -780,7 +794,7 @@ def _check_store_skims_in_shm(self): """ run checks to validate that YAML settings files are loadable and spec and coefficient csv can be resolved. - should catch many common errors early, including missing required configurations or specified coefficient labels without defined values. + should catch many common errors early, including missing required configurations or specified coefficient labels without defined values. """ skip_failed_choices: bool = True diff --git a/activitysim/core/fast_random/_fast_channel.py b/activitysim/core/fast_random/_fast_channel.py index 1757852592..7231f04769 100644 --- a/activitysim/core/fast_random/_fast_channel.py +++ b/activitysim/core/fast_random/_fast_channel.py @@ -70,14 +70,71 @@ def __init__( self.base_seed = base_seed self.channel_name = channel_name self.channel_seed = hash32(self.channel_name) - self.domain_index = domain_df.index.copy() + self.domain_index = domain_df.index[:0].copy() self.step_name = None self.step_seed = None self._fast_generator = FastGenerator() self._state_array = None + self.extend_domain(domain_df) if step_name: self.begin_step(step_name) + def extend_domain(self, domain_df: pd.DataFrame) -> None: + """ + Extend the channel's domain by adding new rows from *domain_df*. + + If a step is currently active, the per-row PCG64 state for the new rows + is initialised immediately (using the current ``step_seed``) and + appended to ``self._state_array`` so that random draws can be made for + the extended rows within the same step. + + The index values of *domain_df* must be disjoint from the channel's + existing ``domain_index`` so there is no ambiguity / collision between + rows. + + Parameters + ---------- + domain_df : pandas.DataFrame + DataFrame whose index defines the new agents (rows) to add to the + channel. Columns are ignored. + + Raises + ------ + AssertionError + If any index value in *domain_df* already exists in the channel's + domain. + """ + new_index = domain_df.index + + if new_index.empty: + return + + # new rows must be disjoint from existing domain + assert ( + len(self.domain_index.intersection(new_index)) == 0 + ), "extend_domain: new domain_df index overlaps existing domain" + + if self.step_name is not None: + # generate state for the new rows and append to existing state array + new_state = np.empty(shape=[len(new_index), 4], dtype=np.uint64) + for n, i in enumerate(new_index): + ss = np.random.SeedSequence( + [self.base_seed, self.channel_seed, self.step_seed, i] + ) + new_state[n, :] = self._fast_generator.get_state_array(ss) + + if self._state_array is None: + self._state_array = new_state + else: + self._state_array = np.concatenate( + [self._state_array, new_state], axis=0 + ) + + if len(self.domain_index) == 0: + self.domain_index = new_index.copy() + else: + self.domain_index = self.domain_index.append(new_index) + def begin_step(self, step_name: str) -> None: """ Initialise (or re-initialise) the per-row PCG64 states for a new step. @@ -178,7 +235,8 @@ def _check_valid_df(self, df: pd.DataFrame) -> np.ndarray: selected_positions = self.domain_index.get_indexer(df.index) # check that all df.index values were found in self.domain_index - if selected_positions.min() < 0: + # (skip the check for empty input – min() on a zero-size array errors) + if selected_positions.size and selected_positions.min() < 0: raise ValueError("DataFrame has index values not found in the domain") if self._state_array is None: diff --git a/activitysim/core/random.py b/activitysim/core/random.py index b99c0615e3..5cfce7fda6 100644 --- a/activitysim/core/random.py +++ b/activitysim/core/random.py @@ -373,7 +373,7 @@ def choice_for_df(self, df, step_name, a, size, replace): class Random(object): - def __init__(self): + def __init__(self, channel_type: str = "fast"): self.channels = {} # dict mapping df index name to channel name @@ -384,6 +384,12 @@ def __init__(self): self.base_seed = 0 self.global_rng = np.random.RandomState() + if channel_type not in ("fast", "simple"): + raise ValueError( + f"channel_type must be 'fast' or 'simple', got {channel_type!r}" + ) + self.channel_type = channel_type + def get_channel_for_df(self, df): """ Return the channel for this df. Channel should already have been loaded/added. @@ -451,7 +457,7 @@ def end_step(self, step_name): # channel management - def add_channel(self, channel_name, domain_df, fast: bool = True): + def add_channel(self, channel_name, domain_df, fast: bool | None = None): """ Create or extend a channel for generating random number streams for domain_df. @@ -468,8 +474,15 @@ def add_channel(self, channel_name, domain_df, fast: bool = True): channel_name : str expected channel name provided as a consistency check + fast : bool, optional + If ``None`` (default), the channel implementation is selected + based on ``self.channel_type``. Pass ``True`` / ``False`` to + force a specific implementation for this channel only. """ + if fast is None: + fast = self.channel_type == "fast" + if channel_name in self.channels: assert channel_name == self.index_to_channel[domain_df.index.name] logger.debug( diff --git a/activitysim/core/workflow/state.py b/activitysim/core/workflow/state.py index de22b0687d..b13e021c21 100644 --- a/activitysim/core/workflow/state.py +++ b/activitysim/core/workflow/state.py @@ -20,7 +20,7 @@ import activitysim.core.random from activitysim.core.configuration import FileSystem, NetworkSettings, Settings -from activitysim.core.exceptions import StateAccessError, CheckpointNameNotFoundError +from activitysim.core.exceptions import CheckpointNameNotFoundError, StateAccessError from activitysim.core.workflow.checkpoint import LAST_CHECKPOINT, Checkpoints from activitysim.core.workflow.chunking import Chunking from activitysim.core.workflow.dataset import Datasets @@ -181,14 +181,17 @@ def init_state(self) -> None: def _initialize_prng(self, base_seed=None): from activitysim.core.random import Random - self._context["prng"] = Random() - if base_seed is None: - try: - self.settings - except StateAccessError: + try: + self.settings + except StateAccessError: + channel_type = "fast" + if base_seed is None: base_seed = 0 - else: + else: + channel_type = getattr(self.settings, "rng_channel_type", "fast") + if base_seed is None: base_seed = self.settings.rng_base_seed + self._context["prng"] = Random(channel_type=channel_type) self._context["prng"].set_base_seed(base_seed) def import_extensions(self, ext: str | Iterable[str] = None, append=True) -> None: @@ -858,7 +861,19 @@ def extract(self, func): def rng(self) -> activitysim.core.random.Random: if "prng" not in self._context: self._initialize_prng() - return self._context["prng"] + prng = self._context["prng"] + # Allow channel_type to be picked up from settings even when prng was + # initialised before settings were loaded. Safe to update as long as + # no channels have been registered yet. + try: + desired_channel_type = getattr( + self.settings, "rng_channel_type", prng.channel_type + ) + except StateAccessError: + desired_channel_type = prng.channel_type + if desired_channel_type != prng.channel_type and not prng.channels: + prng.channel_type = desired_channel_type + return prng def pipeline_table_key(self, table_name, checkpoint_name): if checkpoint_name: From f3d3fded9c2f2043817ede9d903a16169774ef79 Mon Sep 17 00:00:00 2001 From: Jeffrey Newman Date: Thu, 30 Apr 2026 19:34:46 -0500 Subject: [PATCH 04/21] remove extra gitignores --- activitysim/abm/test/test_pipeline/output/.gitignore | 6 ------ activitysim/abm/test/test_pipeline/output/cache/.gitignore | 1 - activitysim/abm/test/test_pipeline/output/trace/.gitignore | 1 - 3 files changed, 8 deletions(-) delete mode 100644 activitysim/abm/test/test_pipeline/output/.gitignore delete mode 100644 activitysim/abm/test/test_pipeline/output/cache/.gitignore delete mode 100644 activitysim/abm/test/test_pipeline/output/trace/.gitignore diff --git a/activitysim/abm/test/test_pipeline/output/.gitignore b/activitysim/abm/test/test_pipeline/output/.gitignore deleted file mode 100644 index 9cf6da1afa..0000000000 --- a/activitysim/abm/test/test_pipeline/output/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -*.csv -*.txt -*.h5 -*.yaml -*.omx -*.mmap diff --git a/activitysim/abm/test/test_pipeline/output/cache/.gitignore b/activitysim/abm/test/test_pipeline/output/cache/.gitignore deleted file mode 100644 index a55e67af68..0000000000 --- a/activitysim/abm/test/test_pipeline/output/cache/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.mmap diff --git a/activitysim/abm/test/test_pipeline/output/trace/.gitignore b/activitysim/abm/test/test_pipeline/output/trace/.gitignore deleted file mode 100644 index afed0735dc..0000000000 --- a/activitysim/abm/test/test_pipeline/output/trace/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.csv From 321b106892be88cd89b7b3c9578746308bd061b9 Mon Sep 17 00:00:00 2001 From: Jeffrey Newman Date: Thu, 30 Apr 2026 20:07:49 -0500 Subject: [PATCH 05/21] change default from fast to simple --- activitysim/core/configuration/top.py | 7 ++++++- activitysim/core/fast_random/_fast_random.py | 8 ++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/activitysim/core/configuration/top.py b/activitysim/core/configuration/top.py index 9dbb61820c..b65525d577 100644 --- a/activitysim/core/configuration/top.py +++ b/activitysim/core/configuration/top.py @@ -743,7 +743,7 @@ def _check_store_skims_in_shm(self): rng_base_seed: Union[int, None] = 0 """Base seed for pseudo-random number generator.""" - rng_channel_type: Literal["fast", "simple"] = "fast" + rng_channel_type: Literal["simple", "fast"] = "simple" """ Which random-channel implementation to use for per-row RNG streams. @@ -755,6 +755,11 @@ def _check_store_skims_in_shm(self): which seeds a per-row :class:`numpy.random.RandomState` on the fly. Slower but provided for backward compatibility / reproducibility with results generated by older ActivitySim versions. + + Note the default is `simple` not because simple is preferred, but because + many integration tests are built looking for the results derived from the + original `simple` channel. New models should generally prefer the `fast` + channel type, as it is faster. """ duplicate_step_execution: Literal["error", "allow"] = "error" diff --git a/activitysim/core/fast_random/_fast_random.py b/activitysim/core/fast_random/_fast_random.py index a4663f7d6d..a7241696bf 100644 --- a/activitysim/core/fast_random/_fast_random.py +++ b/activitysim/core/fast_random/_fast_random.py @@ -1822,7 +1822,9 @@ def vector_random_standard_normal( ``(n_selected, *shape)`` when *selected_positions* is given, or ``(n_agents, *shape)`` otherwise. """ - if isinstance(shape, int): + if shape is None: + shape = 1 + if isinstance(shape, (int, np.integer)): shape = (shape,) if selected_positions is not None: return _selected_vector_random_standard_normal( @@ -1877,7 +1879,9 @@ def vector_random_standard_uniform( ``(n_selected, *shape)`` when *selected_positions* is given, or ``(n_agents, *shape)`` otherwise; values drawn from U[0, 1). """ - if isinstance(shape, int): + if shape is None: + shape = 1 + if isinstance(shape, (int, np.integer)): shape = (shape,) if selected_positions is not None: return _selected_vector_random_standard_uniform( From 2f48d8ae1b7c98ab409127e21d3bebe92e22ba46 Mon Sep 17 00:00:00 2001 From: Jeffrey Newman Date: Sat, 2 May 2026 18:28:40 -0500 Subject: [PATCH 06/21] reseed only when needed --- activitysim/core/fast_random/_fast_channel.py | 65 ++++++++++++++----- 1 file changed, 47 insertions(+), 18 deletions(-) diff --git a/activitysim/core/fast_random/_fast_channel.py b/activitysim/core/fast_random/_fast_channel.py index 7231f04769..1b41c08141 100644 --- a/activitysim/core/fast_random/_fast_channel.py +++ b/activitysim/core/fast_random/_fast_channel.py @@ -114,7 +114,8 @@ def extend_domain(self, domain_df: pd.DataFrame) -> None: len(self.domain_index.intersection(new_index)) == 0 ), "extend_domain: new domain_df index overlaps existing domain" - if self.step_name is not None: + if self._state_array is not None: + # we already have state for some rows, so we also need to # generate state for the new rows and append to existing state array new_state = np.empty(shape=[len(new_index), 4], dtype=np.uint64) for n, i in enumerate(new_index): @@ -122,19 +123,48 @@ def extend_domain(self, domain_df: pd.DataFrame) -> None: [self.base_seed, self.channel_seed, self.step_seed, i] ) new_state[n, :] = self._fast_generator.get_state_array(ss) - - if self._state_array is None: - self._state_array = new_state - else: - self._state_array = np.concatenate( - [self._state_array, new_state], axis=0 - ) + self._state_array = np.concatenate([self._state_array, new_state], axis=0) if len(self.domain_index) == 0: self.domain_index = new_index.copy() else: self.domain_index = self.domain_index.append(new_index) + def _reseed_step(self, force: bool = False) -> None: + """ + Initialise (or re-initialise) the per-row PCG64 states for a new step. + + Must be called before any random-number methods are used within a step. + The method seeds every row's bit-generator from the four-integer sequence + ``[base_seed, channel_seed, step_seed, row_index]`` via + :class:`numpy.random.SeedSequence`, ensuring that: + + * the same step always produces the same stream (reproducibility), and + * different steps produce independent streams (no cross-step correlation). + + Parameters + ---------- + step_name : str + Name of the pipeline step being started (e.g. ``"auto_ownership"``). + Hashed into the seed so that different steps yield distinct streams. + + Raises + ------ + AssertionError + If a step is already active (``end_step`` was not called first). + """ + + if self._state_array is None or force: + # Seed the bit generators, extracting state along the way + state_array = np.empty(shape=[len(self.domain_index), 4], dtype=np.uint64) + for n, i in enumerate(self.domain_index): + ss = np.random.SeedSequence( + [self.base_seed, self.channel_seed, self.step_seed, i] + ) + state_array[n, :] = self._fast_generator.get_state_array(ss) + + self._state_array = state_array + def begin_step(self, step_name: str) -> None: """ Initialise (or re-initialise) the per-row PCG64 states for a new step. @@ -164,15 +194,11 @@ def begin_step(self, step_name: str) -> None: self.step_name = step_name self.step_seed = hash32(self.step_name) - # Seed the bit generators, extracting state along the way - state_array = np.empty(shape=[len(self.domain_index), 4], dtype=np.uint64) - for n, i in enumerate(self.domain_index): - ss = np.random.SeedSequence( - [self.base_seed, self.channel_seed, self.step_seed, i] - ) - state_array[n, :] = self._fast_generator.get_state_array(ss) - - self._state_array = state_array + # do NOT reseed immediately, defer until the first call to generate + # any random numbers using this channel. There may not be any such + # calls (most ActivitySim steps only use one of many channels), and + # we want to avoid the overhead of seeding every channel in every + # step when many channels are unused. def end_step(self, step_name: str = "") -> None: """ @@ -239,7 +265,7 @@ def _check_valid_df(self, df: pd.DataFrame) -> np.ndarray: if selected_positions.size and selected_positions.min() < 0: raise ValueError("DataFrame has index values not found in the domain") - if self._state_array is None: + if self.step_name is None: raise ValueError("outside of a defined step") return selected_positions @@ -299,6 +325,7 @@ def normal_for_df( assert step_name is not None assert step_name == self.step_name selected_positions = self._check_valid_df(df) + self._reseed_step() if size is None: size = 1 @@ -352,6 +379,7 @@ def random_for_df( assert step_name is not None assert step_name == self.step_name selected_positions = self._check_valid_df(df) + self._reseed_step() return self._fast_generator.vector_random_standard_uniform( self._state_array, selected_positions=selected_positions, shape=n ) @@ -398,6 +426,7 @@ def choice_for_df( assert step_name is not None assert step_name == self.step_name selected_positions = self._check_valid_df(df) + self._reseed_step() # total number of draws required per row if isinstance(size, (int, np.integer)): From 4774d3fdabe116000870bc75c1c69b9036596a92 Mon Sep 17 00:00:00 2001 From: Jeffrey Newman Date: Sat, 2 May 2026 22:39:16 -0500 Subject: [PATCH 07/21] SFC64 with quick entropy --- .../abm/test/test_pipeline/test_pipeline.py | 2 +- activitysim/core/configuration/top.py | 9 ++- activitysim/core/fast_random/_fast_channel.py | 69 ++++++++++++++++--- activitysim/core/fast_random/_fast_random.py | 66 +++++++++++++----- activitysim/core/random.py | 13 ++-- 5 files changed, 123 insertions(+), 36 deletions(-) diff --git a/activitysim/abm/test/test_pipeline/test_pipeline.py b/activitysim/abm/test/test_pipeline/test_pipeline.py index 15c225d4f9..c6881f1bd1 100644 --- a/activitysim/abm/test/test_pipeline/test_pipeline.py +++ b/activitysim/abm/test/test_pipeline/test_pipeline.py @@ -43,7 +43,7 @@ def example_path(dirname): # output directories so checkpoint stores written by one channel type cannot # be read by the other, and (c) look up the appropriate regression-expected # values from per-channel-type tables defined below. -CHANNEL_TYPES = ("simple", "fast") +CHANNEL_TYPES = ("simple", "fast", "faster") @pytest.fixture(params=CHANNEL_TYPES) diff --git a/activitysim/core/configuration/top.py b/activitysim/core/configuration/top.py index b65525d577..5d3dd5e6f8 100644 --- a/activitysim/core/configuration/top.py +++ b/activitysim/core/configuration/top.py @@ -743,13 +743,16 @@ def _check_store_skims_in_shm(self): rng_base_seed: Union[int, None] = 0 """Base seed for pseudo-random number generator.""" - rng_channel_type: Literal["simple", "fast"] = "simple" + rng_channel_type: Literal["simple", "fast", "faster"] = "simple" """ Which random-channel implementation to use for per-row RNG streams. * ``"fast"`` (default) - Use :class:`activitysim.core.fast_random.FastChannel`, a vectorised - PCG64-based implementation. + Use :class:`activitysim.core.fast_random.FastChannel`, with a + vectorised PCG64-based implementation. + * ``"faster"`` (default) + Use :class:`activitysim.core.fast_random.FastChannel`, with a + vectorised SFC64-based implementation and hash-based reseeding. * ``"simple"`` Use the legacy :class:`activitysim.core.random.SimpleChannel`, which seeds a per-row :class:`numpy.random.RandomState` on the fly. diff --git a/activitysim/core/fast_random/_fast_channel.py b/activitysim/core/fast_random/_fast_channel.py index 1b41c08141..076f3f2151 100644 --- a/activitysim/core/fast_random/_fast_channel.py +++ b/activitysim/core/fast_random/_fast_channel.py @@ -1,12 +1,13 @@ from __future__ import annotations import hashlib +from typing import Literal import numpy as np import pandas as pd from cffi import FFI -from ._fast_random import FastGenerator +from ._fast_random import FastGenerator, quick_entropy # one more than 0xFFFFFFFF so we can wrap using: int64 % _MAX_SEED _MAX_SEED = 1 << 32 @@ -38,6 +39,8 @@ def __init__( base_seed: int, domain_df: pd.DataFrame, step_name: str = "", + bit_generator: Literal["PCG64", "SFC64"] = "SFC64", + entropy_type: Literal[None, "robust", "quick"] = None, ) -> None: """ Create a new FastChannel for vectorised PCG64-based random number generation. @@ -66,6 +69,20 @@ def __init__( If non-empty, ``begin_step(step_name)`` is called immediately after construction so the channel is ready to generate numbers straight away. Defaults to ``""`` (no step started). + bit_generator : {"SFC64", "PCG64"}, default: "SFC64" + Which bit generator to use for the per-row streams. Defaults to + SFC64, which supports using quick-hash random entropy for maximum + speed at runtime. + entropy_type : {None, "robust", "quick"}, default: None + The type of entropy used to reseed the bit generators. If ``None``, + the entropy will be selected automatically based on ``bit_generator``. + Robust entropy uses the numpy SeedSequence tools to create entropy + with strong statistical properties, but is slower to generate. Quick + entropy uses a custom hash-based method that is much faster to generate + but may have a slightly greater risk of non-independent streams. Since + ActivitySim reseeds quite frequently, the practical risk of problems + is low. + """ self.base_seed = base_seed self.channel_name = channel_name @@ -73,12 +90,45 @@ def __init__( self.domain_index = domain_df.index[:0].copy() self.step_name = None self.step_seed = None - self._fast_generator = FastGenerator() + + # If entropy_type is not given, choose a default appropriate for the bit generator + if entropy_type is None: + if bit_generator == "PCG64": + entropy_type = "robust" + elif bit_generator == "SFC64": + entropy_type = "quick" + else: + raise ValueError(f"unsupported bit generator class: {bit_generator}") + + if entropy_type not in {"robust", "quick"}: + raise ValueError("entropy_type must be 'robust' or 'quick'") + self._entropy_type = entropy_type + self._fast_generator = FastGenerator(bit_gen=bit_generator) self._state_array = None self.extend_domain(domain_df) if step_name: self.begin_step(step_name) + def _init_states(self, *seeds): + if self._entropy_type == "robust": + return self._fast_generator.get_state_array( + np.random.SeedSequence(list(seeds)) + ) + elif self._entropy_type == "quick": + if self._fast_generator._bit_gen_class == "PCG64": + raise ValueError( + "PCG64 random number generator does not support quick entropy" + ) + elif self._fast_generator._bit_gen_class == "SFC64": + q = quick_entropy(seeds).copy() + q[-1] = 1 + # self._fast_generator.vector_random_standard_uniform(q.reshape(1, -1), shape=12) + return q + else: + raise ValueError( + f"unsupported bit generator class: {self._fast_generator._bit_gen_class}" + ) + def extend_domain(self, domain_df: pd.DataFrame) -> None: """ Extend the channel's domain by adding new rows from *domain_df*. @@ -119,10 +169,11 @@ def extend_domain(self, domain_df: pd.DataFrame) -> None: # generate state for the new rows and append to existing state array new_state = np.empty(shape=[len(new_index), 4], dtype=np.uint64) for n, i in enumerate(new_index): - ss = np.random.SeedSequence( - [self.base_seed, self.channel_seed, self.step_seed, i] + new_state[n, :] = self._init_states( + self.base_seed, self.channel_seed, self.step_seed, i ) - new_state[n, :] = self._fast_generator.get_state_array(ss) + # if we are using quick entropy, make a few draws to properly mix + self._fast_generator.vector_random_standard_uniform(new_state, shape=12) self._state_array = np.concatenate([self._state_array, new_state], axis=0) if len(self.domain_index) == 0: @@ -158,11 +209,11 @@ def _reseed_step(self, force: bool = False) -> None: # Seed the bit generators, extracting state along the way state_array = np.empty(shape=[len(self.domain_index), 4], dtype=np.uint64) for n, i in enumerate(self.domain_index): - ss = np.random.SeedSequence( - [self.base_seed, self.channel_seed, self.step_seed, i] + state_array[n, :] = self._init_states( + self.base_seed, self.channel_seed, self.step_seed, i ) - state_array[n, :] = self._fast_generator.get_state_array(ss) - + # if we are using quick entropy, make a few draws to properly mix + self._fast_generator.vector_random_standard_uniform(state_array, shape=12) self._state_array = state_array def begin_step(self, step_name: str) -> None: diff --git a/activitysim/core/fast_random/_fast_random.py b/activitysim/core/fast_random/_fast_random.py index a7241696bf..91490a8ca6 100644 --- a/activitysim/core/fast_random/_fast_random.py +++ b/activitysim/core/fast_random/_fast_random.py @@ -1280,11 +1280,14 @@ from __future__ import annotations +import hashlib + # --------------------------------------------------------------------------- # Ziggurat tables (parsed from the C source comments at the top of this file) # --------------------------------------------------------------------------- import math as math import re as re +from typing import Literal import cffi import numba as nb @@ -1299,14 +1302,6 @@ _WI_DOUBLE = None _FI_DOUBLE = None -# _BIT_GENERATOR = np.random.PCG64(seed=42) -# _next_uint64 = _BIT_GENERATOR.cffi.next_uint64 -# _next_double = _BIT_GENERATOR.cffi.next_double -# _state_address = _BIT_GENERATOR.cffi.state_address -# _state_ptr = _FFI.cast("uint8_t(*)[128]", _state_address) -# _state_bytes = np.frombuffer(_FFI.buffer(_state_ptr), dtype=np.uint8) -# _state_uint64 = _state_bytes.view(np.uint64)[3:7] - def _load_ziggurat_tables(): """Parse the C-comment Ziggurat tables embedded at the top of this module.""" @@ -1744,9 +1739,27 @@ def _selected_vector_random_standard_uniform( return result +def quick_entropy(*seeds): + """Generate a quick-and-dirty entropy array from the given seeds. + + This is a less robust starting seed than generated by numpy.random.SeedSequence, + but it is very fast to compute and should be sufficient for most purposes. + The resulting array is deterministic for a given set of seeds, but will change + if the seeds change in any way (e.g. different order). + """ + h = hashlib.sha256() + for s in seeds: + h.update(str(s).encode()) + h.update(b"|") + return np.frombuffer(h.digest(), dtype=np.uint64) + + class FastGenerator: - def __init__(self, seed: int = 42) -> None: - self._bit_generator = np.random.PCG64(seed=seed) + def __init__( + self, seed: int = 42, bit_gen: Literal["PCG64", "SFC64"] = "PCG64" + ) -> None: + self._bit_gen_class = bit_gen + self._bit_generator = self._new_bit_generator(seed) self._next_uint64 = self._bit_generator.cffi.next_uint64 self._next_double = self._bit_generator.cffi.next_double self._state_address = self._bit_generator.cffi.state_address @@ -1756,10 +1769,13 @@ def __init__(self, seed: int = 42) -> None: # Determine the values currently in the `state` state_array = np.empty(shape=[4], dtype=np.uint64) _state = self._bit_generator.state["state"] - state_array[0] = _state["state"] & 0xFFFFFFFFFFFFFFFF - state_array[1] = _state["state"] >> 64 - state_array[2] = _state["inc"] & 0xFFFFFFFFFFFFFFFF - state_array[3] = _state["inc"] >> 64 + if bit_gen == "PCG64": + state_array[0] = _state["state"] & 0xFFFFFFFFFFFFFFFF + state_array[1] = _state["state"] >> 64 + state_array[2] = _state["inc"] & 0xFFFFFFFFFFFFFFFF + state_array[3] = _state["inc"] >> 64 + elif bit_gen == "SFC64": + state_array[:] = _state["state"][:] # There are 4 values in state_array, which correspond to 4 values # within _state_bytes.view(np.uint64), in a contiguous block but @@ -1783,15 +1799,27 @@ def __init__(self, seed: int = 42) -> None: self._slice_positions = np.asarray(positions) - self._slice_start + def _new_bit_generator(self, seed): + if self._bit_gen_class == "PCG64": + return np.random.PCG64(seed=seed) + elif self._bit_gen_class == "SFC64": + return np.random.SFC64(seed=seed) + else: + raise ValueError("bit_gen must be one of 'PCG64' or 'SFC64'") + def get_state_array(self, seed) -> np.ndarray: """Get the state array for a given seed, in the order expected by the numba functions.""" - bit_generator = np.random.PCG64(seed=seed) + bit_generator = self._new_bit_generator(seed=seed) _state = bit_generator.state["state"] state_array = np.empty(shape=[4], dtype=np.uint64) - state_array[self._slice_positions[0]] = _state["state"] & 0xFFFFFFFFFFFFFFFF - state_array[self._slice_positions[1]] = _state["state"] >> 64 - state_array[self._slice_positions[2]] = _state["inc"] & 0xFFFFFFFFFFFFFFFF - state_array[self._slice_positions[3]] = _state["inc"] >> 64 + if self._bit_gen_class == "PCG64": + state_array[self._slice_positions[0]] = _state["state"] & 0xFFFFFFFFFFFFFFFF + state_array[self._slice_positions[1]] = _state["state"] >> 64 + state_array[self._slice_positions[2]] = _state["inc"] & 0xFFFFFFFFFFFFFFFF + state_array[self._slice_positions[3]] = _state["inc"] >> 64 + elif self._bit_gen_class == "SFC64": + for i in range(4): + state_array[self._slice_positions[i]] = _state["state"][i] return state_array def vector_random_standard_normal( diff --git a/activitysim/core/random.py b/activitysim/core/random.py index 5cfce7fda6..9a23228d6b 100644 --- a/activitysim/core/random.py +++ b/activitysim/core/random.py @@ -384,9 +384,9 @@ def __init__(self, channel_type: str = "fast"): self.base_seed = 0 self.global_rng = np.random.RandomState() - if channel_type not in ("fast", "simple"): + if channel_type not in ("fast", "faster", "simple"): raise ValueError( - f"channel_type must be 'fast' or 'simple', got {channel_type!r}" + f"channel_type must be 'fast', 'faster' or 'simple', got {channel_type!r}" ) self.channel_type = channel_type @@ -481,7 +481,7 @@ def add_channel(self, channel_name, domain_df, fast: bool | None = None): """ if fast is None: - fast = self.channel_type == "fast" + fast = self.channel_type in {"fast", "faster"} if channel_name in self.channels: assert channel_name == self.index_to_channel[domain_df.index.name] @@ -499,8 +499,13 @@ def add_channel(self, channel_name, domain_df, fast: bool | None = None): ) channel_class = FastChannel if fast else SimpleChannel + channel_args = {} + if self.channel_type == "faster": + channel_args = {"bit_generator": "SFC64", "entropy_type": "quick"} + if self.channel_type == "fast": + channel_args = {"bit_generator": "PCG64", "entropy_type": "robust"} channel = channel_class( - channel_name, self.base_seed, domain_df, self.step_name + channel_name, self.base_seed, domain_df, self.step_name, **channel_args ) self.channels[channel_name] = channel From 9702ef53f1cb6ca3a09112f277cd0d66d1939296 Mon Sep 17 00:00:00 2001 From: Jeffrey Newman Date: Sat, 2 May 2026 22:46:25 -0500 Subject: [PATCH 08/21] reseed in tests --- activitysim/core/fast_random/_fast_channel.py | 2 +- activitysim/core/test/test_fast_channel.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/activitysim/core/fast_random/_fast_channel.py b/activitysim/core/fast_random/_fast_channel.py index 076f3f2151..365f2d9fb2 100644 --- a/activitysim/core/fast_random/_fast_channel.py +++ b/activitysim/core/fast_random/_fast_channel.py @@ -39,7 +39,7 @@ def __init__( base_seed: int, domain_df: pd.DataFrame, step_name: str = "", - bit_generator: Literal["PCG64", "SFC64"] = "SFC64", + bit_generator: Literal["PCG64", "SFC64"] = "PCG64", entropy_type: Literal[None, "robust", "quick"] = None, ) -> None: """ diff --git a/activitysim/core/test/test_fast_channel.py b/activitysim/core/test/test_fast_channel.py index 1d45f1b3a0..9ca3e6b3e7 100644 --- a/activitysim/core/test/test_fast_channel.py +++ b/activitysim/core/test/test_fast_channel.py @@ -56,6 +56,8 @@ def test_domain_index_is_copy(self): def test_step_started_when_step_name_given(self): ch = _make_channel(step_name="my_step") assert ch.step_name == "my_step" + assert ch._state_array is None + ch._reseed_step() # ensure state is populated assert ch._state_array is not None assert ch._state_array.shape == (5, 4) @@ -81,6 +83,8 @@ def test_begin_step_populates_state(self): ch = _make_channel() ch.begin_step("step1") assert ch.step_name == "step1" + assert ch._state_array is None # no state before a draw + ch._reseed_step() # ensure state is populated assert ch._state_array is not None assert ch._state_array.shape == (5, 4) assert ch._state_array.dtype == np.uint64 @@ -121,18 +125,22 @@ def test_begin_step_same_name_reproducible(self): """Re-running the same step name must reproduce the same state array.""" ch = _make_channel() ch.begin_step("step1") + ch._reseed_step() # ensure state is populated state1 = ch._state_array.copy() ch.end_step() ch.begin_step("step1") + ch._reseed_step() # ensure state is populated state2 = ch._state_array.copy() npt.assert_array_equal(state1, state2) def test_begin_step_different_names_give_different_states(self): ch = _make_channel() ch.begin_step("step_a") + ch._reseed_step() # ensure state is populated state_a = ch._state_array.copy() ch.end_step() ch.begin_step("step_b") + ch._reseed_step() # ensure state is populated state_b = ch._state_array.copy() assert not np.array_equal(state_a, state_b) From 45415f11e8e1555c050ad224c382090b039e3a05 Mon Sep 17 00:00:00 2001 From: Jeffrey Newman Date: Sat, 2 May 2026 23:01:13 -0500 Subject: [PATCH 09/21] update test targets --- .../abm/test/test_pipeline/test_pipeline.py | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/activitysim/abm/test/test_pipeline/test_pipeline.py b/activitysim/abm/test/test_pipeline/test_pipeline.py index c6881f1bd1..6145ce7fe1 100644 --- a/activitysim/abm/test/test_pipeline/test_pipeline.py +++ b/activitysim/abm/test/test_pipeline/test_pipeline.py @@ -122,16 +122,19 @@ def test_rng_access(channel_type): _MINI_AUTO_HH_IDS = [1099626, 1173905, 1196298, 1286259] _MINI_AUTO_EXPECTED = { "simple": [1, 1, 0, 0], - "fast": [0, 0, 1, 0], + "fast": [1, 0, 0, 1], + "faster": [1, 0, 0, 1], } _MINI_MTF_PER_IDS = { "simple": [2566701, 2566702, 3061895], - "fast": [3188482, 3188483, 3188484], + "fast": [3188483, 3188484, 3188485], + "faster": [2458503, 2566698, 2566700], } _MINI_MTF_EXPECTED = { "simple": ["school1", "school1", "work1"], - "fast": ["work1", "work1", "work_and_school"], + "fast": ["work1", "work1", "work1"], + "faster": ["school1", "work1", "school1"], } @@ -375,7 +378,8 @@ def full_run( EXPECT_TOUR_COUNT = { "simple": 121, - "fast": 108, + "fast": 103, + "faster": 102, } @@ -384,17 +388,20 @@ def full_run( # reference runs and are checked here to guard against unintended drift. _EXPECT_PERSON_IDS = { "simple": [325051, 325051, 325051, 325052, 325052, 325052], - "fast": [325051, 325051, 325051, 325052], + "fast": [325051, 325051, 325052], + "faster": [325051, 325052], } _EXPECT_TOUR_TYPES = { "simple": ["othdiscr", "work", "work", "business", "work", "othmaint"], - "fast": ["work", "escort", "eatout", "work"], + "fast": ["work", "work", "work"], + "faster": ["work", "work"], } _EXPECT_MODES = { "simple": ["WALK", "WALK", "SHARED3FREE", "WALK", "WALK_LOC", "WALK"], - "fast": ["WALK", "TNC_SHARED", "WALK", "WALK"], + "fast": ["WALK", "WALK", "WALK_LRF"], + "faster": ["WALK_LOC", "WALK"], } From 58cd433d9e6752773cdff079840462deadac3b1e Mon Sep 17 00:00:00 2001 From: Jeffrey Newman Date: Sat, 2 May 2026 23:24:32 -0500 Subject: [PATCH 10/21] fix tests --- .../abm/test/test_pipeline/test_pipeline.py | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/activitysim/abm/test/test_pipeline/test_pipeline.py b/activitysim/abm/test/test_pipeline/test_pipeline.py index 6145ce7fe1..b970d834d7 100644 --- a/activitysim/abm/test/test_pipeline/test_pipeline.py +++ b/activitysim/abm/test/test_pipeline/test_pipeline.py @@ -465,22 +465,26 @@ def regress(state: workflow.State, channel_type: str = "simple"): assert tours_df.shape[0] > 0 assert not tours_df.tour_mode.isnull().any() - # optional logsum column was added to all tours except mandatory - assert "destination_logsum" in tours_df - if ( - tours_df.destination_logsum.isnull() != (tours_df.tour_category == "mandatory") - ).any(): - print( - tours_df[ - ( - tours_df.destination_logsum.isnull() - != (tours_df.tour_category == "mandatory") - ) - ] - ) - assert ( - tours_df.destination_logsum.isnull() == (tours_df.tour_category == "mandatory") - ).all() + if "destination_logsum" in tours_df: + # optional logsum column was added to all tours except mandatory + # since there are now multiple different random generators, there is no + # guarantee that there are any non-mandatory tours (e.g. in the singleton test) + if ( + tours_df.destination_logsum.isnull() + != (tours_df.tour_category == "mandatory") + ).any(): + print( + tours_df[ + ( + tours_df.destination_logsum.isnull() + != (tours_df.tour_category == "mandatory") + ) + ] + ) + assert ( + tours_df.destination_logsum.isnull() + == (tours_df.tour_category == "mandatory") + ).all() # mode choice logsum calculated for all tours assert "mode_choice_logsum" in tours_df From 624bc4aae152fe868d444b8cc7f10047fbad43cd Mon Sep 17 00:00:00 2001 From: Jeffrey Newman Date: Sat, 2 May 2026 23:39:14 -0500 Subject: [PATCH 11/21] add a script to demonstrate random runtime performance gains --- activitysim/core/random.py | 4 +- .../scripts/random-performance.ipynb | 727 ++++++++++++++++++ 2 files changed, 729 insertions(+), 2 deletions(-) create mode 100644 other_resources/scripts/random-performance.ipynb diff --git a/activitysim/core/random.py b/activitysim/core/random.py index 9a23228d6b..771d83f7b4 100644 --- a/activitysim/core/random.py +++ b/activitysim/core/random.py @@ -500,9 +500,9 @@ def add_channel(self, channel_name, domain_df, fast: bool | None = None): channel_class = FastChannel if fast else SimpleChannel channel_args = {} - if self.channel_type == "faster": + if fast and self.channel_type == "faster": channel_args = {"bit_generator": "SFC64", "entropy_type": "quick"} - if self.channel_type == "fast": + if fast and self.channel_type == "fast": channel_args = {"bit_generator": "PCG64", "entropy_type": "robust"} channel = channel_class( channel_name, self.base_seed, domain_df, self.step_name, **channel_args diff --git a/other_resources/scripts/random-performance.ipynb b/other_resources/scripts/random-performance.ipynb new file mode 100644 index 0000000000..9108634f2e --- /dev/null +++ b/other_resources/scripts/random-performance.ipynb @@ -0,0 +1,727 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "9b2e3b94", + "metadata": {}, + "outputs": [], + "source": [ + "from activitysim.core.random import Random\n", + "\n", + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d39b9739", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Length of idxs: 249972\n" + ] + } + ], + "source": [ + "prng = np.random.default_rng(seed=12345)\n", + "idxs = pd.Index(np.sort(prng.integers(1_000_000_000, size=250_000)))\n", + "idxs = idxs.unique()\n", + "print(f\"Length of idxs: {idxs.size}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "82aeb86c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dummy
household_id
51761
63381
85231
114551
191271
......
9999801881
9999830151
9999874651
9999915721
9999942001
\n", + "

249972 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " dummy\n", + "household_id \n", + "5176 1\n", + "6338 1\n", + "8523 1\n", + "11455 1\n", + "19127 1\n", + "... ...\n", + "999980188 1\n", + "999983015 1\n", + "999987465 1\n", + "999991572 1\n", + "999994200 1\n", + "\n", + "[249972 rows x 1 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hh = pd.DataFrame(index=idxs, columns=[])\n", + "hh.index.name = \"household_id\"\n", + "hh[\"dummy\"] = 1\n", + "hh" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "99dddb2e", + "metadata": {}, + "outputs": [], + "source": [ + "r = Random(channel_type=\"faster\")\n", + "r.set_base_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "727ccddd", + "metadata": {}, + "outputs": [], + "source": [ + "r.add_channel(\"households\", hh, fast=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "8adc2910", + "metadata": {}, + "outputs": [], + "source": [ + "shh = hh.copy()\n", + "shh.index.name = \"slow_hhid\"" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "f6e0addd", + "metadata": {}, + "outputs": [], + "source": [ + "r.add_channel(\"slow_households\", shh, fast=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "970a59cf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 1.02 ms, sys: 518 μs, total: 1.54 ms\n", + "Wall time: 983 μs\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "r.begin_step(step_name=\"peekaboo\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "96dd5a03", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0.15779417],\n", + " [0.53942147],\n", + " [0.08538743],\n", + " ...,\n", + " [0.08571587],\n", + " [0.70314741],\n", + " [0.16105867]])" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# first call per session to each randomization function on fast channel will trigger \n", + "# numba compilation, which takes an extra second or two\n", + "# compilation is once per ActivitySim session, not once per step, so all subsequent \n", + "# calls to [random_for_df, normal_for_df] will be faster\n", + "\n", + "# the first call to any randomization function on a channel within a step will trigger reseeding, this happens once per step only\n", + "# but it does happen every new step.\n", + "\n", + "r.random_for_df(hh)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "b6550f05", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "7.08 ms ± 35.2 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" + ] + } + ], + "source": [ + "%%timeit -n 10\n", + "\n", + "# subsequent calls to random_for_df on the same channel within the same step should be fast, no numba compilation, no reseeding\n", + "\n", + "r.random_for_df(hh)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "814044b1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 422 ms, sys: 5.54 ms, total: 428 ms\n", + "Wall time: 427 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "array([[0.12387598],\n", + " [0.37916747],\n", + " [0.11812594],\n", + " ...,\n", + " [0.22852176],\n", + " [0.08680529],\n", + " [0.97651523]])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "\n", + "# by contrast, the old slow channel will trigger reseeding every time\n", + "\n", + "r.random_for_df(shh)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "62563564", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "474 ms ± 4.91 ms per loop (mean ± std. dev. of 5 runs, 3 loops each)\n" + ] + } + ], + "source": [ + "%%timeit -n 3 -r 5\n", + "\n", + "# so it is always slow\n", + "\n", + "r.random_for_df(shh)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "26dc0414", + "metadata": {}, + "outputs": [], + "source": [ + "## Normal distribution with parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "a2bd313a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[3.80394339],\n", + " [5.29739599],\n", + " [0.44868825],\n", + " ...,\n", + " [1.09076021],\n", + " [1.72714507],\n", + " [3.78585531]])" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "## again, first call is slow (a bit) due to numba compilation\n", + "\n", + "r.normal_for_df(hh, mu=3.0, sigma=1.5)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "9f33b887", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "7.57 ms ± 56.7 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" + ] + } + ], + "source": [ + "%%timeit -n 10\n", + "\n", + "# subsequent calls should be fast, no numba compilation, no reseeding\n", + "\n", + "r.normal_for_df(hh, mu=3.0, sigma=1.5)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "e26d6c1f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "486 ms ± 13.6 ms per loop (mean ± std. dev. of 5 runs, 3 loops each)\n" + ] + } + ], + "source": [ + "%%timeit -n 3 -r 5\n", + "\n", + "# old slow channel is slow every time due to reseeding for normals too\n", + "\n", + "r.normal_for_df(shh, mu=3.0, sigma=1.5)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "af87ac24", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1, 2, 3, 5, 4],\n", + " [3, 2, 4, 5, 1],\n", + " [1, 4, 3, 5, 2],\n", + " ...,\n", + " [5, 4, 7, 3, 2],\n", + " [7, 3, 4, 1, 5],\n", + " [2, 4, 5, 6, 3]])" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# choice_for_df sets atop the random_for_df state, so it should be fast after the first call to random_for_df\n", + "\n", + "r.choice_for_df(hh, a=[1,2,3,4,5,6,7], size=5, replace=False).reshape(-1, 5)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "eff7492b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([3, 5, 4, ..., 3, 1, 5])" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# old way still slow\n", + "\n", + "r.choice_for_df(shh, a=[1,2,3,4,5,6,7], size=5, replace=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "fbaf696e", + "metadata": {}, + "outputs": [], + "source": [ + "## Reproducible within the same named step, but different across steps due to reseeding" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "bc0d5523", + "metadata": {}, + "outputs": [], + "source": [ + "r.end_step(\"peekaboo\")\n", + "r.begin_step(\"peekaboo\")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "d677ec72", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 286 ms, sys: 6.4 ms, total: 292 ms\n", + "Wall time: 296 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "array([[ 2.22074798],\n", + " [ 4.0869555 ],\n", + " [-0.40208594],\n", + " ...,\n", + " [ 2.36546569],\n", + " [ 2.33117172],\n", + " [ 2.12748091]])" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "\n", + "# first new random draw after resetting the step, will need to reseed, so takes a moment\n", + "# but if using quick entropy, still faster than the old way\n", + "\n", + "r.normal_for_df(hh, mu=3.0, sigma=1.5)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "765b7822", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 443 ms, sys: 6.14 ms, total: 449 ms\n", + "Wall time: 454 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "array([1.48765918, 1.65064059, 2.49874109, ..., 2.84916606, 3.15987699,\n", + " 3.07499564])" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "\n", + "# old way not appreciably different, as reseeding happens every time we want to make any draw\n", + "\n", + "r.normal_for_df(shh, mu=3.0, sigma=1.5)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "f03f761f", + "metadata": {}, + "outputs": [], + "source": [ + "r.end_step(\"peekaboo\")\n", + "r.begin_step(\"peekaboo\")" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "073e840e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 265 ms, sys: 5.16 ms, total: 270 ms\n", + "Wall time: 269 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "array([[ 2.22074798],\n", + " [ 4.0869555 ],\n", + " [-0.40208594],\n", + " ...,\n", + " [ 2.36546569],\n", + " [ 2.33117172],\n", + " [ 2.12748091]])" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "\n", + "# after resetting the step with the same name, we recover the same random numbers again\n", + "\n", + "r.normal_for_df(hh, mu=3.0, sigma=1.5)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "5ec16c2a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 428 ms, sys: 3.67 ms, total: 432 ms\n", + "Wall time: 433 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "array([1.48765918, 1.65064059, 2.49874109, ..., 2.84916606, 3.15987699,\n", + " 3.07499564])" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "\n", + "r.normal_for_df(shh, mu=3.0, sigma=1.5)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "28998b7c", + "metadata": {}, + "outputs": [], + "source": [ + "# Faster is faster on a small subset also" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "b4281dac", + "metadata": {}, + "outputs": [], + "source": [ + "hh3 = hh.iloc[2:5]\n", + "shh3 = shh.iloc[2:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "3b5aa39c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "49 μs ± 21.9 μs per loop (mean ± std. dev. of 7 runs, 3 loops each)\n" + ] + } + ], + "source": [ + "%%timeit -n 3\n", + "\n", + "r.normal_for_df(hh3, mu=3.0, sigma=1.5)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "12c07d2e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "504 μs ± 121 μs per loop (mean ± std. dev. of 7 runs, 3 loops each)\n" + ] + } + ], + "source": [ + "%%timeit -n 3\n", + "\n", + "r.normal_for_df(shh3, mu=3.0, sigma=1.5)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "consort (3.10.20)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.20" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 6209ec64d29a4e83500050f04d08ad40efd5420c Mon Sep 17 00:00:00 2001 From: Jeffrey Newman Date: Sun, 3 May 2026 12:10:09 -0500 Subject: [PATCH 12/21] fix test targets --- activitysim/core/test/test_random.py | 77 ++++++++++++++++++---------- 1 file changed, 51 insertions(+), 26 deletions(-) diff --git a/activitysim/core/test/test_random.py b/activitysim/core/test/test_random.py index 83e2e4dce5..29a7bf5f9d 100644 --- a/activitysim/core/test/test_random.py +++ b/activitysim/core/test/test_random.py @@ -2,6 +2,8 @@ # See full license in LICENSE.txt. from __future__ import annotations +from typing import Literal + import numpy as np import numpy.testing as npt import pandas as pd @@ -33,13 +35,13 @@ def test_basic(): assert "call set_base_seed before the first step" in str(excinfo.value) -@pytest.mark.parametrize("fast", [False, True]) -def test_channel(fast: bool): +@pytest.mark.parametrize("channel_type", ["simple", "fast", "faster"]) +def test_channel(channel_type: Literal["simple", "fast", "faster"]): channels = { "households": "household_id", "persons": "person_id", } - rng = random.Random() + rng = random.Random(channel_type=channel_type) persons = pd.DataFrame( { @@ -59,24 +61,28 @@ def test_channel(fast: bool): rng.begin_step("test_step") - rng.add_channel("persons", persons, fast=fast) - rng.add_channel("households", households, fast=fast) + rng.add_channel("persons", persons) + rng.add_channel("households", households) rands = rng.random_for_df(persons) print("rands", np.asanyarray(rands).flatten()) assert rands.shape == (5, 1) - if fast: - test1_expected_rands = [0.4072658, 0.5591271, 0.0297283, 0.6235138, 0.6921163] + if channel_type == "fast": + test1_expected_rands = [0.162561, 0.0355962, 0.0295559, 0.2415235, 0.755611] + elif channel_type == "faster": + test1_expected_rands = [0.1461754, 0.3404831, 0.2687972, 0.5384345, 0.8583329] else: test1_expected_rands = [0.1733218, 0.1255693, 0.7384256, 0.3485183, 0.9012387] npt.assert_almost_equal(np.asanyarray(rands).flatten(), test1_expected_rands) # second call should return something different rands = rng.random_for_df(persons) - if fast: - test1_expected_rands2 = [0.336963, 0.5420581, 0.4396565, 0.9702927, 0.0251327] + if channel_type == "fast": + test1_expected_rands2 = [0.7580942, 0.7115404, 0.2006981, 0.1460596, 0.5890839] + elif channel_type == "faster": + test1_expected_rands2 = [0.023297, 0.3941741, 0.2690284, 0.4423097, 0.9897082] else: test1_expected_rands2 = [0.9105223, 0.5718418, 0.7222742, 0.9062284, 0.3929369] npt.assert_almost_equal(np.asanyarray(rands).flatten(), test1_expected_rands2) @@ -86,23 +92,29 @@ def test_channel(fast: bool): rng.begin_step("test_step2") rands = rng.random_for_df(households) - if fast: - expected_rands = [0.1571023, 0.2709219, 0.2515827, 0.9444831, 0.6816792] + if channel_type == "fast": + expected_rands = [0.9603448, 0.2157038, 0.1521092, 0.1159168, 0.525709] + elif channel_type == "faster": + expected_rands = [0.8545137, 0.6644139, 0.0298825, 0.0820411, 0.5859529] else: expected_rands = [0.417278, 0.2994774, 0.8653719, 0.4429748, 0.5101697] npt.assert_almost_equal(np.asanyarray(rands).flatten(), expected_rands) choices = rng.choice_for_df(households, [1, 2, 3, 4], 2, replace=True) - if fast: - expected_choices = [4, 1, 4, 3, 2, 1, 3, 1, 1, 4] + if channel_type == "fast": + expected_choices = [1, 3, 2, 2, 1, 3, 2, 1, 2, 2] + elif channel_type == "faster": + expected_choices = [2, 4, 1, 1, 3, 3, 3, 2, 3, 2] else: expected_choices = [2, 1, 3, 3, 4, 2, 4, 1, 4, 1] npt.assert_almost_equal(choices, expected_choices) # should be DIFFERENT the second time choices = rng.choice_for_df(households, [1, 2, 3, 4], 2, replace=True) - if fast: - expected_choices = [1, 4, 2, 1, 2, 3, 1, 2, 2, 4] + if channel_type == "fast": + expected_choices = [4, 2, 1, 3, 1, 3, 3, 1, 2, 3] + elif channel_type == "faster": + expected_choices = [3, 3, 3, 4, 3, 1, 4, 1, 1, 4] else: expected_choices = [3, 1, 4, 3, 3, 2, 2, 1, 4, 2] npt.assert_almost_equal(choices, expected_choices) @@ -113,18 +125,31 @@ def test_channel(fast: bool): rands = rng.random_for_df(households, n=2) - if fast: + if channel_type == "fast": + expected_rands = [ + 0.6475412, + 0.2042014, + 0.996377, + 0.7417811, + 0.3613508, + 0.0233316, + 0.1154262, + 0.5316864, + 0.0239487, + 0.1951297, + ] + elif channel_type == "faster": expected_rands = [ - 0.0728735, - 0.9764697, - 0.6611142, - 0.8802973, - 0.0122184, - 0.8770089, - 0.9944639, - 0.2064867, - 0.6051138, - 0.1666114, + 0.1904426, + 0.5351702, + 0.8730219, + 0.959349, + 0.7729186, + 0.425713, + 0.219114, + 0.9312722, + 0.0218454, + 0.3462854, ] else: expected_rands = [ From 04f68a74235a91c72da3b21fa23a65454b7d224b Mon Sep 17 00:00:00 2001 From: Jeffrey Newman Date: Sun, 3 May 2026 14:00:12 -0500 Subject: [PATCH 13/21] performance check script --- .../performance-checks/fast-channel-random.py | 351 ++++++++++++++++++ 1 file changed, 351 insertions(+) create mode 100644 other_resources/performance-checks/fast-channel-random.py diff --git a/other_resources/performance-checks/fast-channel-random.py b/other_resources/performance-checks/fast-channel-random.py new file mode 100644 index 0000000000..7ea9c50969 --- /dev/null +++ b/other_resources/performance-checks/fast-channel-random.py @@ -0,0 +1,351 @@ +""" +Random Number Generation Performance Benchmark +=============================================== +Compares the "fast" channel (new implementation) vs. the "slow" channel +(legacy implementation) for various random generation functions in +activitysim.core.random. + +Run this script from the activitysim repo root with: + python other_resources/scripts/random-performance.py +""" + +from __future__ import annotations + +import textwrap +import timeit + +import numpy as np +import pandas as pd + +from activitysim.core.random import Random + +# ── helpers ──────────────────────────────────────────────────────────────────── + +SEP_WIDE = "=" * 72 +SEP_THIN = "-" * 72 +INDENT = " " + + +def section(title): + print() + print(SEP_WIDE) + print(f" {title}") + print(SEP_WIDE) + + +def subsection(title): + print() + print(f"{INDENT}{SEP_THIN[2:]}") + print(f"{INDENT}{title}") + print(f"{INDENT}{SEP_THIN[2:]}") + + +def note(text, indent=2): + prefix = " " * indent + "│ " + wrapped = textwrap.fill( + text, width=68, initial_indent=prefix, subsequent_indent=prefix + ) + print(wrapped) + + +def result(label, elapsed_ms, n=None): + if n is not None: + avg_ms = elapsed_ms / n + print( + f"{INDENT} {label:<40s} {avg_ms:8.3f} ms/call " + f"(total {elapsed_ms:.1f} ms × {n})" + ) + else: + print(f"{INDENT} {label:<40s} {elapsed_ms:8.3f} ms") + + +def speedup(fast_ms, slow_ms, fast_label="fast", slow_label="slow"): + if fast_ms > 0: + ratio = slow_ms / fast_ms + print(f"{INDENT} --> {slow_label} is {ratio:.1f}x slower than {fast_label}") + + +def bench(fn, number=10, repeat=3): + """Return best average time in milliseconds.""" + times = timeit.repeat(fn, number=number, repeat=repeat) + best_total = min(times) # best total over `number` calls + return best_total / number * 1_000 # convert to ms per call + + +# ── setup ────────────────────────────────────────────────────────────────────── + +section("Setup") + +note( + "Building a household index of 250 000 unique random integers drawn " + "from [0, 1 000 000 000). This mimics a realistic, sparse household " + "ID space as seen in large-scale travel-demand models." +) + +prng = np.random.default_rng(seed=12345) +idxs = pd.Index(np.sort(prng.integers(1_000_000_000, size=250_000))).unique() +print(f"\n{INDENT} Household IDs generated: {idxs.size:,}") + +hh = pd.DataFrame(index=idxs, columns=[]) +hh.index.name = "household_id" +hh["dummy"] = 1 + +# fast channel dataframe +shh = hh.copy() +shh.index.name = "slow_hhid" + +# small slices for later +hh3 = hh.iloc[2:5] +shh3 = shh.iloc[2:5] + +note( + "Creating the Random object and registering one FAST channel " + "(new implementation) and one SLOW channel (legacy implementation).", + indent=2, +) + +r = Random(channel_type="faster") +r.set_base_seed(42) +r.add_channel("households", hh, fast=True) +r.add_channel("slow_households", shh, fast=False) + +print(f"\n{INDENT} Random object created.") +print(f"{INDENT} Base seed : 42") +print(f"{INDENT} Fast channel index name : '{hh.index.name}'") +print(f"{INDENT} Slow channel index name : '{shh.index.name}'") + +# ── benchmark: first-call costs ──────────────────────────────────────────────── + +section("First-call costs (Numba JIT compilation + reseeding)") + +note( + "The very first call to random_for_df on a fast channel within an " + "ActivitySim session triggers Numba ahead-of-time compilation. " + "This one-time cost disappears for all subsequent calls in the same " + "session. The slow channel does NOT use Numba." +) + +r.begin_step("peekaboo") + +t0 = timeit.default_timer() +r.random_for_df(hh) +first_fast_ms = (timeit.default_timer() - t0) * 1_000 + +t0 = timeit.default_timer() +r.random_for_df(shh) +first_slow_ms = (timeit.default_timer() - t0) * 1_000 + +print() +result("Fast channel – first call (w/ JIT + reseed)", first_fast_ms) +result("Slow channel – first call (w/ reseed) ", first_slow_ms) +note( + "The fast channel first-call cost includes Numba compilation. " + "This cost is paid only once per process, not once per step." +) + +# ── benchmark: random_for_df (subsequent calls) ──────────────────────────────── + +section("random_for_df — subsequent calls within the same step") + +note( + "After the first call, the fast channel has already compiled its Numba " + "kernels and cached the reseeded state. Subsequent calls within the " + "same step are pure number generation — very fast. " + "The slow channel must reseed from scratch on every single call, " + "making it consistently slow regardless of call order." +) + +fast_rand_ms = bench(lambda: r.random_for_df(hh), number=10, repeat=3) +slow_rand_ms = bench(lambda: r.random_for_df(shh), number=3, repeat=5) + +print() +result("Fast channel random_for_df (n=250k)", fast_rand_ms) +result("Slow channel random_for_df (n=250k)", slow_rand_ms) +speedup(fast_rand_ms, slow_rand_ms) + +# ── benchmark: normal_for_df ─────────────────────────────────────────────────── + +section("normal_for_df — uniform-to-normal transform (μ=3.0, σ=1.5)") + +note( + "Generating normally-distributed draws adds a Box–Muller / inverse-CDF " + "transform on top of the uniform draws. The fast channel benefits from " + "the same Numba-compiled, pre-seeded state as before. " + "The slow channel pays the full reseeding cost every call." +) + +# Trigger any remaining first-call compilation for normal_for_df +r.normal_for_df(hh, mu=3.0, sigma=1.5) + +fast_norm_ms = bench( + lambda: r.normal_for_df(hh, mu=3.0, sigma=1.5), number=10, repeat=3 +) +slow_norm_ms = bench( + lambda: r.normal_for_df(shh, mu=3.0, sigma=1.5), number=3, repeat=5 +) + +print() +result("Fast channel normal_for_df (n=250k)", fast_norm_ms) +result("Slow channel normal_for_df (n=250k)", slow_norm_ms) +speedup(fast_norm_ms, slow_norm_ms) + +# ── benchmark: choice_for_df ─────────────────────────────────────────────────── + +section("choice_for_df — sampling without replacement (size=5 from [1..7])") + +note( + "choice_for_df is built atop random_for_df, so by this point the fast " + "channel's seed state is already warm. Drawing 5 choices per row " + "without replacement means each row independently samples from [1,2,3,4,5,6,7]. " + "The slow channel is again penalised by per-call reseeding." +) + +fast_choice_ms = bench( + lambda: r.choice_for_df(hh, a=[1, 2, 3, 4, 5, 6, 7], size=5, replace=False), + number=5, + repeat=3, +) +slow_choice_ms = bench( + lambda: r.choice_for_df(shh, a=[1, 2, 3, 4, 5, 6, 7], size=5, replace=False), + number=3, + repeat=3, +) + +print() +result("Fast channel choice_for_df (n=250k, size=5)", fast_choice_ms) +result("Slow channel choice_for_df (n=250k, size=5)", slow_choice_ms) +speedup(fast_choice_ms, slow_choice_ms) + +r.end_step("peekaboo") + +# ── benchmark: cross-step reproducibility & reseed cost ─────────────────────── + +section("Cross-step reproducibility: reseed cost at start of a new step") + +note( + "When a new step begins (or the same step name is restarted), the random " + "state must be reseeded. For the fast channel this happens exactly once " + "— on the first draw after begin_step(). For the slow channel the cost " + "is paid on every single draw, every step." +) + +r.begin_step("peekaboo") + +t0 = timeit.default_timer() +r.normal_for_df(hh, mu=3.0, sigma=1.5) +fast_reseed_ms = (timeit.default_timer() - t0) * 1_000 + +t0 = timeit.default_timer() +r.normal_for_df(shh, mu=3.0, sigma=1.5) +slow_reseed_ms = (timeit.default_timer() - t0) * 1_000 + +print() +result("Fast channel – first draw after new step (reseed + generate)", fast_reseed_ms) +result("Slow channel – first draw after new step (reseed + generate)", slow_reseed_ms) +note( + "The slow channel's cost here is representative of EVERY subsequent " + "call too, since it reseeds every time. For the fast channel this " + "cost is only paid once per step." +) + +r.end_step("peekaboo") + +# ── benchmark: same step name ⇒ same numbers ────────────────────────────────── + +section("Reproducibility: re-running the same step name") + +note( + "Restarting a step with the same name must yield identical random draws. " + "Here we verify this and measure the cost of the reseed-on-first-draw " + "pattern again." +) + +r.begin_step("peekaboo") + +arr1_fast = r.normal_for_df(hh, mu=3.0, sigma=1.5) +arr1_slow = r.normal_for_df(shh, mu=3.0, sigma=1.5) + +r.end_step("peekaboo") +r.begin_step("peekaboo") + +arr2_fast = r.normal_for_df(hh, mu=3.0, sigma=1.5) +arr2_slow = r.normal_for_df(shh, mu=3.0, sigma=1.5) + +fast_repro = np.allclose(arr1_fast, arr2_fast) +slow_repro = np.allclose(arr1_slow, arr2_slow) + +print() +print(f"{INDENT} Fast channel reproducible across step restarts: {fast_repro}") +print(f"{INDENT} Slow channel reproducible across step restarts: {slow_repro}") +note( + "Both channels produce identical draws when the same step name is reused, " + "guaranteeing reproducibility across model runs." +) + +r.end_step("peekaboo") + +# ── benchmark: small subsets ─────────────────────────────────────────────────── + +section("Performance on small subsets (3 rows)") + +note( + "Even when only a handful of rows need random numbers, the slow channel " + "must still reseed the entire household table before slicing. " + "The fast channel uses the pre-computed entropy to seed only the rows " + "requested, making it proportionally cheaper on small subsets too." +) + +r.begin_step("peekaboo") +# warm up fast channel reseed for this step +r.normal_for_df(hh, mu=3.0, sigma=1.5) + +fast_small_ms = bench( + lambda: r.normal_for_df(hh3, mu=3.0, sigma=1.5), number=3, repeat=5 +) +slow_small_ms = bench( + lambda: r.normal_for_df(shh3, mu=3.0, sigma=1.5), number=3, repeat=5 +) + +print() +result("Fast channel normal_for_df (n=3)", fast_small_ms) +result("Slow channel normal_for_df (n=3)", slow_small_ms) +speedup(fast_small_ms, slow_small_ms, fast_label="fast (warm)", slow_label="slow") +note( + "Even for a 3-row draw the fast channel wins because the seeding work " + "was already done at the first draw of this step." +) + +r.end_step("peekaboo") + +# ── summary ──────────────────────────────────────────────────────────────────── + +section("Summary") + +rows = [ + ("random_for_df 250k rows", fast_rand_ms, slow_rand_ms), + ("normal_for_df 250k rows", fast_norm_ms, slow_norm_ms), + ("choice_for_df 250k rows x5", fast_choice_ms, slow_choice_ms), + ("normal_for_df 3 rows (warm)", fast_small_ms, slow_small_ms), +] + +col_w = 38 +print() +print( + f"{INDENT} {'Operation':<{col_w}} {'Fast (ms)':>10} {'Slow (ms)':>10} {'Speedup':>8}" +) +print(f"{INDENT} {'-'*col_w} {'-'*10} {'-'*10} {'-'*8}") +for label, f, s in rows: + ratio = s / f if f > 0 else float("inf") + print(f"{INDENT} {label:<{col_w}} {f:10.3f} {s:10.3f} {ratio:7.1f}x") + +print() +note( + "The fast channel should be faster in every scenario. The gap should be largest " + "on repeated within-step calls because the slow channel pays the full " + "reseeding cost each time, while the fast channel pays it only once per " + "step. Both channels are fully reproducible when the same step name " + "is reused." +) +print() +print(SEP_WIDE) +print() From 1a2aa76cd303e9054cb35566b57eead2ab867a7c Mon Sep 17 00:00:00 2001 From: Jeffrey Newman Date: Sun, 3 May 2026 14:08:31 -0500 Subject: [PATCH 14/21] action to run performance checks --- .github/workflows/performance-checks.yml | 113 +++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 .github/workflows/performance-checks.yml diff --git a/.github/workflows/performance-checks.yml b/.github/workflows/performance-checks.yml new file mode 100644 index 0000000000..e216fa85cb --- /dev/null +++ b/.github/workflows/performance-checks.yml @@ -0,0 +1,113 @@ +name: Performance Checks + +# Runs performance benchmark scripts from other_resources/performance-checks/ +# manually on demand. Use the `scripts` input to choose which scripts to run. + +on: + workflow_dispatch: + inputs: + scripts: + description: >- + Comma-separated list of script filenames to run from + other_resources/performance-checks/ (e.g. + "fast-channel-random.py" or "fast-channel-random.py,other-script.py"). + Use "all" to run every *.py script found in that directory. + required: false + default: "all" + type: string + python-version: + description: >- + Python version to use (must be available via setup-python and + compatible with the uv.lock in this repo). + required: false + default: "3.11" + type: string + +jobs: + discover: + name: Discover scripts + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.build-matrix.outputs.matrix }} + steps: + - uses: actions/checkout@v4 + + - name: Build script matrix + id: build-matrix + shell: python + run: | + import json, os, pathlib, sys + + scripts_dir = pathlib.Path("other_resources/performance-checks") + requested = "${{ inputs.scripts }}".strip() + + all_scripts = sorted(p.name for p in scripts_dir.glob("*.py")) + + if not all_scripts: + print("::error::No *.py scripts found in other_resources/performance-checks/") + sys.exit(1) + + if requested.lower() == "all": + selected = all_scripts + else: + candidates = [s.strip() for s in requested.split(",") if s.strip()] + missing = [c for c in candidates if c not in all_scripts] + if missing: + print(f"::error::The following requested scripts were not found: {missing}") + print(f"::error::Available scripts: {all_scripts}") + sys.exit(1) + selected = candidates + + matrix = {"script": selected} + print(f"Selected scripts: {selected}") + with open(os.environ["GITHUB_OUTPUT"], "a") as fh: + fh.write(f"matrix={json.dumps(matrix)}\n") + + run-script: + name: "Run ${{ matrix.script }} (${{ matrix.os }})" + needs: discover + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest] + script: ${{ fromJson(needs.discover.outputs.matrix).script }} + + defaults: + run: + # bash is available on all GitHub-hosted runners (Git Bash on Windows) + # and gives us consistent behaviour for tee, path separators, etc. + shell: bash + + steps: + - uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + version: "0.7.12" + enable-cache: true + cache-dependency-glob: "uv.lock" + + - name: Set up Python ${{ inputs.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python-version }} + + - name: Install activitysim + run: | + uv sync --locked + + - name: Run ${{ matrix.script }} + run: | + uv run python other_resources/performance-checks/${{ matrix.script }} \ + 2>&1 | tee perf-output-${{ matrix.script }}.txt + + - name: Upload output + if: always() + uses: actions/upload-artifact@v4 + with: + # artifact names must be unique across the matrix, so include the OS + name: perf-output-${{ matrix.os }}-${{ matrix.script }} + path: perf-output-${{ matrix.script }}.txt + retention-days: 90 From b32a56953b9671e2c724a45fe1b9fee9b9394e0e Mon Sep 17 00:00:00 2001 From: Jeffrey Newman Date: Sun, 3 May 2026 14:11:51 -0500 Subject: [PATCH 15/21] temporary push trigger --- .github/workflows/performance-checks.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/performance-checks.yml b/.github/workflows/performance-checks.yml index e216fa85cb..b4a9d0ac98 100644 --- a/.github/workflows/performance-checks.yml +++ b/.github/workflows/performance-checks.yml @@ -4,6 +4,8 @@ name: Performance Checks # manually on demand. Use the `scripts` input to choose which scripts to run. on: + push: + branches: ["main", "reroll"] workflow_dispatch: inputs: scripts: From fe03a23b1b0eadc7f3f9650642f8f4b195a06c5a Mon Sep 17 00:00:00 2001 From: Jeffrey Newman Date: Sun, 3 May 2026 14:15:23 -0500 Subject: [PATCH 16/21] updates for push --- .github/workflows/performance-checks.yml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/performance-checks.yml b/.github/workflows/performance-checks.yml index b4a9d0ac98..6d9dcf3a26 100644 --- a/.github/workflows/performance-checks.yml +++ b/.github/workflows/performance-checks.yml @@ -32,7 +32,7 @@ jobs: outputs: matrix: ${{ steps.build-matrix.outputs.matrix }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Build script matrix id: build-matrix @@ -49,7 +49,9 @@ jobs: print("::error::No *.py scripts found in other_resources/performance-checks/") sys.exit(1) - if requested.lower() == "all": + # Empty string occurs on push/schedule triggers where inputs are not set; + # treat it the same as the explicit "all" keyword. + if not requested or requested.lower() == "all": selected = all_scripts else: candidates = [s.strip() for s in requested.split(",") if s.strip()] @@ -91,10 +93,10 @@ jobs: enable-cache: true cache-dependency-glob: "uv.lock" - - name: Set up Python ${{ inputs.python-version }} + - name: Set up Python ${{ inputs.python-version || '3.11' }} uses: actions/setup-python@v5 with: - python-version: ${{ inputs.python-version }} + python-version: ${{ inputs.python-version || '3.11' }} - name: Install activitysim run: | From 6e5ee3551eefb2fa06c01c861edb69d8d0a170f7 Mon Sep 17 00:00:00 2001 From: Jeffrey Newman Date: Sun, 3 May 2026 14:30:19 -0500 Subject: [PATCH 17/21] make it run on windows --- .github/workflows/performance-checks.yml | 17 ++++++---- .../performance-checks/fast-channel-random.py | 32 +++++++++++++++++++ 2 files changed, 43 insertions(+), 6 deletions(-) diff --git a/.github/workflows/performance-checks.yml b/.github/workflows/performance-checks.yml index 6d9dcf3a26..c6d6f1be0f 100644 --- a/.github/workflows/performance-checks.yml +++ b/.github/workflows/performance-checks.yml @@ -4,7 +4,7 @@ name: Performance Checks # manually on demand. Use the `scripts` input to choose which scripts to run. on: - push: + push: # TODO: consider running on a schedule (e.g. weekly) but not on every push branches: ["main", "reroll"] workflow_dispatch: inputs: @@ -77,6 +77,11 @@ jobs: os: [ubuntu-latest, windows-latest] script: ${{ fromJson(needs.discover.outputs.matrix).script }} + # Force UTF-8 I/O on Windows (default console encoding is cp1252 which + # cannot represent box-drawing characters used in benchmark output). + env: + PYTHONUTF8: "1" + defaults: run: # bash is available on all GitHub-hosted runners (Git Bash on Windows) @@ -84,17 +89,16 @@ jobs: shell: bash steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Install uv - uses: astral-sh/setup-uv@v5 + uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 with: - version: "0.7.12" enable-cache: true cache-dependency-glob: "uv.lock" - name: Set up Python ${{ inputs.python-version || '3.11' }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ inputs.python-version || '3.11' }} @@ -104,12 +108,13 @@ jobs: - name: Run ${{ matrix.script }} run: | + set -o pipefail uv run python other_resources/performance-checks/${{ matrix.script }} \ 2>&1 | tee perf-output-${{ matrix.script }}.txt - name: Upload output if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: # artifact names must be unique across the matrix, so include the OS name: perf-output-${{ matrix.os }}-${{ matrix.script }} diff --git a/other_resources/performance-checks/fast-channel-random.py b/other_resources/performance-checks/fast-channel-random.py index 7ea9c50969..a7fbb9ac58 100644 --- a/other_resources/performance-checks/fast-channel-random.py +++ b/other_resources/performance-checks/fast-channel-random.py @@ -11,6 +11,7 @@ from __future__ import annotations +import sys import textwrap import timeit @@ -19,6 +20,16 @@ from activitysim.core.random import Random +# Windows consoles default to a locale-specific encoding (e.g. cp1252) that +# cannot represent the box-drawing characters used below. Reconfiguring stdout +# and stderr to UTF-8 here means the script works correctly regardless of the +# active code page, without requiring the caller to set PYTHONUTF8=1. +if hasattr(sys.stdout, "reconfigure"): + sys.stdout.reconfigure(encoding="utf-8") +if hasattr(sys.stderr, "reconfigure"): + sys.stderr.reconfigure(encoding="utf-8") + + # ── helpers ──────────────────────────────────────────────────────────────────── SEP_WIDE = "=" * 72 @@ -346,6 +357,27 @@ def bench(fn, number=10, repeat=3): "step. Both channels are fully reproducible when the same step name " "is reused." ) + +# ── sanity check ─────────────────────────────────────────────────────────────── + +failures = [label for label, f, s in rows if s <= f] +if failures: + print() + print(f"{INDENT} !! PERFORMANCE REGRESSION DETECTED !!") + for label in failures: + print( + f"{INDENT} · '{label}': slow channel was NOT slower than fast channel" + ) + note( + "A 'slow' result that is faster than (or equal to) the corresponding " + "'fast' result indicates a regression in the fast-channel implementation " + "or an anomaly in the benchmark environment. Investigate before merging." + ) + print() + print(SEP_WIDE) + print() + sys.exit(1) + print() print(SEP_WIDE) print() From d32167109a20f373c52778c8c23a80c4a2949d26 Mon Sep 17 00:00:00 2001 From: Jeffrey Newman Date: Sun, 3 May 2026 14:40:47 -0500 Subject: [PATCH 18/21] add os to artifact filename --- .github/workflows/performance-checks.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/performance-checks.yml b/.github/workflows/performance-checks.yml index c6d6f1be0f..364c119d45 100644 --- a/.github/workflows/performance-checks.yml +++ b/.github/workflows/performance-checks.yml @@ -110,7 +110,7 @@ jobs: run: | set -o pipefail uv run python other_resources/performance-checks/${{ matrix.script }} \ - 2>&1 | tee perf-output-${{ matrix.script }}.txt + 2>&1 | tee perf-output-${{ matrix.os }}-${{ matrix.script }}.txt - name: Upload output if: always() @@ -118,5 +118,5 @@ jobs: with: # artifact names must be unique across the matrix, so include the OS name: perf-output-${{ matrix.os }}-${{ matrix.script }} - path: perf-output-${{ matrix.script }}.txt + path: perf-output-${{ matrix.os }}-${{ matrix.script }}.txt retention-days: 90 From 34bee8215c1b53f6d8682d02788113c3d0ec812f Mon Sep 17 00:00:00 2001 From: Jeffrey Newman Date: Sun, 3 May 2026 16:43:15 -0500 Subject: [PATCH 19/21] use fold-in style reseeding --- activitysim/core/fast_random/_entropy.py | 115 ++++++++++++++++++ activitysim/core/fast_random/_fast_channel.py | 58 ++++----- 2 files changed, 137 insertions(+), 36 deletions(-) create mode 100644 activitysim/core/fast_random/_entropy.py diff --git a/activitysim/core/fast_random/_entropy.py b/activitysim/core/fast_random/_entropy.py new file mode 100644 index 0000000000..ba03052035 --- /dev/null +++ b/activitysim/core/fast_random/_entropy.py @@ -0,0 +1,115 @@ +from __future__ import annotations + +from typing import Literal + +import numba as nb +import numpy as np +import pandas as pd + +from ._fast_random import FastGenerator + + +@nb.njit +def _fold_in_256(key_arr: np.ndarray, data: np.uint64) -> np.ndarray: + """ + Numba kernel: mixes a uint64 `data` value into each of the four 64-bit + words of `key_arr` using a MurmurHash3-style finalizer, then applies a + four-step XOR diffusion pass so that every input bit influences all four + output words. + """ + GOLDEN = np.uint64(0x9E3779B97F4A7C15) + C1 = np.uint64(0xFF51AFD7ED558CCD) + C2 = np.uint64(0xC4CEB9FE1A85EC53) + SHIFT = np.uint64(33) + + result = np.empty(4, dtype=np.uint64) + for i in range(4): + # Salt each word position uniquely before mixing + salt = data + np.uint64(i) * GOLDEN + h = key_arr[i] ^ salt + # MurmurHash3 64-bit finalizer + h ^= h >> SHIFT + h *= C1 + h ^= h >> SHIFT + h *= C2 + h ^= h >> SHIFT + result[i] = h + + # Cross-word diffusion: ensure all 256 input bits affect all 256 output bits. + # Four XOR steps form a simple diffusion network at negligible cost. + result[1] ^= result[0] + result[2] ^= result[1] + result[3] ^= result[2] + result[0] ^= result[3] + + return result + + +@nb.njit +def _fold_in_256_batch(key_arr: np.ndarray, data_arr: np.ndarray) -> np.ndarray: + """ + Batch seeding. + + Generates `len(data_arr)` independent 256-bit child states from a single + parent `key_arr` state in one sweep. + + Parameters + ---------- + key_arr : np.ndarray + A 4-element uint64 array representing the parent 256-bit key. + data_arr : np.ndarray + A 1D array of uint64 values to fold into the key. Each value + will produce one child key. + + Returns + ------- + results : np.ndarray + A (N, 4) uint64 array representing the child keys. + """ + n = data_arr.shape[0] + results = np.empty((n, 4), dtype=np.uint64) + for j in range(n): + results[j] = _fold_in_256(key_arr, data_arr[j]) + return results + + +def _fast_entropy_raw(base_seeds: int | list[int], index_keys: pd.Index) -> np.ndarray: + + if isinstance(base_seeds, int): + base_seeds = [base_seeds] + base_seed_sequence = np.random.SeedSequence(base_seeds) + base_state = base_seed_sequence.generate_state(4, dtype=np.uint64) + return _fold_in_256_batch(base_state, np.asarray(index_keys).astype(np.uint64)) + + +_FG_PCG64 = FastGenerator(42, "PCG64") +_FG_SFC64 = FastGenerator(42, "SFC64") + + +def fast_entropy_PCG64(base_seeds: int | list[int], index_keys: pd.Index) -> np.ndarray: + generated_states = _fast_entropy_raw(base_seeds, index_keys) + generated_states[:, -1] |= 1 # Ensure the last word is odd for PCG64 + # make a couple draws to properly mix the state and avoid any initial correlation with the input keys + _FG_PCG64.vector_random_standard_uniform(generated_states, shape=2) + return generated_states + + +def fast_entropy_SFC64(base_seeds: int | list[int], index_keys: pd.Index) -> np.ndarray: + generated_states = _fast_entropy_raw(base_seeds, index_keys) + generated_states[:, -1] = 1 # the last word is a counter, start it at 1 + # SFC initialization typically makes a dozen draws to properly mix the state + _FG_SFC64.vector_random_standard_uniform(generated_states, shape=12) + return generated_states + + +def fast_entropy( + base_seeds: int | list[int], + index_keys: pd.Index, + bit_gen: Literal["PCG64", "SFC64"], +) -> np.ndarray: + if bit_gen == "PCG64": + return fast_entropy_PCG64(base_seeds, index_keys) + elif bit_gen == "SFC64": + return fast_entropy_SFC64(base_seeds, index_keys) + else: + raise ValueError("Unsupported bit gen %s" % bit_gen) diff --git a/activitysim/core/fast_random/_fast_channel.py b/activitysim/core/fast_random/_fast_channel.py index 365f2d9fb2..5f339c6285 100644 --- a/activitysim/core/fast_random/_fast_channel.py +++ b/activitysim/core/fast_random/_fast_channel.py @@ -7,6 +7,7 @@ import pandas as pd from cffi import FFI +from ._entropy import fast_entropy from ._fast_random import FastGenerator, quick_entropy # one more than 0xFFFFFFFF so we can wrap using: int64 % _MAX_SEED @@ -40,7 +41,7 @@ def __init__( domain_df: pd.DataFrame, step_name: str = "", bit_generator: Literal["PCG64", "SFC64"] = "PCG64", - entropy_type: Literal[None, "robust", "quick"] = None, + entropy_type: Literal["robust", "quick"] = "quick", ) -> None: """ Create a new FastChannel for vectorised PCG64-based random number generation. @@ -73,9 +74,8 @@ def __init__( Which bit generator to use for the per-row streams. Defaults to SFC64, which supports using quick-hash random entropy for maximum speed at runtime. - entropy_type : {None, "robust", "quick"}, default: None - The type of entropy used to reseed the bit generators. If ``None``, - the entropy will be selected automatically based on ``bit_generator``. + entropy_type : {"robust", "quick"}, default: "quick" + The type of entropy used to reseed the bit generators. Robust entropy uses the numpy SeedSequence tools to create entropy with strong statistical properties, but is slower to generate. Quick entropy uses a custom hash-based method that is much faster to generate @@ -109,25 +109,20 @@ def __init__( if step_name: self.begin_step(step_name) - def _init_states(self, *seeds): - if self._entropy_type == "robust": - return self._fast_generator.get_state_array( - np.random.SeedSequence(list(seeds)) + def _batch_init_states(self, base_seeds, index_seeds): + if self._entropy_type == "quick": + return fast_entropy( + base_seeds, index_seeds, self._fast_generator._bit_gen_class ) - elif self._entropy_type == "quick": - if self._fast_generator._bit_gen_class == "PCG64": - raise ValueError( - "PCG64 random number generator does not support quick entropy" - ) - elif self._fast_generator._bit_gen_class == "SFC64": - q = quick_entropy(seeds).copy() - q[-1] = 1 - # self._fast_generator.vector_random_standard_uniform(q.reshape(1, -1), shape=12) - return q - else: - raise ValueError( - f"unsupported bit generator class: {self._fast_generator._bit_gen_class}" + elif self._entropy_type == "robust": + new_state = np.empty(shape=[len(index_seeds), 4], dtype=np.uint64) + for n, i in enumerate(index_seeds): + new_state[n, :] = self._fast_generator.get_state_array( + np.random.SeedSequence([*base_seeds, i]) ) + return new_state + else: + raise ValueError("entropy_type must be 'robust' or 'quick'") def extend_domain(self, domain_df: pd.DataFrame) -> None: """ @@ -167,13 +162,9 @@ def extend_domain(self, domain_df: pd.DataFrame) -> None: if self._state_array is not None: # we already have state for some rows, so we also need to # generate state for the new rows and append to existing state array - new_state = np.empty(shape=[len(new_index), 4], dtype=np.uint64) - for n, i in enumerate(new_index): - new_state[n, :] = self._init_states( - self.base_seed, self.channel_seed, self.step_seed, i - ) - # if we are using quick entropy, make a few draws to properly mix - self._fast_generator.vector_random_standard_uniform(new_state, shape=12) + new_state = self._batch_init_states( + [self.base_seed, self.channel_seed, self.step_seed], new_index + ) self._state_array = np.concatenate([self._state_array, new_state], axis=0) if len(self.domain_index) == 0: @@ -207,14 +198,9 @@ def _reseed_step(self, force: bool = False) -> None: if self._state_array is None or force: # Seed the bit generators, extracting state along the way - state_array = np.empty(shape=[len(self.domain_index), 4], dtype=np.uint64) - for n, i in enumerate(self.domain_index): - state_array[n, :] = self._init_states( - self.base_seed, self.channel_seed, self.step_seed, i - ) - # if we are using quick entropy, make a few draws to properly mix - self._fast_generator.vector_random_standard_uniform(state_array, shape=12) - self._state_array = state_array + self._state_array = self._batch_init_states( + [self.base_seed, self.channel_seed, self.step_seed], self.domain_index + ) def begin_step(self, step_name: str) -> None: """ From 152896dd388d15116d4a73da345f9db89a234050 Mon Sep 17 00:00:00 2001 From: Jeffrey Newman Date: Sun, 3 May 2026 16:59:15 -0500 Subject: [PATCH 20/21] update test targets for faster seeds --- .../abm/test/test_pipeline/test_pipeline.py | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/activitysim/abm/test/test_pipeline/test_pipeline.py b/activitysim/abm/test/test_pipeline/test_pipeline.py index b970d834d7..0931ca7cd3 100644 --- a/activitysim/abm/test/test_pipeline/test_pipeline.py +++ b/activitysim/abm/test/test_pipeline/test_pipeline.py @@ -122,19 +122,19 @@ def test_rng_access(channel_type): _MINI_AUTO_HH_IDS = [1099626, 1173905, 1196298, 1286259] _MINI_AUTO_EXPECTED = { "simple": [1, 1, 0, 0], - "fast": [1, 0, 0, 1], - "faster": [1, 0, 0, 1], + "fast": [0, 0, 1, 0], + "faster": [2, 0, 0, 1], } _MINI_MTF_PER_IDS = { "simple": [2566701, 2566702, 3061895], - "fast": [3188483, 3188484, 3188485], - "faster": [2458503, 2566698, 2566700], + "fast": [3188482, 3188483, 3188484], + "faster": [2566701, 2566702, 3188482], } _MINI_MTF_EXPECTED = { "simple": ["school1", "school1", "work1"], - "fast": ["work1", "work1", "work1"], - "faster": ["school1", "work1", "school1"], + "fast": ["work1", "work1", "work_and_school"], + "faster": ["school1", "school1", "work1"], } @@ -378,7 +378,7 @@ def full_run( EXPECT_TOUR_COUNT = { "simple": 121, - "fast": 103, + "fast": 108, "faster": 102, } @@ -388,20 +388,20 @@ def full_run( # reference runs and are checked here to guard against unintended drift. _EXPECT_PERSON_IDS = { "simple": [325051, 325051, 325051, 325052, 325052, 325052], - "fast": [325051, 325051, 325052], + "fast": [325051, 325051, 325051, 325052], "faster": [325051, 325052], } _EXPECT_TOUR_TYPES = { "simple": ["othdiscr", "work", "work", "business", "work", "othmaint"], - "fast": ["work", "work", "work"], - "faster": ["work", "work"], + "fast": ["work", "escort", "eatout", "work"], + "faster": ["shopping", "work"], } _EXPECT_MODES = { "simple": ["WALK", "WALK", "SHARED3FREE", "WALK", "WALK_LOC", "WALK"], - "fast": ["WALK", "WALK", "WALK_LRF"], - "faster": ["WALK_LOC", "WALK"], + "fast": ["WALK", "TNC_SHARED", "WALK", "WALK"], + "faster": ["WALK", "WALK_LOC"], } @@ -430,7 +430,7 @@ def regress_tour_modes(tours_df, channel_type: str = "simple"): tour_mode person_id tour_type tour_num tour_category tour_id 13327130 WALK 325051 work 1 mandatory - 13327100 TNC_SHARED 325051 escort 1 non_mandatory + 13327100 TNC_SHARED 325051 escort 1 non_mandatory 13327097 WALK 325051 eatout 2 non_mandatory 13327171 WALK 325052 work 1 mandatory """ From 4a71eb56f00afcbb68d39e96c5e376e5b0659c45 Mon Sep 17 00:00:00 2001 From: Jeffrey Newman Date: Sun, 3 May 2026 17:04:47 -0500 Subject: [PATCH 21/21] update test targets --- activitysim/core/test/test_random.py | 60 ++++++++++++++-------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/activitysim/core/test/test_random.py b/activitysim/core/test/test_random.py index 29a7bf5f9d..b442087b58 100644 --- a/activitysim/core/test/test_random.py +++ b/activitysim/core/test/test_random.py @@ -70,9 +70,9 @@ def test_channel(channel_type: Literal["simple", "fast", "faster"]): assert rands.shape == (5, 1) if channel_type == "fast": - test1_expected_rands = [0.162561, 0.0355962, 0.0295559, 0.2415235, 0.755611] + test1_expected_rands = [0.4072658, 0.5591271, 0.0297283, 0.6235138, 0.6921163] elif channel_type == "faster": - test1_expected_rands = [0.1461754, 0.3404831, 0.2687972, 0.5384345, 0.8583329] + test1_expected_rands = [0.4580108, 0.531716, 0.6470319, 0.6762532, 0.7392374] else: test1_expected_rands = [0.1733218, 0.1255693, 0.7384256, 0.3485183, 0.9012387] npt.assert_almost_equal(np.asanyarray(rands).flatten(), test1_expected_rands) @@ -80,9 +80,9 @@ def test_channel(channel_type: Literal["simple", "fast", "faster"]): # second call should return something different rands = rng.random_for_df(persons) if channel_type == "fast": - test1_expected_rands2 = [0.7580942, 0.7115404, 0.2006981, 0.1460596, 0.5890839] + test1_expected_rands2 = [0.336963, 0.5420581, 0.4396565, 0.9702927, 0.0251327] elif channel_type == "faster": - test1_expected_rands2 = [0.023297, 0.3941741, 0.2690284, 0.4423097, 0.9897082] + test1_expected_rands2 = [0.1690983, 0.933964, 0.3887059, 0.7922818, 0.4179632] else: test1_expected_rands2 = [0.9105223, 0.5718418, 0.7222742, 0.9062284, 0.3929369] npt.assert_almost_equal(np.asanyarray(rands).flatten(), test1_expected_rands2) @@ -93,18 +93,18 @@ def test_channel(channel_type: Literal["simple", "fast", "faster"]): rands = rng.random_for_df(households) if channel_type == "fast": - expected_rands = [0.9603448, 0.2157038, 0.1521092, 0.1159168, 0.525709] + expected_rands = [0.1571023, 0.2709219, 0.2515827, 0.9444831, 0.6816792] elif channel_type == "faster": - expected_rands = [0.8545137, 0.6644139, 0.0298825, 0.0820411, 0.5859529] + expected_rands = [0.1934219, 0.3369451, 0.8455883, 0.6440651, 0.3889942] else: expected_rands = [0.417278, 0.2994774, 0.8653719, 0.4429748, 0.5101697] npt.assert_almost_equal(np.asanyarray(rands).flatten(), expected_rands) choices = rng.choice_for_df(households, [1, 2, 3, 4], 2, replace=True) if channel_type == "fast": - expected_choices = [1, 3, 2, 2, 1, 3, 2, 1, 2, 2] + expected_choices = [4, 1, 4, 3, 2, 1, 3, 1, 1, 4] elif channel_type == "faster": - expected_choices = [2, 4, 1, 1, 3, 3, 3, 2, 3, 2] + expected_choices = [3, 4, 4, 3, 4, 2, 4, 1, 2, 3] else: expected_choices = [2, 1, 3, 3, 4, 2, 4, 1, 4, 1] npt.assert_almost_equal(choices, expected_choices) @@ -112,9 +112,9 @@ def test_channel(channel_type: Literal["simple", "fast", "faster"]): # should be DIFFERENT the second time choices = rng.choice_for_df(households, [1, 2, 3, 4], 2, replace=True) if channel_type == "fast": - expected_choices = [4, 2, 1, 3, 1, 3, 3, 1, 2, 3] + expected_choices = [1, 4, 2, 1, 2, 3, 1, 2, 2, 4] elif channel_type == "faster": - expected_choices = [3, 3, 3, 4, 3, 1, 4, 1, 1, 4] + expected_choices = [4, 1, 3, 3, 4, 1, 4, 2, 3, 2] else: expected_choices = [3, 1, 4, 3, 3, 2, 2, 1, 4, 2] npt.assert_almost_equal(choices, expected_choices) @@ -127,29 +127,29 @@ def test_channel(channel_type: Literal["simple", "fast", "faster"]): if channel_type == "fast": expected_rands = [ - 0.6475412, - 0.2042014, - 0.996377, - 0.7417811, - 0.3613508, - 0.0233316, - 0.1154262, - 0.5316864, - 0.0239487, - 0.1951297, + 0.0728735, + 0.9764697, + 0.6611142, + 0.8802973, + 0.0122184, + 0.8770089, + 0.9944639, + 0.2064867, + 0.6051138, + 0.1666114, ] elif channel_type == "faster": expected_rands = [ - 0.1904426, - 0.5351702, - 0.8730219, - 0.959349, - 0.7729186, - 0.425713, - 0.219114, - 0.9312722, - 0.0218454, - 0.3462854, + 0.2677105, + 0.7688408, + 0.9949042, + 0.909176, + 0.9348486, + 0.069542, + 0.7039883, + 0.89629, + 0.7469927, + 0.3387263, ] else: expected_rands = [