Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
aed9c4e
initial commit, experimenting with pickling non-libE_field fields (x …
jlnav Jun 26, 2025
00bb9c6
Merge branch 'develop' into feature/shelve_sims
jlnav Aug 28, 2025
b33dd7b
additional poking around and experimenting with history saving cache,…
jlnav Aug 28, 2025
a020a58
better making of .npy database, use History attributes created upon t…
jlnav Aug 29, 2025
fe4bc82
little note...?
jlnav Aug 29, 2025
60bd6f5
comments
jlnav Aug 29, 2025
8c20870
Merge branch 'develop' into feature/shelve_sims
jlnav Sep 11, 2025
a5d33cc
experimenting with having caching being a step of the alloc, once we'…
jlnav Sep 11, 2025
e8816a8
set those H entries to sim_started?
jlnav Sep 11, 2025
2894523
grab update-able indexes, then call update_history_x_out and update_h…
jlnav Sep 12, 2025
df29125
moving cache logic into manager, into handle_msg_from_worker that ove…
jlnav Sep 17, 2025
0a70303
grow the manager's internal record of cache hits, instead of overwrit…
jlnav Sep 18, 2025
593b522
save presumptive workerID for the worker that would've been given cac…
jlnav Sep 18, 2025
e55e691
prevent redundant insertions into local cache retrieval. fix a bug in…
jlnav Sep 19, 2025
9575d76
refactor, and remove first draft of code that was in alloc_f
jlnav Sep 19, 2025
c01dcd3
for now, enable cache for sims that lasted longer than a second
jlnav Sep 19, 2025
6e90b5c
experimenting with making disk cache name match calling script plus e…
jlnav Sep 19, 2025
c7a7f52
fix redundant send of work if rows send to gen. tiny test fix
jlnav Sep 19, 2025
4af459d
Merge branch 'develop' into feature/shelve_sims
jlnav Sep 24, 2025
343cc9b
add libE_specs.cache_long_sims, plus more/better docstrings
jlnav Sep 24, 2025
ca7fe00
manager builds libE_stats messages corresponding to cache retrievals.…
jlnav Sep 25, 2025
1f55a61
user can specify database name; trying to figure out occasionally-mal…
jlnav Sep 26, 2025
a21fb44
Merge branch 'develop' into feature/shelve_sims
jlnav Oct 22, 2025
9a3e0c4
fix syntax error uncaught by black and other tools?
jlnav Oct 22, 2025
702c8d9
cache_name is only string. path not needed in specs.py
jlnav Oct 22, 2025
08c1790
param fix
jlnav Oct 22, 2025
9270b93
still want to send Work on persis_stop if we're doing final_gen_send
jlnav Oct 22, 2025
d92b3c3
don't necessarily need cache collisions for these executor tests - si…
jlnav Oct 22, 2025
10946bc
fix iterating over blank template cache entries as though they're val…
jlnav Oct 22, 2025
11694e7
add functionality test for cache_sims
jlnav Oct 22, 2025
019b82e
non-existing cache already dealt with earlier?
jlnav Oct 22, 2025
75105b8
add the new libe specs options to libe_specs.rst
jlnav Oct 23, 2025
85ac196
Merge branch 'develop' into feature/shelve_sims
jlnav Jan 9, 2026
ea8a87d
check that *all* fields in an outbound row match a cache row before c…
jlnav Jan 9, 2026
8965eff
don't need cache_index since we're always updating the last row of th…
jlnav Jan 9, 2026
7d39232
Merge branch 'develop' into feature/shelve_sims
jlnav Apr 21, 2026
bcbf61d
Merge branch 'develop' into feature/shelve_sims
jlnav May 1, 2026
d98a499
cache-name joined with _. cache lives in memory for duration of run i…
jlnav May 1, 2026
a74ce18
update functionality test for develop
jlnav May 4, 2026
40b4208
Merge branch 'develop' into feature/shelve_sims
jlnav May 8, 2026
b57636c
most importantly, specify seed to gen
jlnav May 8, 2026
fa0710e
Merge branch 'develop' into feature/shelve_sims
jlnav May 21, 2026
0752958
cache is computed based on a hash of all the specs, all the callables…
jlnav May 21, 2026
31dbee3
bump pixi versions in ci yml files, formatting?
jlnav May 21, 2026
e208950
Merge branch 'develop' into feature/shelve_sims
jlnav May 26, 2026
a6e0f67
various speed optimizations, especially around the caching sections
jlnav May 26, 2026
87b394f
Merge branch 'develop' into feature/shelve_sims
jlnav May 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions libensemble/alloc_funcs/start_only_persistent.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,20 @@ def only_persistent_gens(W, H, sim_specs, gen_specs, alloc_specs, persis_info, l
gen_count = support.count_persis_gens()
Work = {}

# Asynchronous return to generator
async_return = user.get("async_return", False) and sum(H["sim_ended"]) >= initial_batch_size
# Asynchronous return to generator.
# Use the manager-maintained counter instead of re-scanning the full H array.
async_return = user.get("async_return", False) and libE_info["sim_ended_count"] >= initial_batch_size

if gen_count < persis_info.get("num_gens_started", 0):
# When a persistent worker is done, trigger a shutdown (returning exit condition of 1)
return Work, persis_info, 1

# Give evaluated results back to a running persistent gen
# Give evaluated results back to a running persistent gen.
# Compute the sim_ended & ~gen_informed mask once; AND with per-worker gen_inds inside the loop.
pending_sim = H["sim_ended"] & ~H["gen_informed"]
for wid in support.avail_worker_ids(persistent=EVAL_GEN_TAG, active_recv=active_recv_gen):
gen_inds = H["gen_worker"] == wid
returned_but_not_given = np.logical_and.reduce((H["sim_ended"], ~H["gen_informed"], gen_inds))
returned_but_not_given = pending_sim & gen_inds
if np.any(returned_but_not_given):
if async_return or support.all_sim_ended(H, gen_inds):
point_ids = np.where(returned_but_not_given)[0]
Expand Down
211 changes: 185 additions & 26 deletions libensemble/history.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,21 @@
import json
import logging
import time
from pathlib import Path
from typing import TYPE_CHECKING

import numpy as np
import numpy.typing as npt

from libensemble.tools.fields_keys import libE_fields, protected_libE_fields

if TYPE_CHECKING:
from libensemble.logger import LibensembleLogger

logger = logging.getLogger(__name__)
if TYPE_CHECKING:
assert isinstance(logger, LibensembleLogger)


# For debug messages - uncomment
# logger.setLevel(logging.DEBUG)
Expand Down Expand Up @@ -69,14 +78,14 @@ def __init__(

if "sim_started" not in fields:
logger.manager_warning( # type: ignore[attr-defined]
"Marking entries in H0 as having been " + "'sim_started' and 'sim_ended'"
"Marking entries in H0 as having been 'sim_started' and 'sim_ended'"
)

H["sim_started"][: len(H0)] = 1
H["sim_ended"][: len(H0)] = 1
elif "sim_ended" not in fields:
logger.manager_warning( # type: ignore[attr-defined]
"Marking entries in H0 as having been " + "'sim_ended' if 'sim_started'"
"Marking entries in H0 as having been 'sim_ended' if 'sim_started'"
)

H["sim_ended"][: len(H0)] = H0["sim_started"]
Expand All @@ -102,27 +111,165 @@ def __init__(
self.index = len(H0)
self.grow_count = 0
self.safe_mode = False
self.use_cache = False

self.sim_started_count = np.sum(H["sim_started"])
self.sim_ended_count = np.sum(H["sim_ended"])
self.gen_informed_count = np.sum(H["gen_informed"])
self.sim_started_count: int = np.sum(H["sim_started"])
self.sim_ended_count: int = np.sum(H["sim_ended"])
self.gen_informed_count: int = np.sum(H["gen_informed"])
self.given_back_warned = False

self.sim_started_offset = self.sim_started_count
self.sim_ended_offset = self.sim_ended_count
self.gen_informed_offset = self.gen_informed_count
self.sim_started_offset: int = self.sim_started_count
self.sim_ended_offset: int = self.sim_ended_count
self.gen_informed_offset: int = self.gen_informed_count

self.last_started = -1
self.last_ended = -1

def init_cache(
    self,
    cache_name: str,
    cache_dir: str | Path,
    spec_hash: str | None = None,
) -> None:
    """Enable the simulation cache and load any valid entries already on disk.

    The cache is stored in ``cache_dir`` as ``<cache_name>.npy`` next to a
    sidecar ``<cache_name>.meta.json``. An existing cache file is deleted
    unless the sidecar exists, parses as a JSON object, and stores a
    ``"spec_hash"`` equal to ``spec_hash``. A cache file that cannot be read,
    or whose dtype does not match the current user fields of ``self.H``, is
    ignored for this run instead of aborting start-up.

    Parameters
    ----------
    cache_name
        Base name (without extension) of the cache files.
    cache_dir
        Directory holding the cache files. ``~`` is expanded and the
        directory is created if it does not exist.
    spec_hash
        Hash of the current configuration, compared against the value stored
        in the sidecar file.
    """
    import pickle  # only needed to recognise np.load's unpickling failure

    self.cache_dir = Path(cache_dir).expanduser()
    self.cache_dir.mkdir(parents=True, exist_ok=True)
    self.cache = self.cache_dir / Path(cache_name + ".npy")
    self.cache_meta = self.cache_dir / Path(cache_name + ".meta.json")
    self.spec_hash = spec_hash
    self.use_cache = True
    self.cache_set = False

    # Precompute the sorted user-field names and their dtypes once, so
    # _shelf_longrunning_sims doesn't recompute them on every sim return.
    libE_field_names = {k[0] for k in libE_fields}
    self.cache_keys = sorted([n for n in self.H.dtype.names if n not in libE_field_names])
    self.cache_dtype = np.dtype(sorted([(n, self.H.dtype.fields[n][0]) for n in self.cache_keys]))

    # Buffer for new entries collected during this run; deduplicated via bytes key.
    self._cache_buffer: list = []
    self._cache_seen: set = set()

    # Validate any existing cache against the configuration hash.
    if self.cache.exists():
        meta = None
        if self.cache_meta.exists():
            try:
                with open(self.cache_meta) as f:
                    meta = json.load(f)
            except (OSError, ValueError):
                # Unreadable or malformed sidecar (JSONDecodeError is a
                # ValueError): treat it exactly like missing metadata.
                meta = None
        # A sidecar holding valid JSON that is not an object (e.g. a list)
        # has no .get(); it must not be mistaken for a match or crash here.
        cache_valid = isinstance(meta, dict) and meta.get("spec_hash") == spec_hash
        if not cache_valid:
            logger.debug(
                "Cache hash mismatch or missing metadata — starting fresh: %s",
                self.cache.name,
            )
            self.cache.unlink(missing_ok=True)

    if not self.cache.exists():
        self.cache.touch()

    # NOTE(security): allow_pickle=True lets np.load unpickle object data,
    # which can run arbitrary code. Only use cache directories you trust.
    try:
        loaded = np.load(self.cache, allow_pickle=True)
    except EOFError:
        # Expected for the empty placeholder file created just above.
        loaded = None
    except (OSError, ValueError, pickle.UnpicklingError) as err:
        logger.debug("Ignoring unreadable cache file %s: %s", self.cache.name, err)
        loaded = None

    # Entries are later concatenated with rows of self.cache_dtype and
    # deduplicated by their raw bytes, so anything that is not a 1-D array
    # of exactly that dtype cannot be reused.
    if loaded is not None and not (
        isinstance(loaded, np.ndarray) and loaded.ndim == 1 and loaded.dtype == self.cache_dtype
    ):
        logger.debug("Ignoring cache file with incompatible layout: %s", self.cache.name)
        loaded = None
    self.in_cache = loaded

    # Pre-populate the seen-set from any on-disk entries so we don't re-add them.
    # Also mark cache_set=True immediately when there is existing data — the manager
    # uses this flag to decide whether to scan the cache when dispatching sim work.
    if self.in_cache is not None and len(self.in_cache) > 0:
        self._cache_seen.update(row.tobytes() for row in self.in_cache)
        self.cache_set = True

def _append_new_fields(self, H_f: npt.NDArray) -> None:
    """Rebuild ``self.H`` so it also contains the fields present in ``H_f``.

    The new dtype is the union of the field descriptions of ``self.H`` and of
    ``H_f`` (repacked first, so padding from a multi-field view is dropped).
    Existing rows keep their values; the added fields start out zeroed.

    Parameters
    ----------
    H_f
        Structured array whose fields should be available in ``self.H``.
    """
    # numpy.lib.recfunctions is a submodule that must be imported explicitly;
    # reaching it as ``np.lib.recfunctions`` without this import is not reliable.
    import numpy.lib.recfunctions as rfn

    # set() removes descriptions shared by both arrays. It does not preserve
    # order, so the field order of the new dtype is unspecified.
    dtype_new: np.dtype = np.dtype(list(set(self.H.dtype.descr + rfn.repack_fields(H_f).dtype.descr)))

    H_new = np.zeros(len(self.H), dtype=dtype_new)
    for field in self.H.dtype.names:
        H_new[field][: len(self.H)] = self.H[field]
    self.H = H_new

def _shelf_longrunning_sims(self, index, min_runtime: float = 1.0):
    """Buffer the user fields of row ``index`` for caching if the sim ran long enough.

    A row qualifies when ``sim_ended_time - sim_started_time`` is strictly
    greater than ``min_runtime``. Qualifying rows are reduced to
    ``self.cache_keys``, cast to ``self.cache_dtype`` and appended to
    ``self._cache_buffer``. A row whose byte representation is already in
    ``self._cache_seen`` is skipped.

    Uses a bytes-keyed set for deduplication instead of np.unique on every
    insertion, and accumulates new entries in a plain Python list.

    Parameters
    ----------
    index
        Row of ``self.H`` to consider.
    min_runtime
        Runtime threshold in the units of the ``sim_*_time`` fields; the
        default of one keeps the original "longer than a second" rule.
    """
    row = self.H[index]
    if row["sim_ended_time"] - row["sim_started_time"] <= min_runtime:
        return
    # One-element structured array, so buffered entries concatenate directly.
    entry = np.array([row[self.cache_keys]], dtype=self.cache_dtype)
    key = entry[0].tobytes()
    if key in self._cache_seen:
        return
    self._cache_seen.add(key)
    self._cache_buffer.append(entry)
    self.cache_set = True

def _materialize_cache(self) -> npt.NDArray | None:
"""Combine the on-disk cache with any buffered new entries into one array."""
parts = []
if self.in_cache is not None:
parts.append(self.in_cache)
if self._cache_buffer:
parts.append(np.concatenate(self._cache_buffer))
if not parts:
return None
return np.concatenate(parts) if len(parts) > 1 else parts[0]

def save_cache(self) -> None:
    """Write the combined cache to ``self.cache`` and record its spec hash.

    Does nothing unless caching is enabled, ``cache_set`` is true and there
    is data to write. The sidecar metadata file is written only when
    ``self.spec_hash`` is truthy.
    """
    if not (self.use_cache and self.cache_set):
        return
    snapshot = self._materialize_cache()
    if snapshot is None:
        return
    np.save(self.cache, snapshot, allow_pickle=True)
    if not self.spec_hash:
        return
    with open(self.cache_meta, "w") as meta_file:
        json.dump({"spec_hash": self.spec_hash}, meta_file)

def get_shelved_sims(self) -> npt.NDArray:
    """Return all cached entries, reading ``self.cache`` from disk if none are in memory."""
    shelved = self._materialize_cache()
    if shelved is None:
        # Nothing loaded or buffered: fall back to the cache file itself.
        shelved = np.load(self.cache, allow_pickle=True)
    return shelved

@staticmethod
def _classify_fields(fields, returned_H, H):
    """Sort returned field names into three groups for update_history_f.

    Names found in ``protected_libE_fields`` are left out of every group.

    Returns
    -------
    scalar_fields : list[str]
        Fields whose dtype in ``returned_H`` has shape ``()`` or holds
        Python objects.
    uniform_fields : list[str]
        Sub-array fields whose shape in ``returned_H`` equals the shape of
        the same field in ``H``.
    ragged_fields : list[str]
        Sub-array fields whose shape in ``returned_H`` differs from the
        shape of the same field in ``H``.
    """
    scalar_fields, uniform_fields, ragged_fields = [], [], []
    for name in fields:
        if name in protected_libE_fields:
            continue
        returned_dt = returned_H.dtype[name]
        if returned_dt.shape == () or returned_dt.hasobject:
            bucket = scalar_fields
        elif returned_dt.shape == H.dtype[name].shape:
            # Same element shape as H's storage for this field.
            bucket = uniform_fields
        else:
            bucket = ragged_fields
        bucket.append(name)
    return scalar_fields, uniform_fields, ragged_fields

def update_history_f(self, D: dict, kill_canceled_sims: bool = False) -> None:
"""
Updates the history after points have been evaluated
Expand All @@ -135,30 +282,42 @@ def update_history_f(self, D: dict, kill_canceled_sims: bool = False) -> None:
if returned_H is not None and any([field not in self.H.dtype.names for field in returned_H.dtype.names]):
self._append_new_fields(returned_H)

for j, ind in enumerate(new_inds):
if self.safe_mode:
for field in fields:
if field in protected_libE_fields:
if self.safe_mode:
assert False, "The field '" + field + "' is protected"
continue
assert field not in protected_libE_fields, "The field '" + field + "' is protected"

if np.isscalar(returned_H[field][j]) or returned_H.dtype[field].hasobject:
self.H[field][ind] = returned_H[field][j]
else:
# len or np.size
new_inds = np.asarray(new_inds)

if fields and returned_H is not None:
scalar_fields, uniform_fields, ragged_fields = self._classify_fields(fields, returned_H, self.H)

# Vectorized assignment for scalar and object fields (one op per field)
for field in scalar_fields:
self.H[field][new_inds] = returned_H[field]

# Vectorized assignment for fixed-shape array fields that exactly match H's shape
for field in uniform_fields:
self.H[field][new_inds] = returned_H[field]

# Per-row loop only for ragged (partial-fill) array fields
for j, ind in enumerate(new_inds):
for field in ragged_fields:
H0_size = len(returned_H[field][j])
assert H0_size <= len(self.H[field][ind]), (
"History update Error: Too many values received for " + field
)
assert H0_size, "History update Error: No values in this field " + field
if H0_size == len(self.H[field][ind]):
self.H[field][ind] = returned_H[field][j] # ref
else:
self.H[field][ind][:H0_size] = returned_H[field][j] # Slice View
self.H[field][ind][:H0_size] = returned_H[field][j]

# Batch-update bookkeeping fields for all returned rows at once
t = time.time()
self.H["sim_ended"][new_inds] = True
self.H["sim_ended_time"][new_inds] = t
self.sim_ended_count += len(new_inds)

self.H["sim_ended"][ind] = True
self.H["sim_ended_time"][ind] = time.time()
self.sim_ended_count += 1
if self.use_cache:
for ind in new_inds:
self._shelf_longrunning_sims(ind)

if kill_canceled_sims:
for j in range(self.last_ended + 1, np.max(new_inds) + 1):
Expand Down Expand Up @@ -205,7 +364,7 @@ def update_history_to_gen(self, q_inds: npt.NDArray):

if self.using_H0 and not self.given_back_warned:
logger.manager_warning( # type: ignore[attr-defined]
"Giving entries in H0 back to gen. Marking entries in " + "H0 as 'gen_informed' if 'sim_ended'."
"Giving entries in H0 back to gen. Marking entries in H0 as 'gen_informed' if 'sim_ended'."
)

self.given_back_warned = True
Expand Down
17 changes: 16 additions & 1 deletion libensemble/libE.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@
from libensemble.tools.alloc_support import AllocSupport
from libensemble.tools.tools import _USER_SIM_ID_WARNING
from libensemble.utils import launcher
from libensemble.utils.misc import specs_dump
from libensemble.utils.misc import compute_config_hash, specs_dump
from libensemble.utils.timer import Timer
from libensemble.version import __version__
from libensemble.worker import worker_main
Expand Down Expand Up @@ -235,12 +235,27 @@ def libE(
exit_criteria=exit_criteria,
)

# Compute a deterministic hash of the full configuration for cache integrity.
spec_hash = compute_config_hash(
sim_specs=ensemble.sim_specs,
gen_specs=ensemble.gen_specs,
alloc_specs=ensemble.alloc_specs,
libE_specs=ensemble.libE_specs,
exit_criteria=ensemble.exit_criteria,
H0=H0,
)

(sim_specs, gen_specs, alloc_specs, libE_specs) = [
specs_dump(spec, by_alias=True)
for spec in [ensemble.sim_specs, ensemble.gen_specs, ensemble.alloc_specs, ensemble.libE_specs]
]
exit_criteria = specs_dump(ensemble.exit_criteria, by_alias=True, exclude_none=True)

# Inject spec hash and auto-generate cache name when not explicitly provided.
libE_specs["_spec_hash"] = spec_hash
if libE_specs.get("cache_long_sims") and not libE_specs.get("cache_name"):
libE_specs["cache_name"] = f".libe_cache_{spec_hash[:16]}"

# Restore objects that don't survive serialization via model_dump
if hasattr(ensemble.sim_specs, "simulator") and ensemble.sim_specs.simulator is not None:
sim_specs["simulator"] = ensemble.sim_specs.simulator
Expand Down
Loading
Loading