From e476d5349956f310a1082c433cb3a7fe80c7a652 Mon Sep 17 00:00:00 2001 From: Joshua Natarajan Date: Tue, 12 May 2026 21:28:54 -0400 Subject: [PATCH 1/3] first working pass at ASAL-compatible search implementation --- .gitignore | 4 + search/lila_search/__init__.py | 16 ++ search/lila_search/evaluator.py | 180 +++++++++++++++ search/lila_search/illumination.py | 343 ++++++++++++++++++++++++++++ search/lila_search/renderer.py | 298 +++++++++++++++++++++++++ search/lila_search/substrate.py | 122 ++++++++++ search/lila_search/theta.py | 347 +++++++++++++++++++++++++++++ search/lila_search/viz/atlas.py | 199 +++++++++++++++++ search/pyproject.toml | 36 +++ search/scripts/export_world.py | 97 ++++++++ search/scripts/run_illumination.py | 177 +++++++++++++++ search/tests/__init__.py | 0 server/ecosim/worker.py | 16 ++ 13 files changed, 1835 insertions(+) create mode 100644 search/lila_search/__init__.py create mode 100644 search/lila_search/evaluator.py create mode 100644 search/lila_search/illumination.py create mode 100644 search/lila_search/renderer.py create mode 100644 search/lila_search/substrate.py create mode 100644 search/lila_search/theta.py create mode 100644 search/lila_search/viz/atlas.py create mode 100644 search/pyproject.toml create mode 100644 search/scripts/export_world.py create mode 100644 search/scripts/run_illumination.py create mode 100644 search/tests/__init__.py diff --git a/.gitignore b/.gitignore index 83972fa..13b7941 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,10 @@ __pycache__/ *.py[codz] *$py.class +# lila +results/ +*.npy + # C extensions *.so diff --git a/search/lila_search/__init__.py b/search/lila_search/__init__.py new file mode 100644 index 0000000..2623f3a --- /dev/null +++ b/search/lila_search/__init__.py @@ -0,0 +1,16 @@ +"""lila_search — ASAL-compatible search over līlā ecosystem simulations.""" + +from lila_search.substrate import LilaSubstrate +from lila_search.theta import ThetaSpec, theta_to_world_config +from 
class CLIPEvaluator:
    """Embed simulation frames (and text prompts) with a CLIP model from open_clip.

    Defaults to ViT-B/32 with the ``openai`` weights. Every embedding returned
    by this class is L2-normalized, so cosine similarity between two
    embeddings is their dot product.

    Usage::

        evaluator = CLIPEvaluator()
        frames = substrate.rollout(theta, n_steps=2000, n_frames=20)
        embedding = evaluator.embed_rollout(frames)  # (embed_dim,) mean embedding
        embeddings = evaluator.embed_frames(frames)  # (20, embed_dim)
    """

    # Floor applied to a norm before dividing, so an all-zero vector maps to
    # zeros instead of NaN/inf.
    _NORM_EPS = 1e-12

    def __init__(
        self,
        model_name: str = "ViT-B-32",
        pretrained: str = "openai",
        device: str | None = None,
    ):
        """Load the CLIP model and its image preprocessing transform.

        Parameters
        ----------
        model_name : str
            open_clip architecture name.
        pretrained : str
            open_clip pretrained-weights tag.
        device : str, optional
            Torch device string. Picks ``"cuda"`` when available, else ``"cpu"``.
        """
        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(device)

        # Remembered so embed_text() tokenizes for *this* model rather than a
        # hard-coded architecture.
        self.model_name = model_name
        self._tokenizer = None  # built lazily on first embed_text() call

        self.model, _, self.preprocess = open_clip.create_model_and_transforms(
            model_name, pretrained=pretrained, device=self.device,
        )
        self.model.eval()

        # Embedding dimension as reported by the visual tower
        self._embed_dim = self.model.visual.output_dim

    @property
    def embed_dim(self) -> int:
        """Dimensionality of the CLIP embeddings produced by this model."""
        return self._embed_dim

    @staticmethod
    def _l2_normalize(vectors: np.ndarray) -> np.ndarray:
        """Scale *vectors* to unit L2 norm along the last axis (zero-safe)."""
        norms = np.linalg.norm(vectors, axis=-1, keepdims=True)
        return vectors / np.maximum(norms, CLIPEvaluator._NORM_EPS)

    @torch.no_grad()
    def embed_frames(
        self,
        frames: list[np.ndarray],
        batch_size: int | None = None,
    ) -> np.ndarray:
        """Embed a list of RGB frames into CLIP space.

        Parameters
        ----------
        frames : list[np.ndarray]
            RGB images accepted by ``PIL.Image.fromarray``
            (callers in this package pass (H, W, 3) uint8 arrays).
        batch_size : int, optional
            Maximum number of frames sent through the model per forward
            pass. ``None`` (default) sends all frames in a single batch.

        Returns
        -------
        np.ndarray
            Embeddings of shape (n_frames, embed_dim), L2-normalized.
            An empty input yields an empty (0, embed_dim) float32 array.
        """
        if len(frames) == 0:
            # torch.stack([]) raises; an empty batch has a well-defined result.
            return np.zeros((0, self._embed_dim), dtype=np.float32)

        # numpy -> PIL -> CLIP preprocessing transform
        tensors = [self.preprocess(Image.fromarray(frame)) for frame in frames]

        step = len(tensors) if batch_size is None else max(1, int(batch_size))
        chunks = []
        for start in range(0, len(tensors), step):
            batch = torch.stack(tensors[start:start + step]).to(self.device)
            emb = self.model.encode_image(batch)
            # L2 normalize (standard for CLIP similarity)
            emb = emb / emb.norm(dim=-1, keepdim=True)
            chunks.append(emb.cpu().numpy())

        return np.concatenate(chunks, axis=0)

    @torch.no_grad()
    def embed_text(self, texts: list[str]) -> np.ndarray:
        """Embed text prompts into CLIP space.

        Used for supervised target search (not needed for illumination).

        Parameters
        ----------
        texts : list[str]
            Natural language descriptions.

        Returns
        -------
        np.ndarray
            Embeddings of shape (n_texts, embed_dim), L2-normalized.
        """
        if self._tokenizer is None:
            # Tokenizer must match the loaded architecture, not a fixed name.
            self._tokenizer = open_clip.get_tokenizer(self.model_name)
        tokens = self._tokenizer(texts).to(self.device)
        embeddings = self.model.encode_text(tokens)
        embeddings = embeddings / embeddings.norm(dim=-1, keepdim=True)
        return embeddings.cpu().numpy()

    def embed_rollout(self, frames: list[np.ndarray]) -> np.ndarray:
        """Embed a rollout's frames and return the normalized mean embedding.

        Parameters
        ----------
        frames : list[np.ndarray]
            Rendered frames from one simulation rollout.

        Returns
        -------
        np.ndarray
            Mean embedding of shape (embed_dim,), L2-normalized.

        Raises
        ------
        ValueError
            If *frames* is empty (the mean would be undefined).
        """
        if len(frames) == 0:
            raise ValueError("embed_rollout() requires at least one frame")
        frame_embeddings = self.embed_frames(frames)
        return self._l2_normalize(frame_embeddings.mean(axis=0))

    def embed_rollouts_batch(self, rollout_frames: list[list[np.ndarray]]) -> np.ndarray:
        """Embed several rollouts with one pass over the flattened frames.

        All frames of all rollouts are concatenated, embedded together via
        :meth:`embed_frames`, and then averaged per rollout.

        Parameters
        ----------
        rollout_frames : list[list[np.ndarray]]
            List of rollouts, each a non-empty list of RGB frames.

        Returns
        -------
        np.ndarray
            Mean embeddings of shape (n_rollouts, embed_dim), L2-normalized,
            stored as float64 (the dtype of ``np.zeros``).

        Raises
        ------
        ValueError
            If any rollout contains no frames.
        """
        counts = [len(frames) for frames in rollout_frames]
        for i, count in enumerate(counts):
            if count == 0:
                raise ValueError(f"rollout {i} contains no frames")

        # Flatten, remembering where each rollout starts and ends
        flat_frames = [frame for frames in rollout_frames for frame in frames]
        boundaries = np.concatenate([[0], np.cumsum(counts, dtype=int)]).astype(int)

        all_embeddings = self.embed_frames(flat_frames)

        n_rollouts = len(rollout_frames)
        result = np.zeros((n_rollouts, self._embed_dim))
        for i in range(n_rollouts):
            start, end = boundaries[i], boundaries[i + 1]
            result[i] = self._l2_normalize(all_embeddings[start:end].mean(axis=0))

        return result

    def pairwise_distances(self, embeddings: np.ndarray) -> np.ndarray:
        """Compute pairwise cosine distances between embeddings.

        Parameters
        ----------
        embeddings : np.ndarray
            Shape (n, embed_dim); must already be L2-normalized.

        Returns
        -------
        np.ndarray
            Matrix of shape (n, n) holding ``1 - cosine_similarity``.
        """
        # For unit vectors, cosine similarity is the dot product
        return 1.0 - embeddings @ embeddings.T
def _rollout_worker(args: tuple) -> list[np.ndarray]:
    """Module-level rollout function for ProcessPoolExecutor.

    Each worker builds its own LilaSubstrate rather than receiving one from
    the parent process.

    Parameters
    ----------
    args : tuple
        ``(theta, n_steps, n_frames, seed)`` optionally followed by
        ``spec`` and ``img_size``. The two optional items let the worker
        reproduce the parent's substrate configuration; when omitted the
        substrate is built with its defaults.

    Returns
    -------
    list[np.ndarray]
        Frames returned by ``LilaSubstrate.rollout``.
    """
    theta, n_steps, n_frames, seed, *extra = args
    spec = extra[0] if len(extra) > 0 else None
    img_size = extra[1] if len(extra) > 1 else 256
    substrate = LilaSubstrate(spec=spec, img_size=img_size)
    return substrate.rollout(theta, n_steps=n_steps, n_frames=n_frames, seed=seed)


@dataclass
class IlluminationResult:
    """Output of an illumination run."""
    thetas: np.ndarray               # (pop_size, ndim) — final population
    embeddings: np.ndarray           # (pop_size, embed_dim) — CLIP embeddings
    thumbnails: list[np.ndarray]     # last rendered frame of each kept rollout
    diversity_history: list[float]   # min nearest-neighbor distance per generation
    elapsed_seconds: float


@dataclass
class IlluminationConfig:
    """Configuration for illumination search."""
    pop_size: int = 64            # population size
    n_children: int = 32          # children generated per generation
    n_generations: int = 100      # total generations
    n_steps: int = 2000           # simulation ticks per rollout
    n_frames: int = 20            # frames captured per rollout
    mutation_scale: float = 0.1   # std of Gaussian mutation (relative to range)
    seed: int = 0
    save_interval: int = 10       # save checkpoint every N generations
    n_workers: int = 1            # parallel CPU rollout workers (1 = sequential)


def _nn_distances(embeddings: np.ndarray) -> np.ndarray:
    """Return each row's cosine distance to its nearest other row.

    Assumes the rows are L2-normalized, so cosine similarity is a dot product.
    """
    dist = 1.0 - embeddings @ embeddings.T
    # Rounding can push 1 - sim slightly below zero for near-duplicates
    np.maximum(dist, 0.0, out=dist)
    # Exclude self-distances from the minimum
    np.fill_diagonal(dist, np.inf)
    return dist.min(axis=1)


def _min_nn_distance(embeddings: np.ndarray) -> float:
    """Compute minimum nearest-neighbor distance across the population.

    This is the distance between the closest pair — the population's
    diversity "bottleneck". Higher means more diverse. Returns 0.0 for
    fewer than two embeddings.
    """
    if len(embeddings) < 2:
        return 0.0
    return float(_nn_distances(embeddings).min())


def _mean_nn_distance(embeddings: np.ndarray) -> float:
    """Mean nearest-neighbor distance — alternative diversity metric.

    Returns 0.0 for fewer than two embeddings.
    """
    if len(embeddings) < 2:
        return 0.0
    return float(_nn_distances(embeddings).mean())


def _select_most_diverse(
    embeddings: np.ndarray,
    keep_n: int,
) -> np.ndarray:
    """Greedy farthest-point selection to keep the most diverse subset.

    Starts from the point with the highest mean distance to all others, then
    repeatedly adds the candidate whose distance to its nearest already
    selected point is largest.

    Parameters
    ----------
    embeddings : np.ndarray
        Shape (n_candidates, embed_dim), L2-normalized.
    keep_n : int
        Number of points to keep.

    Returns
    -------
    np.ndarray
        Indices of the selected points, in selection order. Contains
        ``min(keep_n, n_candidates)`` entries (none when ``keep_n <= 0``).
    """
    n = len(embeddings)
    if keep_n <= 0:
        return np.empty(0, dtype=int)
    if n <= keep_n:
        return np.arange(n)

    # Pairwise cosine distances
    dist = 1.0 - embeddings @ embeddings.T

    first = int(dist.mean(axis=1).argmax())
    selected = [first]

    # gap[i] = distance from i to its nearest selected point;
    # already-selected points are pinned to -inf so argmax never re-picks them.
    gap = dist[first].copy()
    gap[first] = -np.inf

    for _ in range(keep_n - 1):
        nxt = int(gap.argmax())
        selected.append(nxt)
        np.minimum(gap, dist[nxt], out=gap)
        gap[nxt] = -np.inf

    return np.array(selected)


def _mutate(
    theta: np.ndarray,
    spec: ThetaSpec,
    scale: float,
    rng: np.random.Generator,
) -> np.ndarray:
    """Mutate a θ vector with Gaussian noise scaled to parameter ranges.

    Noise has standard deviation ``scale * (high - low)`` per dimension; the
    result is clipped back into bounds via ``spec.clip``.
    """
    ranges = spec.bounds[:, 1] - spec.bounds[:, 0]
    noise = rng.normal(0, scale, size=theta.shape) * ranges
    return spec.clip(theta + noise)


def illuminate(
    substrate: LilaSubstrate | None = None,
    evaluator: CLIPEvaluator | None = None,
    config: IlluminationConfig | None = None,
    output_dir: str | Path | None = None,
) -> IlluminationResult:
    """Run illumination search over the līlā parameter space.

    Each generation mutates random parents, rolls out and embeds the
    children, then keeps the ``pop_size`` most mutually distant individuals
    among parents and children.

    Parameters
    ----------
    substrate : LilaSubstrate, optional
        Substrate instance. Created with defaults if None.
    evaluator : CLIPEvaluator, optional
        CLIP evaluator. Created with defaults if None.
    config : IlluminationConfig, optional
        Search configuration. Uses defaults if None.
    output_dir : str or Path, optional
        Directory to save checkpoints and results. No saving if None.

    Returns
    -------
    IlluminationResult
        Final population, embeddings, thumbnails, and metrics.
    """
    if config is None:
        config = IlluminationConfig()
    if substrate is None:
        substrate = LilaSubstrate()
    if evaluator is None:
        evaluator = CLIPEvaluator()

    spec = substrate.theta_spec()
    rng = np.random.default_rng(config.seed)

    if output_dir is not None:
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

    t_start = time.time()

    # Forwarded to worker processes so parallel rollouts use the same
    # parameter spec and image size as the sequential path.
    worker_img_size = getattr(substrate, "img_size", 256)

    # -----------------------------------------------------------------------
    # Helper: run rollouts (optionally parallel); embedding happens afterwards
    # -----------------------------------------------------------------------
    def _run_rollouts(thetas: np.ndarray, base_seed: int) -> tuple[list[list[np.ndarray]], list[np.ndarray]]:
        """Run simulations on CPU, return (all_frames, thumbnails)."""
        if config.n_workers > 1:
            from concurrent.futures import ProcessPoolExecutor
            tasks = [
                (thetas[i], config.n_steps, config.n_frames, base_seed + i,
                 spec, worker_img_size)
                for i in range(len(thetas))
            ]
            with ProcessPoolExecutor(max_workers=config.n_workers) as pool:
                all_frames = list(tqdm(pool.map(_rollout_worker, tasks),
                                       total=len(tasks), desc="Rollouts", leave=False))
        else:
            all_frames = [
                substrate.rollout(thetas[i], n_steps=config.n_steps,
                                  n_frames=config.n_frames, seed=base_seed + i)
                for i in tqdm(range(len(thetas)), desc="Rollouts", leave=False)
            ]

        # The last frame of each rollout serves as its atlas thumbnail
        thumbnails = [frames[-1] for frames in all_frames]
        return all_frames, thumbnails

    # -----------------------------------------------------------------------
    # Initialize population with random θ vectors
    # -----------------------------------------------------------------------
    print(f"Initializing population of {config.pop_size}...")
    pop_thetas = np.array([spec.sample_uniform(rng) for _ in range(config.pop_size)])

    init_frames, pop_thumbnails = _run_rollouts(pop_thetas, config.seed)
    pop_embeddings = evaluator.embed_rollouts_batch(init_frames)

    diversity_history = [_min_nn_distance(pop_embeddings)]
    print(f"Initial diversity (min NN dist): {diversity_history[-1]:.4f}")

    # -----------------------------------------------------------------------
    # Evolution loop
    # -----------------------------------------------------------------------
    for gen in tqdm(range(config.n_generations), desc="Illumination"):
        # Generate children by mutating random parents
        parent_idx = rng.integers(0, config.pop_size, size=config.n_children)
        child_thetas = np.array([
            _mutate(pop_thetas[pi], spec, config.mutation_scale, rng)
            for pi in parent_idx
        ])

        # Seeds continue after the initial population's and never repeat
        child_base_seed = config.seed + config.pop_size + gen * config.n_children
        child_frames, child_thumbnails = _run_rollouts(child_thetas, child_base_seed)
        child_embeddings = evaluator.embed_rollouts_batch(child_frames)

        # Combine parents + children
        all_thetas = np.concatenate([pop_thetas, child_thetas], axis=0)
        all_embeddings = np.concatenate([pop_embeddings, child_embeddings], axis=0)
        all_thumbnails = pop_thumbnails + child_thumbnails

        # Select the most diverse subset
        keep_idx = _select_most_diverse(all_embeddings, config.pop_size)

        pop_thetas = all_thetas[keep_idx]
        pop_embeddings = all_embeddings[keep_idx]
        pop_thumbnails = [all_thumbnails[i] for i in keep_idx]

        div = _min_nn_distance(pop_embeddings)
        diversity_history.append(div)

        if (gen + 1) % 10 == 0:
            mean_div = _mean_nn_distance(pop_embeddings)
            tqdm.write(
                f"Gen {gen + 1:4d} | min NN dist: {div:.4f} | "
                f"mean NN dist: {mean_div:.4f}"
            )

        # Save checkpoint
        if (output_dir is not None and config.save_interval > 0
                and (gen + 1) % config.save_interval == 0):
            _save_checkpoint(output_dir, gen + 1, pop_thetas, pop_embeddings, diversity_history)

    elapsed = time.time() - t_start

    result = IlluminationResult(
        thetas=pop_thetas,
        embeddings=pop_embeddings,
        thumbnails=pop_thumbnails,
        diversity_history=diversity_history,
        elapsed_seconds=elapsed,
    )

    # Save final results
    if output_dir is not None:
        _save_final(output_dir, result, spec)

    print(f"\nIllumination complete in {elapsed:.1f}s")
    print(f"Final diversity (min NN dist): {diversity_history[-1]:.4f}")

    return result


def _save_checkpoint(
    output_dir: Path,
    gen: int,
    thetas: np.ndarray,
    embeddings: np.ndarray,
    diversity_history: list[float],
) -> None:
    """Save a checkpoint to ``output_dir / "checkpoints"``.

    Writes three ``.npy`` files tagged with the zero-padded generation
    number: the θ population, its embeddings, and the diversity history
    recorded so far.
    """
    cp_dir = output_dir / "checkpoints"
    cp_dir.mkdir(parents=True, exist_ok=True)
    np.save(cp_dir / f"thetas_gen{gen:04d}.npy", thetas)
    np.save(cp_dir / f"embeddings_gen{gen:04d}.npy", embeddings)
    np.save(cp_dir / f"diversity_gen{gen:04d}.npy",
            np.asarray(diversity_history, dtype=float))


def _save_final(output_dir: Path, result: IlluminationResult, spec: ThetaSpec) -> None:
    """Save final results to disk.

    Writes ``thetas_final.npy``, ``embeddings_final.npy``, one PNG per
    thumbnail under ``thumbnails/``, and ``metadata.json``.
    """
    np.save(output_dir / "thetas_final.npy", result.thetas)
    np.save(output_dir / "embeddings_final.npy", result.embeddings)

    # Save thumbnails as individual images
    thumb_dir = output_dir / "thumbnails"
    thumb_dir.mkdir(exist_ok=True)
    from PIL import Image
    for i, thumb in enumerate(result.thumbnails):
        Image.fromarray(thumb).save(thumb_dir / f"sim_{i:04d}.png")

    # Save metadata
    meta = {
        "pop_size": len(result.thetas),
        "embed_dim": result.embeddings.shape[1],
        "ndim": result.thetas.shape[1],
        "dim_names": spec.names,
        "diversity_history": result.diversity_history,
        "elapsed_seconds": result.elapsed_seconds,
    }
    with open(output_dir / "metadata.json", "w", encoding="utf-8") as f:
        json.dump(meta, f, indent=2)
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

IMG_SIZE = 256
GRID_SIZE = 32  # locked in design decisions
CELL_PX = IMG_SIZE / GRID_SIZE  # 8.0 — pixels per grid cell at the default size

# Colors (RGB tuples) — chosen to be visually distinct in CLIP space
# and semantically matched to the browser visualizer
COLOR_WATER = (50, 130, 200)
COLOR_DEER = (180, 100, 50)
COLOR_BUTTERFLY = (220, 180, 40)
COLOR_OAK = (60, 100, 50)
COLOR_GRASS_HEALTHY = (80, 170, 80)
COLOR_GRASS_DRY = (160, 150, 80)
COLOR_WILDFLOWER = (180, 60, 130)
COLOR_WILDFLOWER_FRUITING = (240, 200, 50)
COLOR_DORMANT = (120, 95, 70)

# Soil moisture gradient endpoints
SOIL_DRY = np.array([200, 175, 130], dtype=np.float32)  # warm amber
SOIL_WET = np.array([100, 170, 170], dtype=np.float32)  # cool teal


# ---------------------------------------------------------------------------
# Engine state extraction
# ---------------------------------------------------------------------------
# These functions isolate the engine API assumptions. If attribute names
# differ from what's documented, adjust here only.

def _extract_entities(engine: Any) -> list[dict]:
    """Pull entity data from engine into plain dicts.

    Expected engine API (assumption — adjust here if the real API differs):
        engine.entities → dict mapping entity_id to a dict of entity data.
        Position is read from a ``position`` [x, y, z] sequence when present,
        otherwise from separate ``x``/``y``/``z`` keys (default 0).

    Returns one dict per entity with keys id, type, state, x, y, z, health,
    growth, hydration, species; missing fields get the defaults below.
    """
    entities = []
    for eid, e in engine.entities.items():
        if "position" in e:
            x, y, z = e["position"][0], e["position"][1], e["position"][2]
        else:
            x, y, z = e.get("x", 0), e.get("y", 0), e.get("z", 0)

        entities.append({
            "id": eid,
            "type": e.get("type", ""),
            "state": e.get("state", ""),
            "x": x,
            "y": y,
            "z": z,
            "health": e.get("health", 1.0),
            "growth": e.get("growth", 0.0),
            "hydration": e.get("hydration", 1.0),
            "species": e.get("species", ""),
        })
    return entities


def _extract_moisture_grid(engine: Any) -> np.ndarray:
    """Get soil moisture as a 2D grid, values clipped to 0–1.

    Tries, in order:
      1. ``engine.voxel_manager.get_layer_slice("moisture", y=0)``;
      2. ``engine.voxel_manager.grid`` — a mapping of ``(x, y, z)`` to a
         per-voxel layer mapping, read at ``y == 0`` using key ``1`` or
         ``"moisture"``; unseen cells stay at 0.3.

    This is deliberately best-effort: if anything raises, a uniform 0.3
    grid of shape (GRID_SIZE, GRID_SIZE) is returned and a RuntimeWarning is
    emitted so an engine API mismatch does not go unnoticed.
    """
    import warnings

    try:
        vm = engine.voxel_manager
        if hasattr(vm, "get_layer_slice"):
            return np.clip(vm.get_layer_slice("moisture", y=0), 0, 1)

        # Fallback: read from the sparse grid directly
        # Voxel layers: 0=nutrients, 1=moisture, 2=temperature, 3=organic_matter
        grid = np.full((GRID_SIZE, GRID_SIZE), 0.3, dtype=np.float32)
        if hasattr(vm, "grid"):
            for (x, y, z), layers in vm.grid.items():
                if y == 0 and 0 <= x < GRID_SIZE and 0 <= z < GRID_SIZE:
                    grid[z, x] = np.clip(layers.get(1, layers.get("moisture", 0.3)), 0, 1)
        return grid
    except Exception as exc:  # best-effort by design; see docstring
        warnings.warn(
            f"moisture extraction failed ({exc!r}); using uniform 0.3 grid",
            RuntimeWarning,
            stacklevel=2,
        )
        return np.full((GRID_SIZE, GRID_SIZE), 0.3, dtype=np.float32)


def _extract_water_sources(engine: Any) -> list[dict]:
    """Get water source positions, radii, and levels.

    Expected engine API (assumption):
        engine.water_sources → iterable of dicts with ``position`` [x, y, z]
        (or ``x``/``z`` keys), ``radius`` (default 2.0) and ``water_level``
        (default 1.0).

    An AttributeError/TypeError stops extraction and returns whatever was
    collected up to that point (possibly an empty list).
    """
    sources = []
    try:
        for ws in engine.water_sources:
            if "position" in ws:
                x, z = ws["position"][0], ws["position"][2]
            else:
                x, z = ws.get("x", 0), ws.get("z", 0)

            sources.append({
                "x": x,
                "z": z,
                "radius": ws.get("radius", 2.0),
                "water_level": ws.get("water_level", 1.0),
            })
    except (AttributeError, TypeError):
        pass
    return sources


# ---------------------------------------------------------------------------
# Drawing helpers
# ---------------------------------------------------------------------------

def _grid_to_px(gx: float, gz: float, cell_px: float = CELL_PX) -> tuple[float, float]:
    """Convert grid coordinates to pixel coordinates.

    ``cell_px`` is the number of pixels per grid cell (default: 256-px image).
    """
    return gx * cell_px, gz * cell_px


def _draw_moisture_background(img: np.ndarray, moisture: np.ndarray) -> None:
    """Fill *img* in place with the soil-moisture gradient (amber dry → teal wet).

    The top-left GRID_SIZE × GRID_SIZE cells of *moisture* are upscaled to the
    full size of *img* by nearest-neighbor lookup, so any image size is
    covered completely.
    """
    height, width = img.shape[:2]

    m = np.asarray(moisture, dtype=np.float32)[:GRID_SIZE, :GRID_SIZE, None]
    cell_colors = (SOIL_WET * m + SOIL_DRY * (1 - m)).astype(np.uint8)

    # Pixel p belongs to grid cell floor(p * GRID_SIZE / size)
    rows = np.minimum(np.arange(height) * GRID_SIZE // height, GRID_SIZE - 1)
    cols = np.minimum(np.arange(width) * GRID_SIZE // width, GRID_SIZE - 1)
    img[...] = cell_colors[np.ix_(rows, cols)]


def _draw_water_sources(
    draw: ImageDraw.Draw,
    sources: list[dict],
    cell_px: float = CELL_PX,
) -> None:
    """Draw water sources as blue circles scaled by water_level."""
    for ws in sources:
        level = ws["water_level"]
        if level < 0.05:
            continue  # dried up — skip, matching engine behavior
        px_x, px_z = _grid_to_px(ws["x"], ws["z"], cell_px)
        r_px = ws["radius"] * cell_px * level * 0.5  # scaled down to let entities show
        # Darker blue for emptier sources
        shade = tuple(int(c * (0.4 + 0.6 * level)) for c in COLOR_WATER)
        draw.ellipse(
            [px_x - r_px, px_z - r_px, px_x + r_px, px_z + r_px],
            fill=shade,
        )


def _draw_entity(draw: ImageDraw.Draw, entity: dict, cell_px: float = CELL_PX) -> None:
    """Draw a single entity based on its type and state.

    Marker sizes are specified for the default 256-px image and scale
    proportionally with ``cell_px``. Unknown types draw nothing.
    """
    etype = entity["type"]
    state = entity["state"]
    px_x, px_z = _grid_to_px(entity["x"], entity["z"], cell_px)
    scale = cell_px / CELL_PX  # 1.0 at the default image size

    def _disc(radius: float, color: tuple) -> None:
        draw.ellipse(
            [px_x - radius, px_z - radius, px_x + radius, px_z + radius],
            fill=color,
        )

    if state == "DORMANT":
        # Faded brown root marker for dormant plants
        _disc(3 * scale, COLOR_DORMANT)
        return

    if etype == "ANIMAL":
        # Directional triangle — simplified without heading angle
        size = 7 * scale
        draw.polygon(
            [(px_x, px_z - size), (px_x - size * 0.6, px_z + size * 0.5),
             (px_x + size * 0.6, px_z + size * 0.5)],
            fill=COLOR_DEER,
        )

    elif etype == "INSECT":
        # Small dot with color indicating state
        color = COLOR_WILDFLOWER_FRUITING if state == "POLLINATING" else COLOR_BUTTERFLY
        _disc(3.5 * scale, color)

    elif etype == "TREE":
        # Lighter canopy halo first, darker trunk on top
        _disc(10 * scale, tuple(min(c + 40, 255) for c in COLOR_OAK))
        _disc(4 * scale, COLOR_OAK)

    elif etype == "PLANT":
        # Scale with growth, tint with hydration
        growth = entity.get("growth", 0.5)
        hydration = entity.get("hydration", 0.5)
        radius = max(2.5, 4.5 * growth) * scale

        # Grass vs wildflower is decided from the species field or the id;
        # str() guards against non-string ids / None species.
        species = str(entity.get("species", "") or "")
        eid = str(entity.get("id", ""))
        is_wildflower = "wildflower" in species or "flower" in eid

        if is_wildflower:
            color = COLOR_WILDFLOWER_FRUITING if state == "FRUITING" else COLOR_WILDFLOWER
        else:
            # Grass — interpolate between dry and healthy based on hydration
            t = np.clip(hydration, 0, 1)
            color = tuple(
                int(COLOR_GRASS_DRY[i] * (1 - t) + COLOR_GRASS_HEALTHY[i] * t)
                for i in range(3)
            )

        _disc(radius, color)


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------

def render_headless(engine: Any, img_size: int = IMG_SIZE) -> np.ndarray:
    """Render current engine state as an RGB image.

    Layers, bottom to top: soil moisture background, water sources, then
    entities ordered dormant → plants → trees → animals → birds → insects.

    Parameters
    ----------
    engine : EcosystemEngine
        The simulation engine instance.
    img_size : int
        Output image side length in pixels. Default 256. All layers scale
        with this value.

    Returns
    -------
    np.ndarray
        RGB image array of shape (img_size, img_size, 3), dtype uint8.

    Raises
    ------
    ValueError
        If ``img_size`` is not positive.
    """
    if img_size <= 0:
        raise ValueError(f"img_size must be positive, got {img_size}")
    cell_px = img_size / GRID_SIZE

    # Layer 1: soil moisture background
    img = np.zeros((img_size, img_size, 3), dtype=np.uint8)
    _draw_moisture_background(img, _extract_moisture_grid(engine))

    # Convert to PIL for shape drawing
    pil_img = Image.fromarray(img)
    draw = ImageDraw.Draw(pil_img)

    # Layer 2: water sources
    _draw_water_sources(draw, _extract_water_sources(engine), cell_px)

    # Layer 3: entities; unknown types share the ANIMAL slot (3)
    type_order = {"DORMANT": 0, "PLANT": 1, "TREE": 2, "ANIMAL": 3, "BIRD": 4, "INSECT": 5}
    entities = _extract_entities(engine)
    entities.sort(key=lambda e: type_order.get(
        "DORMANT" if e["state"] == "DORMANT" else e["type"], 3
    ))

    for entity in entities:
        _draw_entity(draw, entity, cell_px)

    return np.array(pil_img)
class SimState:
    """Opaque simulation state passed between init/step/render."""
    __slots__ = ("engine", "tick", "config", "rain_interval", "rain_intensity")

    def __init__(self, engine: Any, tick: int, config: dict):
        """Bundle an engine with its tick counter and rain schedule.

        Rain settings are read from ``config["rain"]`` (keys ``interval`` and
        ``intensity``); a missing section means no rain (interval 0) at the
        default intensity 0.8.
        """
        self.engine = engine
        self.tick = tick
        self.config = config
        rain_cfg = config.get("rain") or {}
        # Ticks are integers, so a fractional interval (e.g. straight from a
        # continuous θ dimension) would almost never satisfy
        # ``tick % interval == 0``. Round it to a whole number of ticks.
        self.rain_interval = int(round(float(rain_cfg.get("interval", 0) or 0)))
        self.rain_intensity = rain_cfg.get("intensity", 0.8)


class LilaSubstrate:
    """ASAL-compatible substrate interface for līlā.

    Usage::

        substrate = LilaSubstrate()
        state = substrate.init(theta, seed=42)
        for _ in range(2000):
            state = substrate.step(state)
        frame = substrate.render(state)  # (256, 256, 3) uint8
    """

    def __init__(self, spec: ThetaSpec | None = None, img_size: int = 256):
        """Store the parameter spec (default: eco-rates spec) and render size."""
        self.spec = spec if spec is not None else make_eco_rates_spec()
        self.img_size = img_size

    def theta_spec(self) -> ThetaSpec:
        """Describes the parameter space: names, ranges, types."""
        return self.spec

    def init(self, theta: np.ndarray, seed: int = 0) -> SimState:
        """Initialize simulation state from parameter vector θ.

        Clips θ to the spec's bounds, converts it to a world config,
        constructs the engine from that config, and returns a SimState at
        tick 0.
        """
        theta = self.spec.clip(theta)
        config = theta_to_world_config(theta, seed=seed)
        engine = EcosystemEngine(config)
        return SimState(engine=engine, tick=0, config=config)

    def step(self, state: SimState) -> SimState:
        """Advance simulation by one tick, applying periodic rain if configured.

        Mutates and returns the same *state* object.
        """
        state.engine.step()
        state.tick += 1

        # Rain fires every ``rain_interval`` ticks; 0 disables it
        if state.rain_interval > 0 and state.tick % state.rain_interval == 0:
            state.engine.apply_rain(state.rain_intensity)

        return state

    def render(self, state: SimState) -> np.ndarray:
        """Render current state via ``render_headless`` at ``self.img_size``."""
        return render_headless(state.engine, img_size=self.img_size)

    def rollout(
        self,
        theta: np.ndarray,
        n_steps: int = 2000,
        n_frames: int = 20,
        seed: int = 0,
    ) -> list[np.ndarray]:
        """Run a full rollout and collect evenly-spaced rendered frames.

        Frame ``k`` (1-based) is captured after tick
        ``ceil(k * n_steps / n_frames)``, so captures span the whole rollout
        and the last frame is always the final state. When ``n_steps`` is a
        multiple of ``n_frames`` this is every ``n_steps // n_frames`` ticks.

        Parameters
        ----------
        theta : np.ndarray
            Parameter vector.
        n_steps : int
            Total simulation ticks (>= 0).
        n_frames : int
            Number of frames to capture (>= 1).
        seed : int
            Random seed for initialization.

        Returns
        -------
        list[np.ndarray]
            Exactly ``n_frames`` frames as returned by :meth:`render`. If
            ``n_steps < n_frames`` several entries refer to the same tick and
            share one rendered frame object.

        Raises
        ------
        ValueError
            If ``n_frames < 1`` or ``n_steps < 0``.
        """
        if n_frames < 1:
            raise ValueError(f"n_frames must be >= 1, got {n_frames}")
        if n_steps < 0:
            raise ValueError(f"n_steps must be >= 0, got {n_steps}")

        # Ceil-divide so the schedule is non-decreasing and ends at n_steps
        capture_ticks = [-(-(k * n_steps) // n_frames) for k in range(1, n_frames + 1)]

        state = self.init(theta, seed=seed)
        frames = []
        cursor = 0  # index of the next capture still owed
        tick = 0
        while True:
            if cursor < n_frames and capture_ticks[cursor] == tick:
                frame = self.render(state)
                # Several captures may land on one tick when n_steps < n_frames
                while cursor < n_frames and capture_ticks[cursor] == tick:
                    frames.append(frame)
                    cursor += 1
            if tick == n_steps:
                break
            state = self.step(state)
            tick += 1

        return frames
import copy
from dataclasses import dataclass, field

import numpy as np


@dataclass
class ThetaDim:
    """A single dimension of the search space."""
    name: str
    low: float
    high: float
    default: float


@dataclass
class ThetaSpec:
    """Describes the full parameter space: names, ranges, defaults."""
    dims: list[ThetaDim] = field(default_factory=list)

    @property
    def ndim(self) -> int:
        return len(self.dims)

    @property
    def names(self) -> list[str]:
        return [d.name for d in self.dims]

    @property
    def bounds(self) -> np.ndarray:
        """Shape (ndim, 2) — lower and upper bounds."""
        # reshape keeps the (ndim, 2) contract even for an empty spec.
        return np.array(
            [[d.low, d.high] for d in self.dims], dtype=float
        ).reshape(-1, 2)

    @property
    def defaults(self) -> np.ndarray:
        return np.array([d.default for d in self.dims])

    def clip(self, theta: np.ndarray) -> np.ndarray:
        """Clip theta to valid bounds."""
        b = self.bounds
        return np.clip(theta, b[:, 0], b[:, 1])

    def sample_uniform(self, rng: np.random.Generator | None = None) -> np.ndarray:
        """Sample a random theta uniformly within bounds."""
        if rng is None:
            rng = np.random.default_rng()
        b = self.bounds
        return rng.uniform(b[:, 0], b[:, 1])


# ---------------------------------------------------------------------------
# EcoRates θ spec — the Track A parameter space
# ---------------------------------------------------------------------------

def make_eco_rates_spec() -> ThetaSpec:
    """17-dimensional parameter space over rates, biome, water, counts, rain.

    Dimensions:
        0–5:   Rate multipliers (consumption, hunger, thirst, growth,
               reproduction, water_replenishment). Range [0.2, 5.0].
        6:     soil_nitrogen. Range [0.1, 1.0].
        7:     soil_moisture. Range [0.1, 1.0].
        8:     climate_temperature. Range [10.0, 40.0].
        9:     water_count. Range [1, 4] (rounded to int).
        10:    water_radius. Range [1.0, 5.0].
        11:    deer_count. Range [2, 8] (rounded to int).
        12:    butterfly_count. Range [2, 8] (rounded to int).
        13:    oak_count. Range [1, 5] (rounded to int).
        14:    grass_count. Range [3, 15] (rounded to int).
        15:    wildflower_count. Range [2, 10] (rounded to int).
        16:    rain_interval (ticks between rain events, 0 = no rain).
               Range [0, 2000] (rounded to int).
    """
    dims = [
        # Rate multipliers
        ThetaDim("rate_consumption", 0.2, 5.0, 1.0),
        ThetaDim("rate_hunger", 0.2, 5.0, 1.0),
        ThetaDim("rate_thirst", 0.2, 5.0, 1.0),
        ThetaDim("rate_growth", 0.2, 5.0, 1.0),
        ThetaDim("rate_reproduction", 0.2, 5.0, 1.0),
        ThetaDim("rate_water_replenishment", 0.2, 5.0, 1.0),
        # Biome base values
        ThetaDim("soil_nitrogen", 0.1, 1.0, 0.7),
        ThetaDim("soil_moisture", 0.1, 1.0, 0.65),
        ThetaDim("climate_temperature", 10.0, 40.0, 22.0),
        # Water sources
        ThetaDim("water_count", 1.0, 4.0, 2.0),
        ThetaDim("water_radius", 1.0, 5.0, 3.0),
        # Entity counts
        ThetaDim("deer_count", 2.0, 8.0, 4.0),
        ThetaDim("butterfly_count", 2.0, 8.0, 4.0),
        ThetaDim("oak_count", 1.0, 5.0, 2.0),
        ThetaDim("grass_count", 3.0, 15.0, 8.0),
        ThetaDim("wildflower_count", 2.0, 10.0, 5.0),
        # Rain
        ThetaDim("rain_interval", 0.0, 2000.0, 500.0),
    ]
    return ThetaSpec(dims=dims)


# ---------------------------------------------------------------------------
# θ → world config conversion
# ---------------------------------------------------------------------------

# Default grid size and species templates
_GRID = 32


def _spread_positions(count: int, grid_size: int, rng: np.random.Generator) -> list[list[float]]:
    """Generate uniformly random entity positions on the grid.

    Returns a list of [x, y, z] lists with y fixed at 0.0 and x/z drawn from
    [1, grid_size - 1). The x draw precedes the z draw for each entity.
    """
    positions = []
    for _ in range(count):
        x = float(rng.uniform(1, grid_size - 1))
        z = float(rng.uniform(1, grid_size - 1))
        positions.append([x, 0.0, z])
    return positions


def _water_positions(count: int, grid_size: int, rng: np.random.Generator) -> list[list[float]]:
    """Generate water source positions spread across the grid.

    Sources are assigned to the four quadrant centres in turn (wrapping
    around after four), jittered by up to ±0.15 of the grid size, and kept
    at least two cells from the border.
    """
    positions = []
    quadrants = [
        (0.25, 0.25), (0.75, 0.25), (0.25, 0.75), (0.75, 0.75),
    ]
    for i in range(count):
        qx, qz = quadrants[i % len(quadrants)]
        jitter_x = rng.uniform(-0.15, 0.15)
        jitter_z = rng.uniform(-0.15, 0.15)
        x = float(np.clip((qx + jitter_x) * grid_size, 2, grid_size - 2))
        z = float(np.clip((qz + jitter_z) * grid_size, 2, grid_size - 2))
        positions.append([x, 0.0, z])
    return positions


# ---------------------------------------------------------------------------
# Entity metadata templates (from demo_world.json)
# ---------------------------------------------------------------------------

_DEER_METADATA = {
    "diet": "herbivore", "body_mass": 60.0, "metabolism_rate": 1.0,
    "sensory_range": 12.0, "movement_speed": 3.0,
    "lifespan": 800.0, "reproduction_threshold": 0.8,
}

_BUTTERFLY_METADATA = {
    "diet": "herbivore", "colony_size": 1, "metabolism_rate": 0.6,
    "pollination_range": 6.0, "movement_speed": 2.0, "lifespan": 150.0,
}

_OAK_METADATA = {
    "metabolism": "photosynthetic", "growth_rate": 0.005, "root_depth": 2.0,
    "canopy_radius": 4.0, "height_max": 12.0, "trunk_radius": 0.6,
    "shade_factor": 0.35,
    "nutrient_demand": {"nitrogen": 0.02, "phosphorus": 0.01},
    "water_demand": 0.05,
}

_GRASS_METADATA = {
    "metabolism": "photosynthetic", "growth_rate": 0.06, "root_depth": 0.1,
    "canopy_radius": 0.0,
    "nutrient_demand": {"nitrogen": 0.005, "phosphorus": 0.002},
    "water_demand": 0.02,
}

_WILDFLOWER_METADATA = {
    "metabolism": "photosynthetic", "growth_rate": 0.09, "root_depth": 0.15,
    "canopy_radius": 0.0,
    "nutrient_demand": {"nitrogen": 0.008, "phosphorus": 0.004},
    "water_demand": 0.025,
}


def _make_entities(
    prefix: str,
    entity_type: str,
    species: str,
    metadata: dict,
    positions: list[list[float]],
    skeleton_id: str | None = None,
) -> list[dict]:
    """Build one entity dict per position for a single species."""
    entities = []
    for i, pos in enumerate(positions):
        entity = {
            "id": f"{prefix}_{i:02d}",
            "type": entity_type,
            "species": species,
            "position": pos,
            # Deep copy: the plant templates contain a nested
            # "nutrient_demand" dict that must not be shared between
            # entities or with the module-level template.
            "metadata": copy.deepcopy(metadata),
        }
        if skeleton_id is not None:
            entity["skeleton_id"] = skeleton_id
        entities.append(entity)
    return entities


def theta_to_world_config(theta: np.ndarray, seed: int = 0) -> dict:
    """Convert a flat θ vector into a world config dict.

    The dict is passed to ``EcosystemEngine(config)``; per the original
    module notes its layout follows demo_world.json.

    Parameters
    ----------
    theta : np.ndarray
        Parameter vector of length matching make_eco_rates_spec().ndim.
        Values are used as given (no clipping is applied here).
    seed : int
        Random seed for entity/water placement.

    Returns
    -------
    dict
        World configuration ready for EcosystemEngine(config).

    Raises
    ------
    ValueError
        If ``theta`` is not a 1-D vector of the expected length.
    """
    spec = make_eco_rates_spec()
    theta = np.asarray(theta, dtype=float)
    if theta.shape != (spec.ndim,):
        raise ValueError(
            f"theta must have shape ({spec.ndim},), got {theta.shape}"
        )

    rng = np.random.default_rng(seed)

    # Unpack theta by dimension name
    t = dict(zip(spec.names, theta.tolist()))

    # Integer counts
    n_water = int(round(t["water_count"]))
    n_deer = int(round(t["deer_count"]))
    n_butterfly = int(round(t["butterfly_count"]))
    n_oak = int(round(t["oak_count"]))
    n_grass = int(round(t["grass_count"]))
    n_wildflower = int(round(t["wildflower_count"]))
    rain_interval = int(round(t["rain_interval"]))

    # Water sources — position as [x, y, z] array
    water_sources = [
        {"position": pos, "radius": float(t["water_radius"])}
        for pos in _water_positions(n_water, _GRID, rng)
    ]

    # Build entity list. The order of these calls fixes the order in which
    # the RNG is consumed, so it must not change if layouts for a given
    # seed are to stay reproducible.
    entities = []
    entities += _make_entities(
        "deer", "ANIMAL", "deer", _DEER_METADATA,
        _spread_positions(n_deer, _GRID, rng), skeleton_id="quadruped_medium",
    )
    entities += _make_entities(
        "butterfly", "INSECT", "monarch", _BUTTERFLY_METADATA,
        _spread_positions(n_butterfly, _GRID, rng), skeleton_id="insect_wing",
    )
    entities += _make_entities(
        "oak", "TREE", "meadow_oak", _OAK_METADATA,
        _spread_positions(n_oak, _GRID, rng),
    )
    entities += _make_entities(
        "grass", "PLANT", "meadow_grass", _GRASS_METADATA,
        _spread_positions(n_grass, _GRID, rng),
    )
    entities += _make_entities(
        "flower", "PLANT", "wildflower", _WILDFLOWER_METADATA,
        _spread_positions(n_wildflower, _GRID, rng),
    )

    config = {
        "version": "0.1",
        "session_id": f"search-{seed:06d}",

        "environment": {
            "type": "MEADOW",
            "biome": "TEMPERATE",
            "climate": {
                "temperature": float(t["climate_temperature"]),
                "humidity": 0.6,
                "rainfall": 0.4,
                "wind_speed": 0.15,
                "light_level": 0.85,
            },
            "soil": {
                "nitrogen": float(t["soil_nitrogen"]),
                "phosphorus": 0.6,
                "potassium": 0.5,
                "moisture": float(t["soil_moisture"]),
                "organic_matter": 0.4,
                "ph": 6.8,
            },
            "voxel_grid": {
                "dimensions": [_GRID, _GRID, _GRID],
                "cell_size": 1.0,
            },
            "water_sources": water_sources,
        },

        "model": {
            "adapter": "static",
        },

        "rates": {
            "consumption": float(t["rate_consumption"]),
            "hunger": float(t["rate_hunger"]),
            "thirst": float(t["rate_thirst"]),
            "growth": float(t["rate_growth"]),
            "reproduction": float(t["rate_reproduction"]),
            "water_replenishment": float(t["rate_water_replenishment"]),
        },

        "entities": entities,
    }

    # Randomization — opt-in, use for position jitter
    config["randomize"] = {
        "jitter": 1.5,
        "extra_grass": [0, 2],
        "extra_flowers": [0, 1],
        "transform": True,
    }

    # Rain config — read by substrate protocol, not part of engine init
    # 0 means no rain
    if rain_interval > 0:
        config["rain"] = {
            "interval": rain_interval,
            "intensity": 0.8,
        }

    return config
+ """ + if umap is None: + raise ImportError("umap-learn is required for atlas generation: pip install umap-learn") + + n = len(embeddings) + assert len(thumbnails) == n, f"Got {n} embeddings but {len(thumbnails)} thumbnails" + + # Project to 2D with UMAP + reducer = umap.UMAP( + n_components=2, + metric="cosine", + random_state=umap_seed, + n_neighbors=min(15, n - 1), + ) + coords_2d = reducer.fit_transform(embeddings) + + # Normalize to [0, 1] + mins = coords_2d.min(axis=0) + maxs = coords_2d.max(axis=0) + ranges = maxs - mins + ranges[ranges == 0] = 1.0 # avoid div by zero + coords_norm = (coords_2d - mins) / ranges + + # Grid sample: for each cell, find the nearest simulation + atlas_size = grid_cells * thumb_size + atlas = np.ones((atlas_size, atlas_size, 3), dtype=np.uint8) * 240 # light gray bg + + used = set() + for row in range(grid_cells): + for col in range(grid_cells): + # Center of this grid cell in normalized coords + cx = (col + 0.5) / grid_cells + cy = (row + 0.5) / grid_cells + + # Find nearest unused simulation + dists = np.sqrt((coords_norm[:, 0] - cx) ** 2 + (coords_norm[:, 1] - cy) ** 2) + + # Prefer unused simulations, but allow reuse if needed + sorted_idx = np.argsort(dists) + best = None + for idx in sorted_idx: + if idx not in used: + best = idx + break + if best is None: + best = sorted_idx[0] # all used, pick closest + + used.add(best) + + # Resize thumbnail and place in atlas + thumb = Image.fromarray(thumbnails[best]) + thumb = thumb.resize((thumb_size, thumb_size), Image.NEAREST) + thumb_arr = np.array(thumb) + + y0 = row * thumb_size + x0 = col * thumb_size + atlas[y0:y0 + thumb_size, x0:x0 + thumb_size] = thumb_arr + + if output_path is not None: + output_path = Path(output_path) + output_path.parent.mkdir(parents=True, exist_ok=True) + Image.fromarray(atlas).save(output_path) + print(f"Atlas saved to {output_path}") + + return atlas + + +def plot_embedding_space( + embeddings: np.ndarray, + output_path: str | Path | None = None, + 
umap_seed: int = 42, + diversity_scores: np.ndarray | None = None, +) -> None: + """Plot the 2D UMAP projection of embeddings as a scatter plot. + + Parameters + ---------- + embeddings : np.ndarray + Shape (n, embed_dim). + output_path : str or Path, optional + Save plot here. + umap_seed : int + UMAP random seed. + diversity_scores : np.ndarray, optional + Per-point scores for coloring. If None, uses nearest-neighbor distance. + """ + if umap is None or plt is None: + raise ImportError("umap-learn and matplotlib are required") + + n = len(embeddings) + reducer = umap.UMAP( + n_components=2, + metric="cosine", + random_state=umap_seed, + n_neighbors=min(15, n - 1), + ) + coords = reducer.fit_transform(embeddings) + + if diversity_scores is None: + # Color by nearest-neighbor distance + sim = embeddings @ embeddings.T + dist = 1.0 - sim + np.fill_diagonal(dist, np.inf) + diversity_scores = dist.min(axis=1) + + fig, ax = plt.subplots(1, 1, figsize=(8, 8)) + scatter = ax.scatter( + coords[:, 0], coords[:, 1], + c=diversity_scores, cmap="viridis", + s=30, alpha=0.8, edgecolors="none", + ) + plt.colorbar(scatter, ax=ax, label="NN distance") + ax.set_title("Ecosystem embedding space") + ax.set_xticks([]) + ax.set_yticks([]) + + if output_path: + fig.savefig(output_path, dpi=150, bbox_inches="tight") + print(f"Scatter plot saved to {output_path}") + plt.close(fig) + + +def plot_diversity_curve( + diversity_history: list[float], + output_path: str | Path | None = None, +) -> None: + """Plot diversity (min NN distance) over generations.""" + if plt is None: + raise ImportError("matplotlib is required") + + fig, ax = plt.subplots(1, 1, figsize=(8, 4)) + ax.plot(diversity_history, linewidth=1.5, color="#534AB7") + ax.set_xlabel("Generation") + ax.set_ylabel("Min nearest-neighbor distance") + ax.set_title("Diversity over search") + ax.grid(True, alpha=0.3) + + if output_path: + fig.savefig(output_path, dpi=150, bbox_inches="tight") + print(f"Diversity curve saved to 
{output_path}") + plt.close(fig) diff --git a/search/pyproject.toml b/search/pyproject.toml new file mode 100644 index 0000000..eaffca0 --- /dev/null +++ b/search/pyproject.toml @@ -0,0 +1,36 @@ +[build-system] +requires = ["setuptools>=68.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "lila-search" +version = "0.1.0-dev" +description = "ASAL-compatible search over līlā ecosystem simulations" +requires-python = ">=3.11" +license = {text = "Apache-2.0"} +dependencies = [ + "lila-ecosim @ file:///${PROJECT_ROOT}/../server", + "numpy>=1.24", + "Pillow>=10.0", + "torch>=2.0", + "open-clip-torch>=2.24", + "cma>=3.3", + "umap-learn>=0.5", + "matplotlib>=3.7", + "tqdm>=4.65", +] + +[project.optional-dependencies] +dev = ["pytest>=7.0", "ruff>=0.1"] + +[project.scripts] +lila-illuminate = "scripts.run_illumination:main" + +[tool.setuptools.packages.find] +include = ["lila_search*"] + +[tool.ruff] +line-length = 100 + +[tool.pytest.ini_options] +testpaths = ["tests"] diff --git a/search/scripts/export_world.py b/search/scripts/export_world.py new file mode 100644 index 0000000..f0eef32 --- /dev/null +++ b/search/scripts/export_world.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 +"""Export an atlas entry as a world config JSON for browser replay. 
import argparse
import json
import sys
from pathlib import Path

import numpy as np


def main(argv: list[str] | None = None) -> None:
    """Export one atlas entry as a world config JSON for browser replay.

    Loads ``thetas_final.npy`` from the results directory, converts the
    selected θ vector to a world config, prints a parameter summary, and
    writes the config to the output path (default ``replay.json`` in the
    current directory).

    Parameters
    ----------
    argv : list[str], optional
        Command-line arguments; defaults to ``sys.argv[1:]``.

    Exits with status 1 if the thetas file is missing or the index is out
    of range.
    """
    parser = argparse.ArgumentParser(
        description="Export an atlas entry as a world config JSON",
    )
    parser.add_argument(
        "results_dir",
        type=str,
        help="Path to illumination results directory",
    )
    parser.add_argument(
        "index",
        type=int,
        help="Atlas entry index (0-based, matches thumbnail sim_NNNN.png)",
    )
    parser.add_argument(
        "-o", "--output",
        type=str,
        default=None,
        help="Output JSON path (default: replay.json in current directory)",
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=None,
        help="Override seed (default: uses the atlas index as seed)",
    )

    args = parser.parse_args(argv)
    results_dir = Path(args.results_dir)

    # Load thetas
    thetas_path = results_dir / "thetas_final.npy"
    if not thetas_path.exists():
        print(f"Error: {thetas_path} not found", file=sys.stderr)
        sys.exit(1)

    thetas = np.load(thetas_path)

    if args.index < 0 or args.index >= len(thetas):
        print(f"Error: index {args.index} out of range (0–{len(thetas) - 1})", file=sys.stderr)
        sys.exit(1)

    # Import here so --help works without full deps
    from lila_search.theta import make_eco_rates_spec, theta_to_world_config

    theta = thetas[args.index]
    seed = args.seed if args.seed is not None else args.index
    spec = make_eco_rates_spec()

    config = theta_to_world_config(theta, seed=seed)

    # Print theta summary
    print(f"Atlas entry #{args.index}")
    print(f"Seed: {seed}")
    print("Parameters:")
    for name, val in zip(spec.names, theta):
        print(f"  {name:30s} = {val:.3f}")

    # Write config
    output = Path(args.output or "replay.json")
    with open(output, "w", encoding="utf-8") as f:
        json.dump(config, f, indent=2)

    print(f"\nWorld config written to {output}")
    print("\nTo replay in browser:")
    print("  cd ../server")
    # Absolute path: the hint stays valid wherever -o points and whichever
    # directory the worker is started from.
    print(f"  WORLD_FILE={output.resolve()} uv run python -m ecosim.worker")
    print("  # Open http://localhost:8001")


if __name__ == "__main__":
    main()
import argparse
from pathlib import Path


def _positive_int(text: str) -> int:
    """argparse type: an integer >= 1."""
    value = int(text)
    if value < 1:
        raise argparse.ArgumentTypeError(f"must be >= 1, got {value}")
    return value


def _non_negative_int(text: str) -> int:
    """argparse type: an integer >= 0."""
    value = int(text)
    if value < 0:
        raise argparse.ArgumentTypeError(f"must be >= 0, got {value}")
    return value


def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the illumination run."""
    parser = argparse.ArgumentParser(
        description="ASAL illumination search over līlā ecosystems",
    )
    parser.add_argument(
        "--output", "-o",
        type=str,
        default="results/illumination",
        help="Output directory for results (default: results/illumination)",
    )
    parser.add_argument(
        "--pop-size",
        type=_positive_int,
        default=64,
        help="Population size (default: 64)",
    )
    parser.add_argument(
        "--children",
        type=_positive_int,
        default=32,
        help="Children per generation (default: 32)",
    )
    parser.add_argument(
        "--generations",
        type=_non_negative_int,
        default=100,
        help="Number of generations (default: 100)",
    )
    parser.add_argument(
        "--steps",
        type=_positive_int,
        default=2000,
        help="Simulation ticks per rollout (default: 2000)",
    )
    parser.add_argument(
        "--frames",
        type=_positive_int,
        default=20,
        help="Frames captured per rollout (default: 20)",
    )
    parser.add_argument(
        "--mutation-scale",
        type=float,
        default=0.1,
        help="Mutation scale relative to parameter ranges (default: 0.1)",
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=0,
        help="Random seed (default: 0)",
    )
    parser.add_argument(
        "--device",
        type=str,
        default=None,
        help="Torch device for CLIP (default: auto-detect cuda/cpu)",
    )
    parser.add_argument(
        "--atlas-grid",
        type=_positive_int,
        default=8,
        help="Atlas grid cells per side (default: 8, produces 8x8 atlas)",
    )
    parser.add_argument(
        "--skip-atlas",
        action="store_true",
        help="Skip atlas generation (just run search and save raw results)",
    )
    parser.add_argument(
        "--workers",
        type=_positive_int,
        default=1,
        help="Parallel CPU rollout workers (default: 1, sequential)",
    )
    return parser


def main(argv: list[str] | None = None) -> None:
    """Run the illumination search and, unless skipped, write the plots.

    Parameters
    ----------
    argv : list[str], optional
        Command-line arguments; defaults to ``sys.argv[1:]``. Invalid
        counts (e.g. ``--frames 0``) are rejected by argparse with exit
        status 2 before any heavy dependency is imported.
    """
    args = _build_parser().parse_args(argv)
    output_dir = Path(args.output)

    # -----------------------------------------------------------------------
    # Import here so --help works without torch installed
    # -----------------------------------------------------------------------
    from lila_search.substrate import LilaSubstrate
    from lila_search.evaluator import CLIPEvaluator
    from lila_search.illumination import illuminate, IlluminationConfig
    from lila_search.viz.atlas import build_atlas, plot_embedding_space, plot_diversity_curve

    print("=" * 60)
    print("līlā — ASAL Illumination Search")
    print("=" * 60)
    print(f"  Population:   {args.pop_size}")
    print(f"  Children:     {args.children}")
    print(f"  Generations:  {args.generations}")
    print(f"  Steps/rollout:{args.steps}")
    print(f"  Frames:       {args.frames}")
    print(f"  Mutation:     {args.mutation_scale}")
    print(f"  Seed:         {args.seed}")
    print(f"  Output:       {output_dir}")
    print(f"  Device:       {args.device or 'auto'}")
    print(f"  Workers:      {args.workers}")
    print("=" * 60)

    # Initialize components
    substrate = LilaSubstrate()
    evaluator = CLIPEvaluator(device=args.device)

    config = IlluminationConfig(
        pop_size=args.pop_size,
        n_children=args.children,
        n_generations=args.generations,
        n_steps=args.steps,
        n_frames=args.frames,
        mutation_scale=args.mutation_scale,
        seed=args.seed,
        n_workers=args.workers,
    )

    # Run search
    result = illuminate(
        substrate=substrate,
        evaluator=evaluator,
        config=config,
        output_dir=output_dir,
    )

    # Generate visualizations
    if not args.skip_atlas:
        print("\nGenerating visualizations...")

        build_atlas(
            result.embeddings,
            result.thumbnails,
            grid_cells=args.atlas_grid,
            output_path=output_dir / "atlas.png",
        )

        plot_embedding_space(
            result.embeddings,
            output_path=output_dir / "scatter.png",
        )

        plot_diversity_curve(
            result.diversity_history,
            output_path=output_dir / "diversity.png",
        )

    print(f"\nDone. Results in {output_dir}/")


if __name__ == "__main__":
    main()
++++++++++++++++++ 14 files changed, 773 insertions(+), 43 deletions(-) rename docs/LILA_PROJECT_STATE.md => LILA_PROJECT_STATE.md (87%) mode change 100755 => 100644 create mode 100644 search/README.md create mode 100644 search/tests/test_renderer.py create mode 100644 search/tests/test_substrate.py create mode 100644 search/tests/test_theta.py diff --git a/docs/LILA_PROJECT_STATE.md b/LILA_PROJECT_STATE.md old mode 100755 new mode 100644 similarity index 87% rename from docs/LILA_PROJECT_STATE.md rename to LILA_PROJECT_STATE.md index 0044664..0db6d3f --- a/docs/LILA_PROJECT_STATE.md +++ b/LILA_PROJECT_STATE.md @@ -32,7 +32,7 @@ The name comes from the Sanskrit concept of [līlā](https://www.embodiedphiloso ┌─────────────────────────┐ │ Browser Visualizer │ ← v0.0.1-alpha (shipped, single HTML file) │ Godot 4.x Client │ ← deferred to Milestone 4 -│ Headless Renderer │ ← Milestone 3 (for ASAL search) +│ Headless Renderer │ ← Shipped (PIL, 256×256, for ASAL search) └──────────┬──────────────┘ │ WebSocket (delta-encoded tick packets) ┌──────────▼──────────────┐ @@ -58,11 +58,16 @@ The name comes from the Sanskrit concept of [līlā](https://www.embodiedphiloso └──────────────────────────────────────────────────────┘ │ ┌──────────▼───────────────────────────────────────────┐ -│ search/ (Milestone 3, separate package) │ -│ ASAL Substrate Protocol (Init/Step/Render) │ -│ FM Evaluator (CLIP/DINOv2) │ -│ CMA-ES Search (target, open-ended, illumination) │ -│ Simulation Atlas Visualization │ +│ search/ (Shipped — Track A, rate-tuning search) │ +│ ASAL Substrate Protocol (Init/Step/Render) ✅ │ +│ Headless PIL Renderer (256×256) ✅ │ +│ CLIP ViT-B/32 Evaluator ✅ │ +│ Illumination Search (diversity GA) ✅ │ +│ Simulation Atlas (UMAP + grid sampling) ✅ │ +│ ───────────────────────────────────────────────── │ +│ Target Search (CMA-ES + text prompts) pending │ +│ Open-Ended Search (temporal novelty) pending │ +│ Trait-Based θ Expansion (Milestone 2 dep) pending │ 
└──────────────────────────────────────────────────────┘ ``` @@ -109,19 +114,23 @@ lila/ │ │ └── index.html # canvas-based 2D ecosystem visualizer │ └── godot/ # [M4] Godot 4.x client │ -├── search/ # [M3] ASAL substrate + search -│ ├── pyproject.toml # deps: torch, clip, cma, umap, pillow -│ ├── substrate.py # ALifeSubstrate protocol + LilaSubstrate -│ ├── renderer.py # headless PIL renderer (256×256) -│ ├── evaluator.py # FM evaluation (CLIP, DINOv2) -│ ├── search.py # supervised, open-ended, illumination -│ ├── theta.py # θ parameterization (EcoRates/Topology/Adapt) -│ ├── atlas.py # simulation atlas UMAP visualization -│ ├── constraints.py # physical plausibility validation -│ └── examples/ -│ ├── search_target.py -│ ├── search_openended.py -│ └── search_illuminate.py +├── search/ # ASAL substrate + search (shipped Track A) +│ ├── pyproject.toml # deps: torch, open-clip, cma, umap, pillow +│ ├── lila_search/ +│ │ ├── __init__.py +│ │ ├── substrate.py # LilaSubstrate: Init/Step/Render protocol +│ │ ├── renderer.py # headless PIL renderer (256×256) +│ │ ├── theta.py # θ parameterization (17-dim EcoRates) +│ │ ├── evaluator.py # CLIP ViT-B/32 embedding +│ │ ├── illumination.py # diversity GA with farthest-point selection +│ │ └── viz/ +│ │ └── atlas.py # UMAP projection + grid-sampled atlas +│ ├── scripts/ +│ │ └── run_illumination.py # CLI entry point +│ └── tests/ +│ ├── test_theta.py # θ spec + world config generation +│ ├── test_renderer.py # headless renderer (mock engine) +│ └── test_substrate.py # integration tests (requires ecosim) │ ├── training/ # ML training pipeline (not core) │ ├── pyproject.toml @@ -328,6 +337,26 @@ Five species, two skeletons, five interaction chains: 19. ✅ World randomization (D4 transforms, jitter, extra plants) 20. ✅ `docs/lessons_learned.md` +### Milestone — ASAL Search Track A ✅ + +21. ✅ Headless PIL renderer — engine state → 256×256 RGB numpy array +22. 
✅ θ parameterization — 17-dim EcoRates (rate multipliers, biome, water, entity counts, rain) +23. ✅ `theta_to_world_config()` — flat vector → valid `demo_world.json` format +24. ✅ `LilaSubstrate` — ASAL Init(θ)/Step/Render protocol wrapping EcosystemEngine +25. ✅ `CLIPEvaluator` — CLIP ViT-B/32 embedding with batched multi-rollout support +26. ✅ Illumination search — diversity-driven GA, farthest-point selection, configurable population/generations +27. ✅ Parallel CPU rollouts via ProcessPoolExecutor (`--workers N`) +28. ✅ Simulation atlas — UMAP projection + grid-sampled thumbnail composite +29. ✅ Diversity curve + embedding scatter visualizations +30. ✅ CLI entry point (`run_illumination.py`) with full arg parsing +31. ✅ Unit tests (test_theta, test_renderer with mock engine) — 23 tests passing +32. ✅ Integration tests (test_substrate, requires ecosim) — 5 tests passing +33. ✅ First illumination run: 64 pop, 100 gen, 2000-tick rollouts, RTX 5060 Ti, ~100 min +34. ✅ Diversity climbed 0.005 → 0.022 (min NN dist), mean NN dist still rising at termination +35. ✅ Atlas shows distinct ecological regimes: drought-stressed, deer explosions, plant-dominated, balanced +36. ✅ README updated with search section, atlas image, roadmap reflects shipped search +37. ✅ `search/` package with own pyproject.toml, uv workflow, .gitignore for results/ + --- ## Pending — Milestone 2: Trait-Based Architecture + Two-Pool Nutrients @@ -408,9 +437,11 @@ Express deer, butterfly, oak, meadow grass, wildflower as TraitVectors in JSON. --- -## Pending — Milestone 3: New Species + ASAL Substrate +## Pending — Milestone 3: New Species + Trait-Based Search -**Goal:** Validate the trait architecture by adding three species with zero engine code. Build the ASAL substrate protocol and FM-guided search pipeline. +**Goal:** Validate the trait architecture by adding three species with zero engine code. Expand the ASAL search pipeline from rate-tuning (Track A, shipped) to trait-based search (Track B). 
+ +**Dependencies:** Milestone 2 (trait system) must ship first. Track A search infrastructure is complete and stable. **Reference documents:** `TRAIT_TRANSITION_PLAN.md` (Phases 2–3) @@ -431,40 +462,31 @@ With 8 species, run 10,000-tick simulations documenting which interaction chains - Cross-trophic competition: songbirds and butterflies competing for fruiting flowers - Thermal range exclusions in extreme biome settings -### ASAL Substrate Protocol - -Formalize līlā as an ASAL-compatible substrate with the three-function interface: -- `Init(θ)` — parameterized world initialization from trait vectors + biome config -- `Step(θ)` — one tick of the hybrid automaton -- `Render(θ)` — headless 256×256 RGB image (PIL, no browser) +### Trait-Based Search (Track B) -### θ Parameterization (Three Variants) +Expand the shipped search pipeline from 17-dim rate tuning to trait-space search: -**EcoRates** (~15 dimensions) — rate multipliers + biome parameters. Answers: "what metabolic tuning produces the most interesting dynamics?" +**θ expansion** — `theta.py` grows to encode trait vectors (body masses, diet types, thermal tolerances, locomotion modes) alongside the existing rate/biome dimensions. `theta_to_world_config()` emits `species_definitions` for the trait compiler. -**EcoTopology** (~50–80 dimensions) — rates + species composition + spatial layout + water sources. Answers: "what ecosystem configurations produce the most diverse dynamics?" +**Three θ variants:** +- **EcoRates** (~17 dimensions, shipped) — rate multipliers + biome. "What tuning produces interesting dynamics?" +- **EcoTopology** (~50–80 dimensions) — rates + species composition + trait vectors. "What organisms produce interesting ecologies?" +- **EcoAdapt** (~550–600 dimensions) — topology + MLP adapter weights. "What learned behaviors produce the most lifelike dynamics?" -**EcoAdapt** (~550–600 dimensions) — topology + MLP adapter weights. 
Answers: "what learned behaviors, in what ecological contexts, produce the most lifelike dynamics?" +**Target search** — CMA-ES optimization toward text prompts via CLIP text embedding. Warm-start from illumination results. -### FM Evaluation Pipeline -- CLIP (ViT-B/32) and DINOv2 for embedding rendered simulation frames -- Three ASAL search modes: - - **Supervised target** — "find parameters matching these ecological prompts" (e.g., "thriving meadow" → "overgrazing" → "rain recovery") - - **Open-endedness** — maximize trajectory novelty in FM embedding space over long rollouts - - **Illumination** — discover maximally diverse set of ecosystem configurations -- CMA-ES optimization (gradient-free, handles 600-dim search spaces) -- Physical plausibility constraints (square-cube law, thermal homeostasis limits, trophic sanity) +**Open-ended search** — maximize temporal novelty in CLIP embedding space over long rollouts. Find ecosystems that don't reach equilibrium. -### Simulation Atlas -UMAP projection of all discovered ecosystems with rendered thumbnails. "The atlas of possible ecologies" — what does the space of all possible temperate meadows look like? +**Physical plausibility constraints** — square-cube law, thermal homeostasis limits, trophic sanity checks on θ. 
### Milestone 3 Deliverables - Three new species as JSON trait vectors (zero engine code) - Updated interaction templates with parameterized mass-ratio windows - `examples/temperate_meadow_8sp.json` — 8-species trait-based world - Emergent dynamics validation report -- `search/` directory with own pyproject.toml -- `search/substrate.py`, `renderer.py`, `evaluator.py`, `search.py`, `theta.py`, `atlas.py`, `constraints.py` +- Expanded `theta.py` with EcoTopology and EcoAdapt variants +- `lila_search/target.py` — CMA-ES target search +- `lila_search/open_ended.py` — temporal novelty search - `docs/asal_substrate_guide.md` --- diff --git a/search/README.md b/search/README.md new file mode 100644 index 0000000..2eecb01 --- /dev/null +++ b/search/README.md @@ -0,0 +1,177 @@ + + +# līlā search — ASAL-compatible ecosystem search + +Discovers diverse ecosystem configurations using foundation-model-guided +illumination search. Wraps the existing ecosim engine in an ASAL substrate +protocol (`Init`/`Step`/`Render`) and searches over rate multipliers, biome +parameters, and entity counts to find maximally diverse simulations. + +## Setup + +```bash +cd search && uv sync --extra dev +``` + +ecosim must be importable for integration tests and search runs. + +**GPU recommended** for CLIP inference. The search runs on CPU if no GPU +is available, just slower (~3× for CLIP embedding). 
+ +## Quick start + +```bash +# Smoke test (~30s, validates full pipeline) +uv run python -m scripts.run_illumination \ + --pop-size 8 --generations 3 --steps 200 --frames 5 \ + -o results/smoke + +# Full run (~100 min on RTX 5060 Ti, 4 workers) +uv run python -m scripts.run_illumination \ + --pop-size 64 --generations 100 --steps 2000 --frames 20 \ + --workers 4 --atlas-grid 8 \ + -o results/illuminate_v1 +``` + +## Output + +``` +results/illuminate_v1/ # The directory passed to -o +├── atlas.png # Simulation atlas (UMAP grid of thumbnails) +├── scatter.png # Embedding space scatter plot +├── diversity.png # Min NN distance over generations +├── thetas_final.npy # Parameter vectors (pop_size, 17) +├── embeddings_final.npy # CLIP embeddings (pop_size, 512) +├── metadata.json # Run config and metrics +├── thumbnails/ # Rendered frame per ecosystem +└── checkpoints/ # Periodic saves during search +``` + +Don't check results into git — binary blobs. Use GitHub Releases for +sharing artifacts. Add atlas images to `docs/assets/` for the README. + +## Replay in browser + +Every simulation in the atlas is deterministic — θ + seed reproduces +it tick-for-tick. Export any atlas entry as a world config and replay +it through the browser visualizer with the full canvas renderer. + +```bash +# Export atlas entry #42 as a world config +uv run python -m scripts.export_world results/illuminate_v1 42 + +# Replay in browser (from server/) +cd ../server +WORLD_FILE=../search/replay.json uv run python -m ecosim.worker +# Open http://localhost:8001 +``` + +The exported config includes the `"rain"` key from the search, so +auto-rain fires at the discovered interval — matching what CLIP saw. + +Browse the atlas image, pick a tile, find its index (row × 8 + col +for an 8×8 atlas, or check `thumbnails/sim_NNNN.png`), export, replay. 
+ +## Scripts + +| Script | Purpose | +|--------|---------| +| `scripts/run_illumination.py` | Run illumination search with full CLI args | +| `scripts/export_world.py` | Export atlas entry → world config JSON for replay | + +### run_illumination.py + +``` +--pop-size N Population size (default: 64) +--children N Children per generation (default: 32) +--generations N Number of generations (default: 100) +--steps N Simulation ticks per rollout (default: 2000) +--frames N Frames captured per rollout (default: 20) +--mutation-scale F Mutation scale relative to ranges (default: 0.1) +--seed N Random seed (default: 0) +--workers N Parallel CPU rollout workers (default: 1) +--atlas-grid N Atlas grid cells per side (default: 8) +--device STR Torch device for CLIP (default: auto) +--skip-atlas Skip atlas generation +-o, --output DIR Output directory +``` + +### export_world.py + +``` +uv run python -m scripts.export_world RESULTS_DIR INDEX [-o OUTPUT] [--seed N] +``` + +Prints the full θ breakdown and writes a world config JSON ready +for the worker. + +## Architecture + +``` +lila_search/ +├── renderer.py Engine state → 256×256 RGB (PIL, no browser) +├── theta.py 17-dim parameter space + θ → world config +├── substrate.py ASAL protocol: Init(θ)/Step/Render wrapping engine +├── evaluator.py CLIP ViT-B/32 embedding with batched multi-rollout +├── illumination.py Diversity GA with farthest-point selection +└── viz/atlas.py UMAP projection + grid-sampled atlas image +``` + +The search package **imports from ecosim but never modifies it**. ecosim +stays stdlib-only. All heavy dependencies (torch, CLIP, umap) live here. 
+ +## Tests + +```bash +# Unit tests (no ecosim needed — uses mock engine) +uv run pytest tests/test_theta.py tests/test_renderer.py -v + +# Integration tests (requires ecosim) +uv run pytest tests/test_substrate.py -v +``` + +## What this searches over (Track A) + +17 dimensions: +- 6 rate multipliers (consumption, hunger, thirst, growth, reproduction, water replenishment) +- 3 biome values (soil nitrogen, soil moisture, climate temperature) +- 5 entity counts (deer, butterfly, oak, grass, wildflower) +- 2 water source params (count, radius) +- 1 rain interval + +This finds "interesting tunings of the same five species." When the +trait-based architecture lands, θ expands to encode body masses, diets, +and thermal tolerances — the search becomes "interesting ecologies." + +## First run results + +64 population, 100 generations, 2000-tick rollouts, RTX 5060 Ti 16GB, +4 parallel workers. ~100 minutes. + +- Initial diversity (min NN dist): 0.005 +- Final diversity: 0.022 (4× improvement) +- Mean NN distance still climbing at termination +- Atlas shows distinct ecological regimes: drought-stressed sparse worlds, + deer population explosions, plant-dominated high-moisture meadows, + balanced mixed communities + +See [WORKING_WITH_RESULTS.md](docs/WORKING_WITH_RESULTS.md) for analysis +recipes, embedding exploration, target search, and replay instructions. + +## Adapting to your engine API + +If the ecosim attribute names differ from what the renderer expects, +edit the `_extract_*` functions in `renderer.py`. These isolate all +engine API assumptions in one place: + +- `_extract_entities(engine)` → list of entity dicts +- `_extract_moisture_grid(engine)` → 32×32 numpy array +- `_extract_water_sources(engine)` → list of water source dicts + +Similarly, if `theta_to_world_config()` produces a dict that doesn't +match your `EcosystemEngine.__init__()` format, adjust it in `theta.py`. 
diff --git a/search/lila_search/evaluator.py b/search/lila_search/evaluator.py index e26debe..4a3aedc 100644 --- a/search/lila_search/evaluator.py +++ b/search/lila_search/evaluator.py @@ -1,3 +1,17 @@ +# Copyright 2025 BioSynthArt Studios LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Foundation model evaluator for ASAL search. Wraps CLIP (via open_clip) to embed rendered simulation frames into a diff --git a/search/lila_search/illumination.py b/search/lila_search/illumination.py index 14dfa4b..b7968a2 100644 --- a/search/lila_search/illumination.py +++ b/search/lila_search/illumination.py @@ -1,3 +1,17 @@ +# Copyright 2025 BioSynthArt Studios LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Illumination search: discover maximally diverse ecosystem configurations. 
Implements ASAL's illumination mode — a genetic algorithm where the diff --git a/search/lila_search/renderer.py b/search/lila_search/renderer.py index 9ca99e0..2f3bbad 100644 --- a/search/lila_search/renderer.py +++ b/search/lila_search/renderer.py @@ -1,3 +1,17 @@ +# Copyright 2025 BioSynthArt Studios LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Headless renderer: EcosystemEngine state → 256×256 RGB numpy array. Produces a top-down 2D image encoding the semantically important features diff --git a/search/lila_search/substrate.py b/search/lila_search/substrate.py index 33b5506..1ecfa89 100644 --- a/search/lila_search/substrate.py +++ b/search/lila_search/substrate.py @@ -1,3 +1,17 @@ +# Copyright 2025 BioSynthArt Studios LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ASAL-compatible substrate wrapping the līlā EcosystemEngine. Provides the Init/Step/Render interface that the search loop expects. 
diff --git a/search/lila_search/theta.py b/search/lila_search/theta.py index 4b2428c..4de3592 100644 --- a/search/lila_search/theta.py +++ b/search/lila_search/theta.py @@ -1,3 +1,17 @@ +# Copyright 2025 BioSynthArt Studios LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """θ parameterization for ASAL search over līlā simulations. Defines the searchable parameter space and provides theta_to_world_config() diff --git a/search/lila_search/viz/atlas.py b/search/lila_search/viz/atlas.py index 6e3eda3..b3a5571 100644 --- a/search/lila_search/viz/atlas.py +++ b/search/lila_search/viz/atlas.py @@ -1,3 +1,17 @@ +# Copyright 2025 BioSynthArt Studios LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Simulation atlas: UMAP projection of discovered ecosystems. 
Takes illumination results (embeddings + thumbnails), projects into 2D diff --git a/search/pyproject.toml b/search/pyproject.toml index eaffca0..5118593 100644 --- a/search/pyproject.toml +++ b/search/pyproject.toml @@ -8,6 +8,7 @@ version = "0.1.0-dev" description = "ASAL-compatible search over līlā ecosystem simulations" requires-python = ">=3.11" license = {text = "Apache-2.0"} +authors = [{ name = "BioSynthArt Studios LLC" }] dependencies = [ "lila-ecosim @ file:///${PROJECT_ROOT}/../server", "numpy>=1.24", diff --git a/search/scripts/export_world.py b/search/scripts/export_world.py index f0eef32..4a0b8f2 100644 --- a/search/scripts/export_world.py +++ b/search/scripts/export_world.py @@ -1,4 +1,19 @@ #!/usr/bin/env python3 + +# Copyright 2025 BioSynthArt Studios LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Export an atlas entry as a world config JSON for browser replay. Usage: diff --git a/search/scripts/run_illumination.py b/search/scripts/run_illumination.py index 1cd63c8..af0755c 100644 --- a/search/scripts/run_illumination.py +++ b/search/scripts/run_illumination.py @@ -1,4 +1,19 @@ #!/usr/bin/env python3 + +# Copyright 2025 BioSynthArt Studios LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Run ASAL illumination search over līlā ecosystem configurations. Usage: diff --git a/search/tests/test_renderer.py b/search/tests/test_renderer.py new file mode 100644 index 0000000..be963dd --- /dev/null +++ b/search/tests/test_renderer.py @@ -0,0 +1,133 @@ +# Copyright 2025 BioSynthArt Studios LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for the headless renderer. + +Uses a mock engine so these tests run without ecosim installed. +Validates that render_headless produces valid images from engine state. 
+""" + +import numpy as np +import pytest + +from lila_search.renderer import render_headless, IMG_SIZE + + +class MockVoxelManager: + """Minimal mock of ecosim's VoxelManager.""" + + def __init__(self, moisture: float = 0.5): + self._moisture = moisture + + def get_layer_slice(self, layer: str, y: int = 0) -> np.ndarray: + return np.full((32, 32), self._moisture, dtype=np.float32) + + +class MockEngine: + """Minimal mock of ecosim's EcosystemEngine.""" + + def __init__(self, entities: dict | None = None, moisture: float = 0.5): + self.entities = entities or {} + self.voxel_manager = MockVoxelManager(moisture) + self.water_sources = [ + {"x": 16.0, "z": 16.0, "radius": 3.0, "water_level": 0.8}, + ] + + +class TestRenderHeadless: + def test_output_shape_default(self): + engine = MockEngine() + img = render_headless(engine) + assert img.shape == (IMG_SIZE, IMG_SIZE, 3) + assert img.dtype == np.uint8 + + def test_output_shape_custom_size(self): + engine = MockEngine() + img = render_headless(engine, img_size=128) + # Note: renderer uses IMG_SIZE constant for grid math internally. + # Custom sizes work but grid-to-pixel mapping uses the default. + # This test just verifies no crash with different size. 
+ assert img.shape[2] == 3 + assert img.dtype == np.uint8 + + def test_not_all_black(self): + engine = MockEngine(moisture=0.5) + img = render_headless(engine) + assert img.sum() > 0, "Image should not be all black" + + def test_moisture_affects_background(self): + engine_dry = MockEngine(moisture=0.1) + engine_wet = MockEngine(moisture=0.9) + img_dry = render_headless(engine_dry) + img_wet = render_headless(engine_wet) + # Wet soil should have more blue/teal, dry more amber + # Just verify they're different + assert not np.array_equal(img_dry, img_wet) + + def test_entities_render_without_crash(self): + entities = { + "deer_0": { + "type": "ANIMAL", "state": "FORAGING", + "x": 10.0, "y": 0.0, "z": 10.0, + "health": 1.0, "growth": 0.0, "hydration": 0.8, + }, + "butterfly_0": { + "type": "INSECT", "state": "SEEKING", + "x": 15.0, "y": 0.0, "z": 15.0, + "health": 1.0, "growth": 0.0, "hydration": 0.8, + }, + "oak_0": { + "type": "TREE", "state": "GROWING", + "x": 20.0, "y": 0.0, "z": 20.0, + "health": 1.0, "growth": 0.8, "hydration": 0.8, + }, + "grass_0": { + "type": "PLANT", "state": "GROWING", + "x": 5.0, "y": 0.0, "z": 5.0, + "health": 1.0, "growth": 0.6, "hydration": 0.7, + }, + "wildflower_0": { + "type": "PLANT", "state": "FRUITING", + "x": 8.0, "y": 0.0, "z": 8.0, + "health": 1.0, "growth": 0.7, "hydration": 0.5, + }, + "dormant_grass": { + "type": "PLANT", "state": "DORMANT", + "x": 25.0, "y": 0.0, "z": 25.0, + "health": 0.0, "growth": 0.0, "hydration": 0.0, + }, + } + engine = MockEngine(entities=entities) + img = render_headless(engine) + assert img.shape == (IMG_SIZE, IMG_SIZE, 3) + + def test_empty_entities(self): + engine = MockEngine(entities={}) + img = render_headless(engine) + assert img.shape == (IMG_SIZE, IMG_SIZE, 3) + + def test_dried_water_source_not_rendered(self): + engine = MockEngine() + engine.water_sources = [ + {"x": 16.0, "z": 16.0, "radius": 3.0, "water_level": 0.01}, + ] + img_dry = render_headless(engine) + + engine.water_sources = [ 
+ {"x": 16.0, "z": 16.0, "radius": 3.0, "water_level": 0.8}, + ] + img_wet = render_headless(engine) + + # The wet version should have blue pixels that the dry one doesn't + assert not np.array_equal(img_dry, img_wet) diff --git a/search/tests/test_substrate.py b/search/tests/test_substrate.py new file mode 100644 index 0000000..6459fdc --- /dev/null +++ b/search/tests/test_substrate.py @@ -0,0 +1,92 @@ +# Copyright 2025 BioSynthArt Studios LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Integration tests for the substrate protocol. + +These require ecosim to be installed/importable. Skip if not available. 
+Run with: pytest tests/test_substrate.py -v +""" + +import numpy as np +import pytest + +try: + from ecosim.engine import EcosystemEngine + HAS_ECOSIM = True +except ImportError: + HAS_ECOSIM = False + +from lila_search.theta import make_eco_rates_spec, theta_to_world_config + + +pytestmark = pytest.mark.skipif(not HAS_ECOSIM, reason="ecosim not installed") + + +class TestSubstrateIntegration: + """Tests that require the actual ecosim engine.""" + + def test_default_theta_creates_valid_engine(self): + """Default θ → world config → engine init without crash.""" + spec = make_eco_rates_spec() + config = theta_to_world_config(spec.defaults, seed=0) + engine = EcosystemEngine(config) + # If we get here, the config format matches what the engine expects + + def test_engine_steps_without_crash(self): + """Engine can step for 50 ticks with a θ-generated config.""" + spec = make_eco_rates_spec() + config = theta_to_world_config(spec.defaults, seed=0) + engine = EcosystemEngine(config) + for _ in range(50): + engine.step() + + def test_random_theta_roundtrip(self): + """10 random θ vectors all produce valid engines that step cleanly.""" + spec = make_eco_rates_spec() + rng = np.random.default_rng(42) + for i in range(10): + theta = spec.sample_uniform(rng) + config = theta_to_world_config(theta, seed=i) + engine = EcosystemEngine(config) + for _ in range(20): + engine.step() + + def test_renderer_on_live_engine(self): + """Headless renderer produces valid image from a running engine.""" + from lila_search.renderer import render_headless + + spec = make_eco_rates_spec() + config = theta_to_world_config(spec.defaults, seed=0) + engine = EcosystemEngine(config) + for _ in range(100): + engine.step() + + img = render_headless(engine) + assert img.shape == (256, 256, 3) + assert img.dtype == np.uint8 + assert img.sum() > 0 + + def test_full_substrate_rollout(self): + """Full substrate protocol: init → step → render cycle.""" + from lila_search.substrate import LilaSubstrate + + 
substrate = LilaSubstrate() + spec = substrate.theta_spec() + theta = spec.defaults + + frames = substrate.rollout(theta, n_steps=100, n_frames=5, seed=0) + assert len(frames) == 5 + for frame in frames: + assert frame.shape == (256, 256, 3) + assert frame.dtype == np.uint8 diff --git a/search/tests/test_theta.py b/search/tests/test_theta.py new file mode 100644 index 0000000..77d7197 --- /dev/null +++ b/search/tests/test_theta.py @@ -0,0 +1,191 @@ +# Copyright 2025 BioSynthArt Studios LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for theta parameterization and world config generation. 
+ +These tests validate that: +- θ spec has correct dimensionality and valid ranges +- Clipping keeps θ within bounds +- theta_to_world_config produces valid world configs for any θ in range +- Entity counts and water sources match θ values +""" + +import numpy as np +import pytest + +from lila_search.theta import ThetaSpec, ThetaDim, make_eco_rates_spec, theta_to_world_config + + +class TestThetaSpec: + def test_eco_rates_spec_dimensions(self): + spec = make_eco_rates_spec() + assert spec.ndim == 17 + + def test_bounds_shape(self): + spec = make_eco_rates_spec() + bounds = spec.bounds + assert bounds.shape == (17, 2) + assert (bounds[:, 0] < bounds[:, 1]).all(), "All lows must be < highs" + + def test_defaults_within_bounds(self): + spec = make_eco_rates_spec() + defaults = spec.defaults + bounds = spec.bounds + assert (defaults >= bounds[:, 0]).all() + assert (defaults <= bounds[:, 1]).all() + + def test_clip_enforces_bounds(self): + spec = make_eco_rates_spec() + # Way out of range + theta_low = np.full(spec.ndim, -100.0) + theta_high = np.full(spec.ndim, 10000.0) + + clipped_low = spec.clip(theta_low) + clipped_high = spec.clip(theta_high) + + bounds = spec.bounds + np.testing.assert_array_equal(clipped_low, bounds[:, 0]) + np.testing.assert_array_equal(clipped_high, bounds[:, 1]) + + def test_sample_uniform_within_bounds(self): + spec = make_eco_rates_spec() + rng = np.random.default_rng(42) + for _ in range(100): + theta = spec.sample_uniform(rng) + bounds = spec.bounds + assert (theta >= bounds[:, 0]).all() + assert (theta <= bounds[:, 1]).all() + + +class TestThetaToWorldConfig: + def test_default_config_structure(self): + spec = make_eco_rates_spec() + config = theta_to_world_config(spec.defaults) + + assert "version" in config + assert "environment" in config + assert "rates" in config + assert "entities" in config + assert "model" in config + + env = config["environment"] + assert "climate" in env + assert "soil" in env + assert "voxel_grid" in env + 
assert "water_sources" in env + assert env["voxel_grid"]["dimensions"] == [32, 32, 32] + + def test_entity_counts_match_theta(self): + spec = make_eco_rates_spec() + theta = spec.defaults.copy() + + names = spec.names + theta[names.index("deer_count")] = 3.0 + theta[names.index("butterfly_count")] = 5.0 + theta[names.index("oak_count")] = 2.0 + theta[names.index("grass_count")] = 7.0 + theta[names.index("wildflower_count")] = 4.0 + + config = theta_to_world_config(theta) + entities = config["entities"] + + deer = [e for e in entities if e["species"] == "deer"] + butterflies = [e for e in entities if e["species"] == "monarch"] + oaks = [e for e in entities if e["species"] == "meadow_oak"] + grass = [e for e in entities if e["species"] == "meadow_grass"] + flowers = [e for e in entities if e["species"] == "wildflower"] + + assert len(deer) == 3 + assert len(butterflies) == 5 + assert len(oaks) == 2 + assert len(grass) == 7 + assert len(flowers) == 4 + + def test_water_count_matches_theta(self): + spec = make_eco_rates_spec() + theta = spec.defaults.copy() + theta[spec.names.index("water_count")] = 3.0 + + config = theta_to_world_config(theta) + assert len(config["environment"]["water_sources"]) == 3 + + def test_rate_multipliers_match_theta(self): + spec = make_eco_rates_spec() + theta = spec.defaults.copy() + theta[spec.names.index("rate_hunger")] = 2.5 + + config = theta_to_world_config(theta) + assert config["rates"]["hunger"] == 2.5 + + def test_deterministic_with_same_seed(self): + spec = make_eco_rates_spec() + theta = spec.sample_uniform(np.random.default_rng(99)) + + config1 = theta_to_world_config(theta, seed=42) + config2 = theta_to_world_config(theta, seed=42) + + for e1, e2 in zip(config1["entities"], config2["entities"]): + assert e1["position"] == e2["position"] + + def test_different_seeds_produce_different_positions(self): + spec = make_eco_rates_spec() + theta = spec.defaults + + config1 = theta_to_world_config(theta, seed=0) + config2 = 
theta_to_world_config(theta, seed=1) + + positions1 = [e["position"] for e in config1["entities"]] + positions2 = [e["position"] for e in config2["entities"]] + assert positions1 != positions2 + + def test_entities_within_grid(self): + spec = make_eco_rates_spec() + rng = np.random.default_rng(0) + for _ in range(20): + theta = spec.sample_uniform(rng) + config = theta_to_world_config(theta, seed=rng.integers(1000)) + for e in config["entities"]: + pos = e["position"] + assert 0 <= pos[0] <= 32, f"Entity {e['id']} x={pos[0]} out of grid" + assert 0 <= pos[2] <= 32, f"Entity {e['id']} z={pos[2]} out of grid" + + def test_entities_have_position_arrays(self): + spec = make_eco_rates_spec() + config = theta_to_world_config(spec.defaults) + for e in config["entities"]: + assert isinstance(e["position"], list) + assert len(e["position"]) == 3 + + def test_entities_have_metadata(self): + spec = make_eco_rates_spec() + config = theta_to_world_config(spec.defaults) + for e in config["entities"]: + assert "metadata" in e + + def test_rain_config_zero_interval(self): + spec = make_eco_rates_spec() + theta = spec.defaults.copy() + theta[spec.names.index("rain_interval")] = 0.0 + + config = theta_to_world_config(theta) + assert "rain" not in config + + def test_rain_config_nonzero_interval(self): + spec = make_eco_rates_spec() + theta = spec.defaults.copy() + theta[spec.names.index("rain_interval")] = 500.0 + + config = theta_to_world_config(theta) + assert "rain" in config + assert config["rain"]["interval"] == 500 From 17a7f5ea91d17e19b4fb6534df771f4d268474c7 Mon Sep 17 00:00:00 2001 From: Joshua Natarajan Date: Wed, 13 May 2026 19:57:40 -0400 Subject: [PATCH 3/3] Working with results doc --- search/docs/WORKING_WITH_RESULTS.md | 374 ++++++++++++++++++++++++++++ 1 file changed, 374 insertions(+) create mode 100644 search/docs/WORKING_WITH_RESULTS.md diff --git a/search/docs/WORKING_WITH_RESULTS.md b/search/docs/WORKING_WITH_RESULTS.md new file mode 100644 index 
0000000..0fcfece --- /dev/null +++ b/search/docs/WORKING_WITH_RESULTS.md @@ -0,0 +1,374 @@ + + +# Working with Illumination Results + +## Output Files + +### `thetas_final.npy` — The discovered parameter vectors + +Shape: `(64, 17)` — 64 surviving ecosystem configurations, each a 17-dimensional θ vector. + +Each row is a complete world recipe. The columns map to `dim_names` in metadata.json: + +``` + 0: rate_consumption 6: soil_nitrogen 12: butterfly_count + 1: rate_hunger 7: soil_moisture 13: oak_count + 2: rate_thirst 8: climate_temperature 14: grass_count + 3: rate_growth 9: water_count 15: wildflower_count + 4: rate_reproduction 10: water_radius 16: rain_interval + 5: rate_water_replenishment 11: deer_count +``` + +**Load and inspect:** + +```python +import numpy as np +from lila_search.theta import make_eco_rates_spec, theta_to_world_config + +thetas = np.load("results/illuminate_v1/thetas_final.npy") +spec = make_eco_rates_spec() + +# Look at simulation #0 +print(dict(zip(spec.names, thetas[0]))) + +# Which simulation has the most deer? +deer_idx = spec.names.index("deer_count") +densest = thetas[:, deer_idx].argmax() +print(f"Sim {densest}: {thetas[densest, deer_idx]:.0f} deer") + +# Regenerate and run any discovered world +config = theta_to_world_config(thetas[42], seed=42) +# Pass to EcosystemEngine(config) to replay it +``` + +### `embeddings_final.npy` — CLIP representations + +Shape: `(64, 512)` — each simulation's visual identity as a 512-dimensional +vector in CLIP space. L2-normalized, so cosine similarity = dot product. + +**What these encode:** CLIP was trained on internet-scale image-text pairs, so +these embeddings capture what looks visually *meaningful* to a human observer. +Two simulations that are "far apart" in this space look genuinely different — +different spatial patterns, densities, color distributions, entity arrangements. 
+ +```python +import numpy as np + +embeddings = np.load("results/illuminate_v1/embeddings_final.npy") + +# Find the two most similar simulations +sim_matrix = embeddings @ embeddings.T +np.fill_diagonal(sim_matrix, -1) # ignore self-similarity +i, j = np.unravel_index(sim_matrix.argmax(), sim_matrix.shape) +print(f"Most similar pair: sim {i} and sim {j} (cosine sim: {sim_matrix[i,j]:.4f})") + +# Find the most unique simulation (highest mean distance to all others) +dist_matrix = 1 - sim_matrix +np.fill_diagonal(dist_matrix, 0) +most_unique = dist_matrix.mean(axis=1).argmax() +print(f"Most unique: sim {most_unique}") +``` + +### `metadata.json` — Run configuration and metrics + +Contains the full config (pop_size, generations, etc.), dimension names, +diversity history, and elapsed time. Use it to reproduce or compare runs. + +### `thumbnails/` — Rendered snapshots + +64 PNG images (`sim_0000.png` through `sim_0063.png`), one per surviving +configuration. These are the final-tick renders used in the atlas. + +### `checkpoints/` — Evolution snapshots + +Saved every 10 generations: +- `thetas_gen0010.npy`, `thetas_gen0020.npy`, ... — population at that generation +- `embeddings_gen0010.npy`, ... — corresponding embeddings + +**Track how the population's parameter distributions shifted between checkpoints:** + +```python +import numpy as np +from lila_search.theta import make_eco_rates_spec + +# Load early and late populations +early = np.load("results/illuminate_v1/checkpoints/thetas_gen0010.npy") +late = np.load("results/illuminate_v1/checkpoints/thetas_gen0100.npy") + +# Compare parameter distributions, one θ dimension per row +spec_names = make_eco_rates_spec().names +for i, name in enumerate(spec_names): + print(f"{name:30s} gen10: {early[:,i].mean():.2f} ± {early[:,i].std():.2f}" + f" gen100: {late[:,i].mean():.2f} ± {late[:,i].std():.2f}") +``` + +--- + +## Exploring the Embedding Space + +### Which θ dimensions drive visual diversity? 
+ +This tells you what the search actually learned to vary: + +```python +import numpy as np +from sklearn.decomposition import PCA +from lila_search.theta import make_eco_rates_spec + +thetas = np.load("results/illuminate_v1/thetas_final.npy") +embeddings = np.load("results/illuminate_v1/embeddings_final.npy") +spec = make_eco_rates_spec() + +# Project the embeddings onto their first 3 principal components (fit once) +emb_pca = PCA(n_components=3).fit_transform(embeddings) + +# For each θ dimension, keep its largest |correlation| with any of those components +# High correlation = that parameter strongly influences the visual output +correlations = np.zeros(spec.ndim) +for d in range(spec.ndim): + for pc in range(3): + correlations[d] = max(correlations[d], + abs(np.corrcoef(thetas[:, d], emb_pca[:, pc])[0, 1])) + +# Rank dimensions by influence +ranked = np.argsort(correlations)[::-1] +for idx in ranked: + print(f"{spec.names[idx]:30s} max |corr|: {correlations[idx]:.3f}") +``` + +### Cluster the atlas into ecological regimes + +```python +import numpy as np +from sklearn.cluster import KMeans +from lila_search.theta import make_eco_rates_spec + +embeddings = np.load("results/illuminate_v1/embeddings_final.npy") +thetas = np.load("results/illuminate_v1/thetas_final.npy") +spec = make_eco_rates_spec() + +# Find 4-6 natural clusters +km = KMeans(n_clusters=5, random_state=42) +labels = km.fit_predict(embeddings) + +# What characterizes each cluster? +for c in range(5): + mask = labels == c + print(f"\n--- Cluster {c} ({mask.sum()} sims) ---") + for d in range(spec.ndim): + mean_val = thetas[mask, d].mean() + global_mean = thetas[:, d].mean() + if abs(mean_val - global_mean) > 0.3 * thetas[:, d].std(): + print(f" {spec.names[d]:30s} cluster: {mean_val:.2f} overall: {global_mean:.2f}") +``` + +--- + +## ASAL Search Modes Using These Embeddings + +The illumination run discovered *what's out there*. The embeddings enable +two more search modes that answer different questions: + +### 1. 
Target Search — "Find me a simulation that looks like X" + +Uses CLIP's text encoder to embed a natural language description, then +searches for a θ that produces a simulation whose embedding is close to +the text embedding. This is ASAL mode 1. + +```python +import numpy as np +from lila_search.evaluator import CLIPEvaluator +from lila_search.substrate import LilaSubstrate +from lila_search.theta import make_eco_rates_spec + +evaluator = CLIPEvaluator() +spec = make_eco_rates_spec() + +# Embed a target description +target_emb = evaluator.embed_text(["a barren landscape with dried ponds"])[0] + +# Option A: search existing population (instant) +embeddings = np.load("results/illuminate_v1/embeddings_final.npy") +thetas = np.load("results/illuminate_v1/thetas_final.npy") + +similarities = embeddings @ target_emb +best_idx = similarities.argmax() +print(f"Best match: sim {best_idx} (similarity: {similarities[best_idx]:.4f})") +print(f" θ = {dict(zip(spec.names, thetas[best_idx]))}") + +# Option B: optimize with CMA-ES to find new θ (minutes) +import cma + +substrate = LilaSubstrate() + +def objective(theta): + theta = np.array(theta) + theta = spec.clip(theta) + frames = substrate.rollout(theta, n_steps=2000, n_frames=20, seed=0) + emb = evaluator.embed_rollout(frames) + return -float(emb @ target_emb) # maximize similarity = minimize negative + +x0 = thetas[best_idx] # warm-start from best existing match +sigma0 = 0.3 +opts = {"maxiter": 50, "popsize": 16, "seed": 42} +es = cma.CMAEvolutionStrategy(x0, sigma0, opts) +es.optimize(objective) + +best_theta = spec.clip(np.array(es.result.xbest)) +print(f"\nOptimized θ: {dict(zip(spec.names, best_theta))}") +``` + +**Prompt ideas to try:** +- "a dense forest with many animals" +- "a barren landscape with dried ponds" +- "wildflowers and butterflies" +- "an ecosystem in collapse" +- "a thriving balanced ecosystem" + +### 2. 
Open-Ended Search — "Find simulations that keep changing" + +Instead of diversity across the *population*, this optimizes for diversity +across *time* within a single simulation. It finds θ values that produce +ecosystems which don't reach equilibrium — they keep generating novel +visual states. This is ASAL mode 2. + +```python +import numpy as np +from lila_search.evaluator import CLIPEvaluator +from lila_search.substrate import LilaSubstrate + +def temporal_novelty(theta, substrate, evaluator, n_steps=2000, n_frames=40): + """Score how much visual change happens over a rollout.""" + theta = np.array(theta) + frames = substrate.rollout(theta, n_steps=n_steps, n_frames=n_frames, seed=0) + frame_embeddings = evaluator.embed_frames(frames) + + # Measure mean distance between consecutive frame embeddings + diffs = np.diff(frame_embeddings, axis=0) + novelty = np.linalg.norm(diffs, axis=1).mean() + return novelty + +# Find which existing simulations are most temporally dynamic +thetas = np.load("results/illuminate_v1/thetas_final.npy") +substrate = LilaSubstrate() +evaluator = CLIPEvaluator() + +novelties = [] +for i in range(len(thetas)): + n = temporal_novelty(thetas[i], substrate, evaluator) + novelties.append(n) + print(f"Sim {i:3d}: temporal novelty = {n:.4f}") + +most_dynamic = np.argmax(novelties) +print(f"\nMost dynamic: sim {most_dynamic}") +``` + +### 3. 
Replay Any Discovered World + +```python +from pathlib import Path + +from ecosim.engine import EcosystemEngine +from lila_search.theta import theta_to_world_config +from lila_search.renderer import render_headless +from PIL import Image +import numpy as np + +thetas = np.load("results/illuminate_v1/thetas_final.npy") + +# Pick a simulation to replay +sim_idx = 42 +config = theta_to_world_config(thetas[sim_idx], seed=sim_idx) +engine = EcosystemEngine(config) + +# Run and capture frames +frames = [] +for tick in range(2000): + engine.step() + if tick % 50 == 0: + frames.append(render_headless(engine)) + +# Save as individual images (PIL does not create the output directory for you) +Path("replay").mkdir(exist_ok=True) +for i, frame in enumerate(frames): + Image.fromarray(frame).save(f"replay/frame_{i:04d}.png") + +# Or make a GIF +images = [Image.fromarray(f) for f in frames] +images[0].save("replay/sim42.gif", save_all=True, + append_images=images[1:], duration=200, loop=0) +``` + +--- + +## Replay in Browser + +Every simulation in the atlas is fully deterministic — θ + seed +reproduces it tick-for-tick. The search renderer (PIL, 256×256) is +just for CLIP. The browser visualizer gives you the full experience: +entity labels, state transitions, event log, soil heatmap, rain button. + +### Export and run + +```bash +# From search/ — export atlas entry #42 as a world config +uv run python -m scripts.export_world results/illuminate_v1 42 + +# Output: +# Atlas entry #42 +# Seed: 42 +# Parameters: +# rate_consumption = 3.214 +# rate_hunger = 1.872 +# ... 
+# World config written to replay.json + +# From server/ — run the worker with that config +cd ../server +WORLD_FILE=../search/replay.json uv run python -m ecosim.worker + +# Open http://localhost:8001 — you're watching that atlas entry live +``` + +### Export with a custom seed or output path + +```bash +# Different seed (changes entity placement, same rates) +uv run python -m scripts.export_world results/illuminate_v1 42 --seed 99 + +# Custom output path +uv run python -m scripts.export_world results/illuminate_v1 42 -o worlds/drought_world.json +``` + +### Browse the atlas, then replay + +The workflow: look at the atlas image, pick a tile that looks +interesting, find its index (row × 8 + col for an 8×8 atlas, +or check `thumbnails/sim_NNNN.png`), export it, replay in browser. + +```bash +# Top-right tile of an 8×8 atlas = index 7 +uv run python -m scripts.export_world results/illuminate_v1 7 + +# Bottom-left = index 56 +uv run python -m scripts.export_world results/illuminate_v1 56 +``` + +### Future: Godot holodeck + +The Godot client connects to the same WebSocket as the browser. +When it ships, replay works identically — same worker, same config, +same tick packets. The atlas becomes a map you browse in 2D, then +step into in 3D. + +--- + +## What to Try Next + +**Immediate (uses existing results, no new search run):** +1. Run the dimension correlation analysis — find out which θ dims CLIP cares about +2. Cluster the atlas into ecological regimes — name them +3. Try target search against a few text prompts — see what CLIP thinks matches +4. 
Measure temporal novelty across the 64 sims — find the most dynamic worlds + +**Next search run:** +- Target search with CMA-ES for a specific prompt ("ecosystem in collapse") +- `--workers 8` to use all your cores +- Try `--mutation-scale 0.15` for more exploration (current 0.1 might be conservative) + +**When traits land:** +- Expand θ to encode body masses, diets, thermal tolerances +- Re-run illumination — the atlas goes from "interesting tunings" to "interesting ecologies"