nesl · Joseph-Q-Zales · May 29, 2026 · May 9, 2026 · May 11, 2026 · May 12, 2026
diff --git a/Makefile b/Makefile
@@ -4,6 +4,8 @@ PYTHON ?= python
 ENV ?= tinyodomex
 OXIOD_ZIP ?= OxIOD.zip
 URBANSOUND8K_ARGS ?=
+PYTHON_LIB_DIR := $(shell $(PYTHON) -c 'import sys; print(sys.prefix + "/lib")')
+TEST_ENV := LD_LIBRARY_PATH="$(PYTHON_LIB_DIR)$(if $(and $(PYTHON_LIB_DIR),$(LD_LIBRARY_PATH)),:)$(LD_LIBRARY_PATH)"
 
 help:
 	@echo "Targets:"
@@ -25,13 +27,13 @@ install:
 	pip install -e . --no-deps
 
 test:
-	pytest test/
+	$(TEST_ENV) $(PYTHON) -m pytest test/
 
 integration-test:
-	RUN_INTEGRATION_TESTS=1 pytest test/integration/
+	$(TEST_ENV) RUN_INTEGRATION_TESTS=1 $(PYTHON) -m pytest test/integration/
 
 test-all:
-	RUN_INTEGRATION_TESTS=1 pytest test/
+	$(TEST_ENV) RUN_INTEGRATION_TESTS=1 $(PYTHON) -m pytest test/
 
 start-gpu:
 	$(PYTHON) src/nas_model_client.py $(ARGS)

diff --git a/README.md b/README.md
@@ -25,7 +25,7 @@ For the source-level architecture and extension points, see
 
 1. **Training only**
    Use this when you want to run NAS/training without talking to hardware.
-   Start from `src/config/nas_config.yaml`, set `device.hil: false`, and read
+   Start from `src/config/nas_config_stm32.yaml`, set `device.hil: false`, and read
    [src/config/README.md](src/config/README.md) plus [src/README.md](src/README.md).
    For the UrbanSound8K audio DS-CNN path, start from
    [src/config/nas_config_audio_stm32.yaml](src/config/nas_config_audio_stm32.yaml)
@@ -44,7 +44,7 @@ For the source-level architecture and extension points, see
 
 3. **STM32 HIL**
    Use this for the current STM32 N6 backend.
-   Start from [src/config/nas_config.yaml](src/config/nas_config.yaml), then
+   Start from [src/config/nas_config_stm32.yaml](src/config/nas_config_stm32.yaml), then
    use [src/config/nas_config_audio_stm32.yaml](src/config/nas_config_audio_stm32.yaml)
    for the audio DS-CNN HIL path. Then
    read [src/tinyodom/microcontrollers/README.md](src/tinyodom/microcontrollers/README.md)
@@ -183,10 +183,10 @@ refreshes the repo-local STM32 vendor subsets.
 
 The shipped starting points are:
 
-- [src/config/nas_config.yaml](src/config/nas_config.yaml)
-  Default STM32-oriented config for the current `STM32_NUCLEO_N657X0_Q`
-  backend. This is the main starting point for STM32 runs and the general
-  example config for the repo.
+- [src/config/nas_config_stm32.yaml](src/config/nas_config_stm32.yaml)
+  STM32-oriented config for the current `STM32_NUCLEO_N657X0_Q`
+  backend. This is the main starting point for STM32 runs and the most
+  complete commented example config in the repo.
 - [src/config/nas_config_ble.yaml](src/config/nas_config_ble.yaml)
   BLE-focused starting point for `ARDUINO_NANO_33_BLE_SENSE`.
 - [src/config/nas_config_portenta.yaml](src/config/nas_config_portenta.yaml)

diff --git a/analysis_scripts/arena_latency_curve/run_arena_latency_curve.py b/analysis_scripts/arena_latency_curve/run_arena_latency_curve.py
@@ -276,7 +276,7 @@ def _build_arg_parser() -> argparse.ArgumentParser:
     )
     parser.add_argument(
         "--config",
-        default=str(SRC_DIR / "config" / "nas_config.yaml"),
+        default=str(SRC_DIR / "config" / "nas_config_stm32.yaml"),
         help="Path to TinyODOM config YAML.",
     )
     parser.add_argument(

diff --git a/analysis_scripts/arena_latency_curve/run_arena_latency_curve_failure_probe.py b/analysis_scripts/arena_latency_curve/run_arena_latency_curve_failure_probe.py
@@ -280,7 +280,7 @@ def _build_arg_parser() -> argparse.ArgumentParser:
     )
     parser.add_argument(
         "--config",
-        default=str(SRC_DIR / "config" / "nas_config.yaml"),
+        default=str(SRC_DIR / "config" / "nas_config_stm32.yaml"),
         help="Path to TinyODOM config YAML.",
     )
     parser.add_argument(

diff --git a/analysis_scripts/cadenced_portenta_h7/README.md b/analysis_scripts/cadenced_portenta_h7/README.md
@@ -42,7 +42,7 @@ Common overrides:
 
 ```bash
 python analysis_scripts/cadenced_portenta_h7/run_cadenced_portenta_h7.py \
-  --config src/config/nas_config.yaml \
+  --config src/config/nas_config_stm32.yaml \
   --repeats 3 \
   --cores cm7 cm4 \
   --latency-budget-ms 200 \

diff --git a/analysis_scripts/cadenced_portenta_h7/run_cadenced_portenta_h7.py b/analysis_scripts/cadenced_portenta_h7/run_cadenced_portenta_h7.py
@@ -475,7 +475,7 @@ def _build_arg_parser() -> argparse.ArgumentParser:
     )
     parser.add_argument(
         "--config",
-        default=str(REPO_ROOT / "src" / "config" / "nas_config.yaml"),
+        default=str(REPO_ROOT / "src" / "config" / "nas_config_stm32.yaml"),
         help="Path to TinyODOM config YAML.",
     )
     parser.add_argument(

diff --git a/analysis_scripts/clock_tick_latency/run_clock_tick_latency.py b/analysis_scripts/clock_tick_latency/run_clock_tick_latency.py
@@ -206,7 +206,7 @@ def _build_arg_parser() -> argparse.ArgumentParser:
     )
     parser.add_argument(
         "--config",
-        default=str(SRC_DIR / "config" / "nas_config.yaml"),
+        default=str(SRC_DIR / "config" / "nas_config_stm32.yaml"),
         help="Path to TinyODOM config YAML.",
     )
     parser.add_argument(

diff --git a/analysis_scripts/hil_noise_analysis/README.md b/analysis_scripts/hil_noise_analysis/README.md
@@ -73,7 +73,7 @@ Dataset-specific generated headers live alongside them:
 
 ## Config selection
 
-Set the following in `src/config/nas_config.yaml` to choose a variant when `energy_aware: true`:
+Set the following in `src/config/nas_config_stm32.yaml` to choose a variant when `energy_aware: true`:
 
 - `input_mode: "uniform"` uses `sketches/tinyodom_inference_energy.ino`
 - `input_mode: "oxiod_representative"` uses `sketches/analysis_sketches/tinyodom_inference_representative.ino` with `oxiod_input_data.h`
@@ -97,7 +97,7 @@ python analysis_scripts/hil_noise_analysis/oxiod_input_profile.py --split train
 
 # 2) On the GPU host, train and package the fixed 50-epoch artifact
 python analysis_scripts/hil_noise_analysis/train_noise_scan_model.py \
-  --config src/config/nas_config.yaml \
+  --config src/config/nas_config_stm32.yaml \
   --epochs 50 \
   --out-dir analysis_scripts/hil_noise_analysis/artifacts \
   --artifact-prefix noise_scan_50ep
@@ -126,7 +126,7 @@ just a trained-vs-untrained comparison:
 ```bash
 # 1) Train and export checkpoint stages on the GPU host
 python analysis_scripts/hil_noise_analysis/epoch_sweep/train_epoch_sweep.py \
-  --config src/config/nas_config.yaml \
+  --config src/config/nas_config_stm32.yaml \
   --out-dir analysis_scripts/hil_noise_analysis/epoch_sweep/artifacts \
   --artifact-prefix noise_scan_epoch_sweep
 
@@ -135,7 +135,7 @@ python analysis_scripts/hil_noise_analysis/epoch_sweep/audit_fresh_untrained_tfl
 
 # 3) Run HIL metrics across staged checkpoints
 python analysis_scripts/hil_noise_analysis/epoch_sweep/hil_epoch_sweep_scan.py \
-  --config src/config/nas_config.yaml \
+  --config src/config/nas_config_stm32.yaml \
   --training-csv analysis_scripts/hil_noise_analysis/epoch_sweep/artifacts/epoch_sweep_training_stats.csv \
   --runs 1 \
   --input-modes uniform

diff --git a/analysis_scripts/hil_noise_analysis/epoch_sweep/README.md b/analysis_scripts/hil_noise_analysis/epoch_sweep/README.md
@@ -32,7 +32,7 @@ This folder contains a two-step experiment flow:
 
 ```bash
 python analysis_scripts/hil_noise_analysis/epoch_sweep/train_epoch_sweep.py \
-  --config src/config/nas_config.yaml \
+  --config src/config/nas_config_stm32.yaml \
   --out-dir analysis_scripts/hil_noise_analysis/epoch_sweep/artifacts \
   --artifact-prefix noise_scan_epoch_sweep \
   --max-epochs 500 \
@@ -46,7 +46,7 @@ python analysis_scripts/hil_noise_analysis/epoch_sweep/train_epoch_sweep.py \
 
 ```bash
 python analysis_scripts/hil_noise_analysis/epoch_sweep/train_epoch_sweep.py \
-  --config src/config/nas_config.yaml \
+  --config src/config/nas_config_stm32.yaml \
   --out-dir analysis_scripts/hil_noise_analysis/epoch_sweep/artifacts \
   --plots-dir analysis_scripts/hil_noise_analysis/epoch_sweep/artifacts/plots \
   --csv-path analysis_scripts/hil_noise_analysis/epoch_sweep/artifacts/epoch_sweep_training_stats.csv \
@@ -76,7 +76,7 @@ scp -r analysis_scripts/hil_noise_analysis/epoch_sweep/artifacts <hil_host>:<rep
 
 ```bash
 python analysis_scripts/hil_noise_analysis/epoch_sweep/hil_epoch_sweep_scan.py \
-  --config src/config/nas_config.yaml \
+  --config src/config/nas_config_stm32.yaml \
   --training-csv analysis_scripts/hil_noise_analysis/epoch_sweep/artifacts/epoch_sweep_training_stats.csv \
   --runs 1 \
   --input-modes uniform \
@@ -87,7 +87,7 @@ Verbose logging:
 
 ```bash
 python analysis_scripts/hil_noise_analysis/epoch_sweep/hil_epoch_sweep_scan.py \
-  --config src/config/nas_config.yaml \
+  --config src/config/nas_config_stm32.yaml \
   --training-csv analysis_scripts/hil_noise_analysis/epoch_sweep/artifacts/epoch_sweep_training_stats.csv \
   --runs 1 \
   --input-modes uniform \
@@ -98,7 +98,7 @@ Explicit output paths + checkpoint remap (useful when CSV paths came from anothe
 
 ```bash
 python analysis_scripts/hil_noise_analysis/epoch_sweep/hil_epoch_sweep_scan.py \
-  --config src/config/nas_config.yaml \
+  --config src/config/nas_config_stm32.yaml \
   --training-csv analysis_scripts/hil_noise_analysis/epoch_sweep/artifacts/epoch_sweep_training_stats.csv \
   --checkpoint-root analysis_scripts/hil_noise_analysis/epoch_sweep/artifacts \
   --csv-path analysis_scripts/hil_noise_analysis/epoch_sweep/artifacts/epoch_sweep_hil_metrics.csv \

diff --git a/analysis_scripts/portenta_baseline_load/run_portenta_baseline_load.py b/analysis_scripts/portenta_baseline_load/run_portenta_baseline_load.py
@@ -702,7 +702,7 @@ def _build_parser() -> argparse.ArgumentParser:
             "using harness timing/energy telemetry."
         )
     )
-    parser.add_argument("--config", default=str(SRC_DIR / "config" / "nas_config.yaml"), help="TinyODOM config path (optional defaults source).")
+    parser.add_argument("--config", default=str(SRC_DIR / "config" / "nas_config_stm32.yaml"), help="TinyODOM config path (optional defaults source).")
     parser.add_argument("--dut-port", default=None, help="DUT serial port (default from config or /dev/ttyACM0).")
     parser.add_argument("--harness-port", default=None, help="Harness serial port (default from config or /dev/ttyACM1).")
     parser.add_argument("--harness-fqbn", default=None, help="Harness board FQBN (default from config or arduino:mbed_nano:nano33ble).")

diff --git a/analysis_scripts/static_memory_proxy/.gitignore b/analysis_scripts/static_memory_proxy/.gitignore
@@ -0,0 +1,3 @@
+# Generated outputs from compute_static_memory_proxy.py
+*.csv
+*.png
diff --git a/analysis_scripts/static_memory_proxy/README.md b/analysis_scripts/static_memory_proxy/README.md
@@ -0,0 +1,83 @@
+# Static Memory Proxy
+
+Offline prototype that adds a second cheap proxy metric alongside FLOPs for OdomTCN analysis. It does not modify NAS training, configs, export, flashing, or HIL: it reads logged trial CSVs, rebuilds each OdomTCN candidate from the logged hyperparameters, and writes an augmented CSV with static memory proxy columns.
+
+## Proxy Definition
+
+The script estimates static memory traffic as:
+
+```text
+memory_traffic_bytes =
+  sum_layers(input_activation_bytes + layer_weight_bytes + output_activation_bytes)
+```
+
+The prototype assumes batch size 1 and maps each deployment quantization mode to a per-element size:
+
+```text
+float    -> 4 bytes
+int8_ptq -> 1 byte
+```
+
+If a row does not expose quantization mode, the script defaults to `int8_ptq` and records that in `proxy_quantization_mode_source`.
+
+## Outputs
+
+The augmented CSV keeps all original columns and adds:
+
+```text
+proxy_weight_bytes
+proxy_activation_bytes
+proxy_memory_traffic_bytes
+proxy_dtype_bytes
+proxy_warning_count
+proxy_quantization_mode
+proxy_quantization_mode_source
+```
+
+Use `--include-layer-details` to also write `proxy_layer_details_json`.
+
+## Run
+
+From the repository root:
+
+```bash
+python analysis_scripts/static_memory_proxy/compute_static_memory_proxy.py \
+  --config src/config/nas_config_memory_proxy.yaml \
+  --trials-csv path/to/trials.csv \
+  --output-csv analysis_scripts/static_memory_proxy/stm32_trials_with_memory_proxy.csv \
+  --plot \
+  --plot-dir analysis_scripts/static_memory_proxy
+```
+
+This writes:
+
+```text
+analysis_scripts/static_memory_proxy/stm32_trials_with_memory_proxy.csv
+analysis_scripts/static_memory_proxy/stm32_trials_with_memory_proxy_flops_vs_memory_traffic.png
+analysis_scripts/static_memory_proxy/stm32_trials_with_memory_proxy_rmse_total_vs_memory_traffic.png
+analysis_scripts/static_memory_proxy/stm32_trials_with_memory_proxy_energy_mj_per_inference_vs_memory_traffic.png
+```
+
+For multiple input CSVs, use `--output-dir` instead of `--output-csv`.
+
+## Plots
+
+With `--plot`, the script writes scatter plots for available columns:
+
+```text
+flops vs proxy_memory_traffic_bytes
+rmse_total vs proxy_memory_traffic_bytes
+energy_mj_per_inference vs proxy_memory_traffic_bytes
+```
+
+It also prints Spearman and Kendall rank correlations for each plotted pair. These are rank correlations, so they measure monotonic ordering rather than exact linear fit.
+
+## Warning Count
+
+`proxy_warning_count` is the number of layer-level estimates for which the script could not read exact symbolic activation tensors directly from Keras and fell back to inferring the shapes. OdomTCN uses the custom `TCN` layer: its residual blocks and child layers are visible and their weights are counted directly, but Keras does not cleanly expose the nested child input/output tensors after build. The script infers those internal activation shapes from the timestep count and channel/filter counts and counts each such estimate as a warning.
+
+A nonzero warning count does not mean the row failed. It means part of the estimate is architecture-aware static inference rather than a direct Keras tensor-shape read.
+
+## Limitations
+
+This proxy is neither a latency prediction nor an energy measurement. It ignores cache behavior, DMA, tiling, operator fusion, im2col or temporary buffers, allocator overhead, flash/SRAM placement, alignment, backend-specific rereads, and kernel implementation details. Use it as a ranking proxy to compare against FLOPs and measured energy before deciding whether to wire it into NAS.