11# statline/core/adapters/loader.py
22from __future__ import annotations
33
4+ import math
5+ import os
6+ import warnings
47from pathlib import Path
58from typing import Any , Dict , List , Mapping , Optional , Sequence , Tuple , cast
69
1013
# Directory that holds the bundled adapter definition files, resolved relative
# to this module.
_BASE = Path(__file__).parent / "defs"
1215
# Configurable strictness (read once, at import time):
#   STATLINE_LOADER_STRICT = "1"            -> raise on unknown keys / unknown buckets
#   STATLINE_LOADER_STRICT = "0" (default)  -> warn-and-continue with 0.0 / None
# The value is trimmed and compared case-insensitively, so "FALSE", "no", "off"
# and " 0 " all mean non-strict; any other non-empty value enables strict mode.
_STRICT = os.environ.get("STATLINE_LOADER_STRICT", "0").strip().lower() not in {
    "0",
    "",
    "false",
    "no",
    "off",
}
20+
21+
def _warn(msg: str) -> None:
    """Emit a ``RuntimeWarning`` prefixed with the loader tag.

    ``stacklevel=2`` attributes the warning to the function that called
    ``_warn`` rather than to this helper.
    """
    warnings.warn(f"[statline.loader] {msg}", RuntimeWarning, stacklevel=2)
24+
25+
def _finite_float(x: Any, default: float = 0.0) -> float:
    """Coerce *x* to a finite float.

    Returns ``float(x)`` when the conversion succeeds and the result is finite.
    Otherwise a warning is emitted through ``_warn`` and *default* is returned;
    this covers values that cannot be converted as well as NaN and +/-inf.
    """
    try:
        v = float(x)
    except (TypeError, ValueError, OverflowError):
        # TypeError: unsupported type (None, list, ...); ValueError: unparsable
        # string; OverflowError: integer too large to represent as a float.
        _warn(f"Non-numeric value '{x}' coerced to {default}")
        return default
    if not math.isfinite(v):
        _warn(f"Non-finite value '{x}' coerced to {default}")
        return default
    return v
37+
38+
1339# Allowed top-level keys in an adapter YAML (helps catch typos).
1440_ALLOWED_TOP_KEYS : set [str ] = {
1541 "key" ,
@@ -29,7 +55,9 @@ def _read_yaml_for(name: str) -> Dict[str, Any]:
2955 if not p .exists ():
3056 p = _BASE / f"{ name } .yml"
3157 if not p .exists ():
32- raise FileNotFoundError (f"Adapter spec not found: { name } (expected { name } .yaml or { name } .yml)" )
58+ raise FileNotFoundError (
59+ f"Adapter spec not found: { name } (expected { name } .yaml or { name } .yml)"
60+ )
3361
3462 try :
3563 loaded : Any = yaml .safe_load (p .read_text (encoding = "utf-8" ))
@@ -40,17 +68,28 @@ def _read_yaml_for(name: str) -> Dict[str, Any]:
4068 if loaded is None :
4169 data = {}
4270 elif isinstance (loaded , dict ):
43- # Force Dict[str, Any] shape; cast so keys/values aren’t Unknown to Pylance .
71+ # Force Dict[str, Any] shape; cast so keys/values aren’t Unknown to type checkers .
4472 loaded_map : Mapping [Any , Any ] = cast (Mapping [Any , Any ], loaded )
4573 data = {str (k ): v for k , v in loaded_map .items ()}
4674 else :
47- raise TypeError (f"Top-level YAML for '{ p .name } ' must be a mapping (dict), got { type (loaded ).__name__ } " )
75+ raise TypeError (
76+ f"Top-level YAML for '{ p .name } ' must be a mapping (dict), got { type (loaded ).__name__ } "
77+ )
4878
49- # Unknown top-level keys -> explicit error to avoid silent typos.
79+ # Unknown top-level keys -> warn or raise (configurable) to avoid silent typos.
5080 keys : set [str ] = set (data .keys ())
5181 unknown : set [str ] = keys .difference (_ALLOWED_TOP_KEYS )
5282 if unknown :
53- raise KeyError (f"Unknown top-level key(s) in adapter '{ name } ': { ', ' .join (sorted (unknown ))} " )
83+ msg = (
84+ f"Unknown top-level key(s) in adapter '{ name } ' ({ p } ): "
85+ f"{ ', ' .join (sorted (unknown ))} "
86+ )
87+ if _STRICT :
88+ raise KeyError (msg )
89+ _warn (msg + " — ignoring." )
90+ for k in list (unknown ):
91+ data .pop (k , None )
92+
5493 return data
5594
5695
@@ -62,38 +101,48 @@ def _uniform_weights(buckets: Dict[str, Dict[str, Any]]) -> Dict[str, Dict[str,
62101
63102
def _as_clamp(v: Any) -> Optional[Tuple[float, float]]:
    """Normalize a clamp config to ``(lo, hi)``, or ``None`` when there is no usable clamp.

    Accepted forms:
      * ``None`` / ``False``          -> no clamp (no warning)
      * ``{"lo": X, "hi": Y}``        -> mapping form
      * ``[lo, hi]`` / ``(lo, hi)``   -> sequence form (extra items are ignored)
      * ``"0,1"`` / ``"0..1"`` / ``"0 1"`` -> string form

    The bounds are swapped when ``lo > hi``. Any malformed input (non-numeric or
    non-finite bounds, ``lo == hi``, too few values, unsupported type) emits a
    warning through ``_warn`` and yields ``None``.
    """
    if v is None or v is False:
        return None

    def _pair(lo: Any, hi: Any) -> Optional[Tuple[float, float]]:
        # Validate one (lo, hi) candidate; shared by every accepted input form.
        try:
            a = float(lo)
            b = float(hi)
        except (TypeError, ValueError, OverflowError):
            _warn(f"Clamp values '{lo}','{hi}' non-numeric — ignoring clamp")
            return None
        if not (math.isfinite(a) and math.isfinite(b)):
            _warn(f"Clamp values '{lo}','{hi}' non-finite — ignoring clamp")
            return None
        if a > b:
            a, b = b, a
        if a == b:
            # A zero-width range would collapse every value to one point.
            _warn(f"Clamp with lo==hi ({a}) — ignoring clamp")
            return None
        return (a, b)

    # Dict form: {"lo": X, "hi": Y}
    if isinstance(v, dict):
        if "lo" in v and "hi" in v:
            return _pair(v["lo"], v["hi"])
        # Report the real problem instead of "unsupported type dict".
        _warn(f"Clamp mapping must define both 'lo' and 'hi': {v} — ignoring clamp")
        return None

    # Sequence form: [lo, hi] or (lo, hi)
    if isinstance(v, (list, tuple)):
        seq: Sequence[Any] = cast(Sequence[Any], v)
        if len(seq) >= 2:
            return _pair(seq[0], seq[1])
        _warn(f"Clamp sequence too short: {v} — ignoring clamp")
        return None

    # String forms like "0,1" / "0..1" / "0 1"
    if isinstance(v, str):
        parts: List[str] = v.replace(",", " ").replace("..", " ").split()
        if len(parts) >= 2:
            return _pair(parts[0], parts[1])
        _warn(f"Clamp string malformed: '{v}' — ignoring clamp")
        return None

    _warn(f"Unsupported clamp type {type(v).__name__} — ignoring clamp")
    return None
98147
99148
@@ -138,6 +187,7 @@ def load_spec(name: str) -> AdapterSpec:
138187 }
139188 if not buckets :
140189 raise ValueError (f"Adapter '{ name } ': 'buckets' cannot be empty" )
190+ bucket_names = set (buckets .keys ())
141191
142192 # Weights (optional; default to uniform across buckets for 'pri')
143193 weights_raw : Any = data .get ("weights" )
@@ -152,11 +202,24 @@ def load_spec(name: str) -> AdapterSpec:
152202 for profile_any , bw_any in weights_map .items ():
153203 profile = str (profile_any )
154204 if not isinstance (bw_any , dict ):
155- raise TypeError (f"Adapter '{ name } ': weights profile '{ profile } ' must be a mapping" )
156- inner : Dict [str , float ] = {}
205+ raise TypeError (
206+ f"Adapter '{ name } ': weights profile '{ profile } ' must be a mapping"
207+ )
208+ # Initialize all known buckets to 0.0 to make omissions explicit.
209+ inner : Dict [str , float ] = {bk : 0.0 for bk in bucket_names }
157210 bw_map : Mapping [Any , Any ] = cast (Mapping [Any , Any ], bw_any )
158211 for b_any , v_any in bw_map .items ():
159- inner [str (b_any )] = float (v_any )
212+ bk = str (b_any )
213+ if bk not in bucket_names :
214+ msg = (
215+ f"Adapter '{ name } ': weights profile '{ profile } ' references "
216+ f"unknown bucket '{ bk } '"
217+ )
218+ if _STRICT :
219+ raise KeyError (msg )
220+ _warn (msg + " — treating as 0.0 and ignoring." )
221+ continue
222+ inner [bk ] = _finite_float (v_any , default = 0.0 )
160223 weights_out [profile ] = inner
161224
162225 # Penalties (optional; adapter-defined semantics). Keep as {profile: {key: float}}.
@@ -168,11 +231,23 @@ def load_spec(name: str) -> AdapterSpec:
168231 for profile_any , pw_any in penalties_map .items ():
169232 profile = str (profile_any )
170233 if not isinstance (pw_any , dict ):
171- raise TypeError (f"Adapter '{ name } ': penalties profile '{ profile } ' must be a mapping" )
234+ raise TypeError (
235+ f"Adapter '{ name } ': penalties profile '{ profile } ' must be a mapping"
236+ )
172237 inner_p : Dict [str , float ] = {}
173238 pw_map : Mapping [Any , Any ] = cast (Mapping [Any , Any ], pw_any )
174239 for k_any , v_any in pw_map .items ():
175- inner_p [str (k_any )] = float (v_any )
240+ bk = str (k_any )
241+ if bk not in bucket_names :
242+ msg = (
243+ f"Adapter '{ name } ': penalties profile '{ profile } ' references "
244+ f"unknown bucket '{ bk } '"
245+ )
246+ if _STRICT :
247+ raise KeyError (msg )
248+ _warn (msg + " — dropping penalty." )
249+ continue
250+ inner_p [bk ] = _finite_float (v_any , default = 0.0 )
176251 penalties [profile ] = inner_p
177252
178253 # Metrics
@@ -189,16 +264,25 @@ def load_spec(name: str) -> AdapterSpec:
189264 raise KeyError (f"Adapter '{ name } ': every metric must have a 'key'" )
190265 mkey = str (m ["key" ])
191266 if mkey in seen_keys :
192- raise ValueError (f"Adapter '{ name } ': duplicate metric key '{ mkey } '" )
267+ _warn (
268+ f"Adapter '{ name } ': duplicate metric key '{ mkey } ' — keeping first, skipping duplicate."
269+ )
270+ continue
193271 seen_keys .add (mkey )
194272
195273 bucket_val : Any = m .get ("bucket" )
196274 bucket_name : Optional [str ] = None
197275 if bucket_val is not None :
198276 bname = str (bucket_val )
199- if bname not in buckets :
200- raise KeyError (f"Adapter '{ name } ': metric '{ mkey } ' references unknown bucket '{ bname } '" )
201- bucket_name = bname
277+ if bname not in bucket_names :
278+ msg = (
279+ f"Adapter '{ name } ': metric '{ mkey } ' references unknown bucket '{ bname } '"
280+ )
281+ if _STRICT :
282+ raise KeyError (msg )
283+ _warn (msg + " — treating as unscored telemetry (no bucket)." )
284+ else :
285+ bucket_name = bname
202286
203287 metrics .append (
204288 MetricSpec (
@@ -225,15 +309,21 @@ def load_spec(name: str) -> AdapterSpec:
225309 raise KeyError (f"Adapter '{ name } ': efficiency item missing '{ req } '" )
226310 ekey = str (e ["key" ])
227311 ebucket = str (e ["bucket" ])
228- if ebucket not in buckets :
229- raise KeyError (f"Adapter '{ name } ': efficiency '{ ekey } ' references unknown bucket '{ ebucket } '" )
312+ if ebucket not in bucket_names :
313+ msg = (
314+ f"Adapter '{ name } ': efficiency '{ ekey } ' references unknown bucket '{ ebucket } '"
315+ )
316+ if _STRICT :
317+ raise KeyError (msg )
318+ _warn (msg + " — skipping efficiency item." )
319+ continue
230320 eff_list .append (
231321 EffSpec (
232322 key = ekey ,
233323 make = str (e ["make" ]),
234324 attempt = str (e ["attempt" ]),
235325 bucket = ebucket ,
236- min_den = float (e .get ("min_den" , 1.0 )),
326+ min_den = _finite_float (e .get ("min_den" , 1.0 ), default = 1.0 ),
237327 clamp = _as_clamp (e .get ("clamp" )),
238328 invert = bool (e .get ("invert" , False )),
239329 transform = cast (Optional [Mapping [str , Any ]], e .get ("transform" )),
@@ -254,4 +344,4 @@ def load_spec(name: str) -> AdapterSpec:
254344 )
255345
256346
257- __all__ = ["load_spec" ]
347+ __all__ = ["load_spec" ]
0 commit comments