33from __future__ import annotations
44
55from collections .abc import Sequence
6- from typing import Any , Callable
6+ from typing import Callable
77from typing_extensions import Protocol
88
9- from pydantic import BaseModel , Field
9+ from pydantic import BaseModel , Field , field_validator
1010
1111from eval_protocol .models import EvaluationRow
12- from eval_protocol .pytest .types import EvaluationTestMode
13- from eval_protocol .dataset_logger .dataset_logger import DatasetLogger
1412
1513
class DataLoaderContext(BaseModel):
    """Per-variant context handed to a loader when it materializes data.

    Mainly used internally by eval-protocol. Carries the optional row
    preprocessing hook and the identity (id and description) of the
    variant being loaded.
    """

    preprocess_fn: Callable[[list[EvaluationRow]], list[EvaluationRow]] | None = Field(
        default=None,
        description=(
            "Optional preprocessing function for evaluation rows. This function is applied "
            "to the loaded data before it's returned, allowing for data cleaning, transformation, "
            "filtering, or other modifications. The function receives a list of EvaluationRow objects "
            "and should return a modified list of EvaluationRow objects."
        ),
    )
    variant_id: str = Field(
        ...,
        description=(
            "Unique identifier for the data loader variant. Used to distinguish between "
            "different variants of the same data loader and for tracking purposes in evaluation results."
        ),
    )
    variant_description: str | None = Field(
        default=None,
        description=(
            "Human-readable description of the data loader variant. Provides context about what "
            "this variant represents, its purpose, or any special characteristics that distinguish "
            "it from other variants."
        ),
    )

    @field_validator("variant_id")
    @classmethod
    def validate_variant_id(cls, v: str) -> str:
        """Return ``v`` unchanged; raise ``ValueError`` if it is empty or whitespace-only."""
        if v and v.strip():
            return v
        raise ValueError("variant_id must be non-empty")
3042
3143
class DataLoaderResult(BaseModel):
    """Rows returned by one loader variant, plus metadata identifying that variant.

    NOTE(review): ``num_rows`` is documented as matching ``len(rows)``, but no
    validator here enforces that relationship; it is only checked to be positive.
    """

    rows: list[EvaluationRow] = Field(
        description=(
            "List of evaluation rows loaded from the data source. These are the "
            "processed and ready-to-use evaluation data that will be fed into the evaluation pipeline."
        )
    )
    num_rows: int = Field(
        ...,
        description=(
            "Number of rows loaded. This should match the length of the rows list "
            "and is used for validation and reporting purposes."
        ),
    )
    type: str = Field(
        ...,
        description=(
            "Type of the data loader that produced this result. Used for identification "
            "and debugging purposes (e.g., 'InlineDataLoader', 'FactoryDataLoader')."
        ),
    )
    variant_id: str = Field(
        ...,
        description=(
            "Unique identifier for the data loader variant that produced this result. "
            "Used for tracking and organizing evaluation results from different data sources."
        ),
    )

    variant_description: str | None = Field(
        default=None,
        description=(
            "Human-readable description of the data loader variant that produced this result. "
            "Provides context about what this variant represents, its purpose, or any special characteristics that distinguish "
            "it from other variants."
        ),
    )

    preprocessed: bool = Field(
        default=False,
        description=(
            "Whether the data has been preprocessed. This flag indicates if any "
            "preprocessing functions have been applied to the data, helping to avoid duplicate "
            "processing and track data transformation state."
        ),
    )

    @field_validator("type")
    @classmethod
    def validate_type(cls, v: str) -> str:
        """Return ``v`` unchanged; raise ``ValueError`` if it is empty or whitespace-only."""
        if v and v.strip():
            return v
        raise ValueError("type must be non-empty")

    @field_validator("num_rows")
    @classmethod
    def validate_num_rows(cls, v: int) -> int:
        """Return ``v`` unchanged; raise ``ValueError`` unless it is strictly positive."""
        if v > 0:
            return v
        raise ValueError("num_rows must be greater than 0")

    @field_validator("variant_id")
    @classmethod
    def validate_variant_id(cls, v: str) -> str:
        """Return ``v`` unchanged; raise ``ValueError`` if it is empty or whitespace-only."""
        if v and v.strip():
            return v
        raise ValueError("variant_id must be non-empty")
45101
46102
class DataLoaderVariant(BaseModel):
    """Single parameterizable variant from a data loader.

    Pairs an identifier (and optional description) with the callable that
    produces the variant's data from a ``DataLoaderContext``.
    """

    # Pydantic v2 configuration. The class-based ``Config`` is deprecated in v2
    # (this module already relies on v2-only ``field_validator``), so the same
    # setting is expressed through ``model_config``. A plain dict is used so no
    # additional import is required.
    model_config = {"arbitrary_types_allowed": True}  # For Callable type

    id: str = Field(
        description="Unique identifier for this variant. Used to distinguish between different "
        "variants of the same data loader and for tracking purposes in evaluation results."
    )
    description: str | None = Field(
        default=None,
        description="Human-readable description of this variant. Provides context about what "
        "this variant represents, its purpose, or any special characteristics that distinguish "
        "it from other variants.",
    )
    loader: Callable[[DataLoaderContext], DataLoaderResult] = Field(
        description="Function that loads data for this variant. This callable is invoked with "
        "a DataLoaderContext and should return a DataLoaderResult containing the loaded "
        "evaluation rows and associated metadata. The loader function is responsible for "
        "the actual data retrieval and any necessary processing."
    )

    @field_validator("id")
    @classmethod
    def validate_id(cls, v: str) -> str:
        """Return ``v`` unchanged; raise ``ValueError`` if it is empty or whitespace-only."""
        if not v or not v.strip():
            raise ValueError("DataLoaderVariant.id must be non-empty")
        return v
@@ -69,3 +142,10 @@ class EvaluationDataLoader(Protocol):
def variants(self) -> Sequence[DataLoaderVariant]:
    """Return parameterizable variants emitted by this loader.

    Protocol stub: implementers supply the sequence of ``DataLoaderVariant``
    objects; this declaration itself has no behavior.
    """
    ...
145+
def load(self, ctx: DataLoaderContext) -> list[DataLoaderResult]:
    """Load every variant of this data loader.

    Args:
        ctx: Context passed unchanged to each variant's loader callable.

    Returns:
        One ``DataLoaderResult`` per variant, in the order ``variants()``
        yields them. Empty when the loader exposes no variants.
    """
    # DataLoaderVariant stores its callable in the ``loader`` field; it has no
    # ``load`` attribute, so the callable must be invoked through ``loader``.
    return [variant.loader(ctx) for variant in self.variants()]
0 commit comments