@@ -158,3 +158,96 @@ def read_schema(file_path: str):
158158 schema = yaml .safe_load (f )
159159
160160 return SchemaModel (** schema )
161+
162+
class MetadataSchemaGenerator:
    """
    Generate a field schema from a metadata dictionary.

    Each metadata value is inspected and its key is sorted into one of three
    field categories: ``text``, ``numeric``, or ``tag``.
    """

    def _test_numeric(self, value) -> bool:
        """
        Test if the given value can be represented as a numeric value.

        Anything accepted by ``float()`` counts, so numeric-looking strings
        (e.g. ``"3.14"``) and booleans are reported as numeric too.

        Args:
            value: The value to test.

        Returns:
            bool: True if the value can be converted to float, False otherwise.
        """
        try:
            float(value)
        except (ValueError, TypeError):
            return False
        return True

    def _infer_type(self, value) -> Optional[str]:
        """
        Infer the field type of the given value.

        Args:
            value: The value to infer the type of.

        Returns:
            Optional[str]: ``"numeric"``, ``"tag"``, ``"text"``, or
                ``"unknown"``; ``None`` if the value is empty (``None``, an
                empty string, or an empty list/set/tuple).
        """
        if value is None:
            return None

        # Only strings are compared against emptiness here; comparing an
        # arbitrary object to "" could invoke a custom __eq__.
        if isinstance(value, str) and not value:
            return None

        if self._test_numeric(value):
            return "numeric"

        if isinstance(value, (list, set, tuple)):
            # An empty collection carries no evidence about its element type;
            # without this guard all() would be vacuously True and the field
            # would be classified as a tag.
            if not value:
                return None
            if all(isinstance(item, str) for item in value):
                return "tag"
            return "unknown"

        if isinstance(value, str):
            return "text"

        return "unknown"

    def generate(
        self, metadata: Dict[str, Any], strict: Optional[bool] = False
    ) -> Dict[str, List[Dict[str, Any]]]:
        """
        Generate a schema from the provided metadata.

        Each metadata field is categorized as text, numeric, or tag based on
        its value. Fields whose type cannot be determined are either skipped
        with a printed warning or, in strict mode, cause an exception.

        Args:
            metadata: The metadata dictionary to generate the schema from.
            strict: If True, raise an exception for fields where the type
                cannot be determined instead of skipping them.

        Returns:
            Dict[str, List[Dict[str, Any]]]: A dictionary with keys 'text',
                'numeric', and 'tag', each mapping to a list of field schemas.

        Raises:
            ValueError: If ``strict`` is True and a field's type cannot be
                determined.
        """
        result: Dict[str, List[Dict[str, Any]]] = {"text": [], "numeric": [], "tag": []}

        for key, value in metadata.items():
            field_type = self._infer_type(value)

            # The explicit None check also narrows field_type to str below.
            if field_type is None or field_type == "unknown":
                message = (
                    f"Unable to determine field type for key '{key}' "
                    f"with value '{value}'"
                )
                if strict:
                    raise ValueError(message)
                print(f"Warning: {message}")
                continue

            # field_type is guaranteed to be one of the three keys here, so a
            # direct lookup is safe.
            field_class = {
                "text": TextFieldSchema,
                "tag": TagFieldSchema,
                "numeric": NumericFieldSchema,
            }[field_type]

            # Serialize the field with its defaults, dropping unset options.
            result[field_type].append(field_class(name=key).dict(exclude_none=True))

        return result
0 commit comments