From 741c278ddb8dfefd19a26cdc7ce500beed286384 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Thu, 30 Apr 2026 16:18:59 -0600 Subject: [PATCH 1/4] feat(models): deprecate implicit default provider routing Emit DeprecationWarning whenever the legacy "implicit default provider" path is exercised: `ModelConfig.provider=None`, the registry-level `ModelProviderRegistry.default`, the YAML `default:` key in `~/.data-designer/model_providers.yaml`, and the CLI's "Change default provider" workflow. `resolve_model_provider_registry` skips passing `default=` in the single-provider case so the common construction path stays quiet. Multi-provider registries still pass `default` (per `check_implicit_default`) and warn accordingly. Update docs, the package README, and test fixtures to specify `provider=` explicitly on every `ModelConfig`. New tests cover each warning entry point and pin the post-deprecation happy paths. Refs #589 Made-with: Cursor --- docs/concepts/architecture-and-performance.md | 1 + .../configure-model-settings-with-the-cli.md | 8 +++- docs/concepts/models/custom-model-settings.md | 7 +++ .../concepts/models/default-model-settings.md | 7 +++ docs/concepts/models/inference-parameters.md | 1 + docs/concepts/models/model-providers.md | 8 ++++ packages/data-designer-config/README.md | 1 + .../config/default_model_settings.py | 18 +++++++- .../src/data_designer/config/models.py | 17 ++++++- .../data_designer/config/testing/fixtures.py | 1 + .../tests/config/test_config_builder.py | 4 ++ .../config/test_default_model_settings.py | 14 +++++- .../tests/config/test_fingerprint.py | 10 ++++- .../tests/config/test_models.py | 41 ++++++++++++++--- .../data_designer/engine/model_provider.py | 29 ++++++++++++ .../tests/engine/test_model_provider.py | 45 +++++++++++++++++++ .../cli/controllers/provider_controller.py | 10 +++++ .../cli/repositories/provider_repository.py | 9 ++++ .../controllers/test_provider_controller.py | 15 +++++++ .../repositories/test_provider_repository.py | 42 +++++++++++++++++ 20 files changed, 275 insertions(+), 13 deletions(-) diff --git a/docs/concepts/architecture-and-performance.md b/docs/concepts/architecture-and-performance.md index 6590ac1f1..78e7d642f 100644 --- a/docs/concepts/architecture-and-performance.md +++ b/docs/concepts/architecture-and-performance.md @@ -161,6 +161,7 @@ import data_designer.config as dd model = dd.ModelConfig( alias="my-model", model="nvidia/nemotron-3-nano-30b-a3b", + provider="nvidia", inference_parameters=dd.ChatCompletionInferenceParams( max_parallel_requests=8, ), diff --git a/docs/concepts/models/configure-model-settings-with-the-cli.md b/docs/concepts/models/configure-model-settings-with-the-cli.md index 9fa5b38db..e7c1b5edd 100644 --- a/docs/concepts/models/configure-model-settings-with-the-cli.md +++ b/docs/concepts/models/configure-model-settings-with-the-cli.md @@ -67,6 +67,12 @@ data-designer config providers **Change default provider**: Set which provider is used by default. This option is only available when multiple providers are configured. +!!! warning "Deprecated: 'Change default provider' workflow" + The "Change default provider" workflow is **deprecated** and will be removed in a future + release alongside the registry-level default. Specify `provider=` explicitly on each + `ModelConfig` instead — the workflow now emits a `DeprecationWarning` when entered. + See [issue #589](https://github.com/NVIDIA-NeMo/DataDesigner/issues/589). + ## Managing Model Configurations Run the interactive model configuration command: @@ -111,7 +117,7 @@ data-designer config list This command displays: - **Model Providers**: All configured providers with their endpoints (API keys are masked) -- **Default Provider**: The currently selected default provider +- **Default Provider**: The currently selected default provider _(deprecated; see issue #589)_ - **Model Configurations**: All configured models with their settings ## Resetting Configurations diff --git a/docs/concepts/models/custom-model-settings.md b/docs/concepts/models/custom-model-settings.md index 42e32d15c..be73ae408 100644 --- a/docs/concepts/models/custom-model-settings.md +++ b/docs/concepts/models/custom-model-settings.md @@ -90,6 +90,13 @@ preview_result.display_sample_record() !!! note "Default Providers Always Available" When you only specify `model_configs`, the default model providers (NVIDIA, OpenAI, and OpenRouter) are still available. You only need to create custom providers if you want to connect to different endpoints or modify provider settings. +!!! warning "Always specify `provider=` on `ModelConfig`" + Leaving `provider` unset (or passing `provider=None`) on `ModelConfig` is **deprecated**. + The legacy "implicit default provider" routing — used when `provider` is omitted — emits + a `DeprecationWarning` and will be removed in a future release. Always reference the + intended provider by name, as the examples below do. See + [issue #589](https://github.com/NVIDIA-NeMo/DataDesigner/issues/589). + !!! tip "Mixing Custom and Default Models" When you provide custom `model_configs` to `DataDesignerConfigBuilder`, they **replace** the defaults entirely. To use custom model configs in addition to the default configs, use the add_model_config method: diff --git a/docs/concepts/models/default-model-settings.md b/docs/concepts/models/default-model-settings.md index 8f6f6cd47..31fd14449 100644 --- a/docs/concepts/models/default-model-settings.md +++ b/docs/concepts/models/default-model-settings.md @@ -107,6 +107,13 @@ Both methods operate on the same files, ensuring consistency across your entire !!! warning "API Key Requirements" While default model configurations are always available, you need to set the appropriate API key environment variable (`NVIDIA_API_KEY`, `OPENAI_API_KEY`, or `OPENROUTER_API_KEY`) to actually use the corresponding models for data generation. Without a valid API key, any attempt to generate data using that provider's models will fail. +!!! warning "Deprecated: implicit default provider routing" + The `default:` key in `~/.data-designer/model_providers.yaml` and the registry-level + "default provider" concept are **deprecated** and will be removed in a future release. + Specify `provider=` explicitly on every `ModelConfig` instead — the built-in defaults + above already do this, and a `DeprecationWarning` is now emitted whenever the legacy + routing is exercised. See [issue #589](https://github.com/NVIDIA-NeMo/DataDesigner/issues/589). + !!! tip "Environment Variables" Store your API keys in environment variables rather than hardcoding them in your scripts: diff --git a/docs/concepts/models/inference-parameters.md b/docs/concepts/models/inference-parameters.md index e5772439d..03e932ed4 100644 --- a/docs/concepts/models/inference-parameters.md +++ b/docs/concepts/models/inference-parameters.md @@ -167,6 +167,7 @@ dd.ModelConfig( dd.ModelConfig( alias="dalle", model="dall-e-3", + provider="openai", inference_parameters=dd.ImageInferenceParams( extra_body={"size": "1024x1024", "quality": "hd"} ), diff --git a/docs/concepts/models/model-providers.md b/docs/concepts/models/model-providers.md index 9d397a87a..f8625ae9b 100644 --- a/docs/concepts/models/model-providers.md +++ b/docs/concepts/models/model-providers.md @@ -6,6 +6,14 @@ Model providers are external services that host and serve models. Data Designer A `ModelProvider` defines how Data Designer connects to a provider's API endpoint. When you create a `ModelConfig`, you reference a provider by name, and Data Designer uses that provider's settings to make API calls to the appropriate endpoint. +!!! warning "Deprecated: implicit default provider routing" + Earlier versions of Data Designer let you omit `provider=` on `ModelConfig` and + fall back to a registry-level default — including the `default:` key in + `~/.data-designer/model_providers.yaml`. That implicit routing is **deprecated** + and will be removed in a future release. Always reference a provider by name on + every `ModelConfig`. A `DeprecationWarning` is now emitted when the legacy path + is exercised. See [issue #589](https://github.com/NVIDIA-NeMo/DataDesigner/issues/589). + ## ModelProvider Configuration The `ModelProvider` class has the following fields: diff --git a/packages/data-designer-config/README.md b/packages/data-designer-config/README.md index 78eaa7933..3c7faaf9e 100644 --- a/packages/data-designer-config/README.md +++ b/packages/data-designer-config/README.md @@ -21,6 +21,7 @@ config_builder = dd.DataDesignerConfigBuilder( dd.ModelConfig( alias="my-model", model="nvidia/nemotron-3-nano-30b-a3b", + provider="nvidia", inference_parameters=dd.ChatCompletionInferenceParams(temperature=0.7), ), ] diff --git a/packages/data-designer-config/src/data_designer/config/default_model_settings.py b/packages/data-designer-config/src/data_designer/config/default_model_settings.py index d97a286a6..84878417b 100644 --- a/packages/data-designer-config/src/data_designer/config/default_model_settings.py +++ b/packages/data-designer-config/src/data_designer/config/default_model_settings.py @@ -5,6 +5,7 @@ import logging import os +import warnings from functools import lru_cache from pathlib import Path from typing import Any, Literal @@ -95,7 +96,22 @@ def get_default_providers() -> list[ModelProvider]: def get_default_provider_name() -> str | None: - return _get_default_providers_file_content(MODEL_PROVIDERS_FILE_PATH).get("default") + """Return the YAML's ``default:`` provider name, if set. + + Deprecated: this function and the underlying YAML key are deprecated and + will be removed in a future release. Specify ``provider=`` explicitly on + each ``ModelConfig`` instead. See issue #589. + """ + default = _get_default_providers_file_content(MODEL_PROVIDERS_FILE_PATH).get("default") + if default is not None: + warnings.warn( + f"The 'default:' key in {MODEL_PROVIDERS_FILE_PATH} is deprecated and will " + "be removed in a future release. Remove it and specify provider= explicitly " + "on each ModelConfig instead. See issue #589.", + DeprecationWarning, + stacklevel=2, + ) + return default def resolve_seed_default_model_settings() -> None: diff --git a/packages/data-designer-config/src/data_designer/config/models.py b/packages/data-designer-config/src/data_designer/config/models.py index cfe6520df..35d724447 100644 --- a/packages/data-designer-config/src/data_designer/config/models.py +++ b/packages/data-designer-config/src/data_designer/config/models.py @@ -5,6 +5,7 @@ import json import logging +import warnings from abc import ABC, abstractmethod from enum import Enum from pathlib import Path @@ -503,7 +504,10 @@ class ModelConfig(ConfigBase): model: Model identifier (e.g., from build.nvidia.com or other providers). inference_parameters: Inference parameters for the model (temperature, top_p, max_tokens, etc.). The generation_type is determined by the type of inference_parameters. - provider: Optional model provider name if using custom providers. + provider: Name of the model provider. Required in a future release. Leaving + ``provider`` unset (or ``None``) currently routes through the registry's + implicit default and is **deprecated**; specify ``provider=`` explicitly. + See issue #589. skip_health_check: Whether to skip the health check for this model. Defaults to False. """ @@ -535,6 +539,17 @@ def _convert_inference_parameters(cls, value: Any) -> Any: return ChatCompletionInferenceParams(**value) return value + @model_validator(mode="after") + def _warn_on_implicit_provider(self) -> Self: + if self.provider is None: + msg = ( + f"ModelConfig.provider=None is deprecated and will be required in a future release. " + f"Specify provider= explicitly on ModelConfig(alias={self.alias!r}, ...). " + "See issue #589." + ) + warnings.warn(msg, DeprecationWarning, stacklevel=2) + return self + class ModelProvider(ConfigBase): """Configuration for a custom model provider. diff --git a/packages/data-designer-config/src/data_designer/config/testing/fixtures.py b/packages/data-designer-config/src/data_designer/config/testing/fixtures.py index 113ee3080..8fece3bf8 100644 --- a/packages/data-designer-config/src/data_designer/config/testing/fixtures.py +++ b/packages/data-designer-config/src/data_designer/config/testing/fixtures.py @@ -139,6 +139,7 @@ def stub_model_configs() -> list[ModelConfig]: ModelConfig( alias="stub-model", model="stub-model", + provider="provider-1", inference_parameters=ChatCompletionInferenceParams( temperature=0.9, top_p=0.9, diff --git a/packages/data-designer-config/tests/config/test_config_builder.py b/packages/data-designer-config/tests/config/test_config_builder.py index 2cd2cce93..13d34fe13 100644 --- a/packages/data-designer-config/tests/config/test_config_builder.py +++ b/packages/data-designer-config/tests/config/test_config_builder.py @@ -786,6 +786,7 @@ def test_add_model_config(stub_empty_builder): new_model_config = ModelConfig( alias="new-model", model="openai/gpt-4", + provider="openai", inference_parameters=ChatCompletionInferenceParams( temperature=0.7, top_p=0.95, @@ -833,6 +834,7 @@ def test_add_model_config_duplicate_alias(stub_empty_builder): duplicate_model_config = ModelConfig( alias="stub-model", model="different/model", + provider="some-provider", inference_parameters=ChatCompletionInferenceParams(temperature=0.5), ) @@ -849,11 +851,13 @@ def test_delete_model_config(stub_empty_builder): model_config_1 = ModelConfig( alias="model-to-delete", model="model/delete", + provider="some-provider", inference_parameters=ChatCompletionInferenceParams(temperature=0.5), ) model_config_2 = ModelConfig( alias="model-to-keep", model="model/keep", + provider="some-provider", inference_parameters=ChatCompletionInferenceParams(temperature=0.6), ) stub_empty_builder.add_model_config(model_config_1) diff --git a/packages/data-designer-config/tests/config/test_default_model_settings.py b/packages/data-designer-config/tests/config/test_default_model_settings.py index 7df4c731a..445984745 100644 --- a/packages/data-designer-config/tests/config/test_default_model_settings.py +++ b/packages/data-designer-config/tests/config/test_default_model_settings.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import json +import warnings from pathlib import Path from unittest.mock import patch @@ -142,19 +143,28 @@ def test_get_default_providers_path_does_not_exist(): def test_get_default_provider_name_with_default_key(tmp_path: Path): + """When the YAML carries a non-None ``default:``, the function must + return that value AND emit a ``DeprecationWarning`` (regression for #589). + """ providers_file_path = tmp_path / "providers.yaml" providers_file_path.write_text( json.dumps(dict(providers=[p.model_dump() for p in get_builtin_model_providers()], default="nvidia")) ) with patch("data_designer.config.default_model_settings.MODEL_PROVIDERS_FILE_PATH", new=providers_file_path): - assert get_default_provider_name() == "nvidia" + with pytest.warns(DeprecationWarning, match="'default:' key.*is deprecated"): + assert get_default_provider_name() == "nvidia" def test_get_default_provider_name_without_default_key(tmp_path: Path): + """Pin the post-deprecation happy path: a YAML without ``default:`` must + return ``None`` and NOT emit a ``DeprecationWarning``. + """ providers_file_path = tmp_path / "providers.yaml" providers_file_path.write_text(json.dumps({"providers": [p.model_dump() for p in get_builtin_model_providers()]})) with patch("data_designer.config.default_model_settings.MODEL_PROVIDERS_FILE_PATH", new=providers_file_path): - assert get_default_provider_name() is None + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + assert get_default_provider_name() is None def test_get_default_provider_name_path_does_not_exist(): diff --git a/packages/data-designer-config/tests/config/test_fingerprint.py b/packages/data-designer-config/tests/config/test_fingerprint.py index 0535de704..a02ca3603 100644 --- a/packages/data-designer-config/tests/config/test_fingerprint.py +++ b/packages/data-designer-config/tests/config/test_fingerprint.py @@ -79,10 +79,11 @@ def test_fingerprint_deterministic_across_processes(stub_data_designer_config_st # --------------------------------------------------------------------------- -def _make_model(alias: str = "m", model: str = "some-model") -> ModelConfig: +def _make_model(alias: str = "m", model: str = "some-model", provider: str = "some-provider") -> ModelConfig: return ModelConfig( alias=alias, model=model, + provider=provider, inference_parameters=ChatCompletionInferenceParams(temperature=0.5, top_p=0.9, max_tokens=128), ) @@ -129,7 +130,7 @@ def test_changing_sampler_params_changes_hash() -> None: def test_changing_model_identity_changes_hash() -> None: a = _make_minimal_config() - b = _make_minimal_config(model_configs=[ModelConfig(alias="m", model="other-model")]) + b = _make_minimal_config(model_configs=[ModelConfig(alias="m", model="other-model", provider="some-provider")]) assert _compute_hash(a) != _compute_hash(b) @@ -140,6 +141,7 @@ def test_changing_temperature_changes_hash() -> None: ModelConfig( alias="m", model="some-model", + provider="some-provider", inference_parameters=ChatCompletionInferenceParams(temperature=0.99, top_p=0.9, max_tokens=128), ) ], @@ -199,6 +201,7 @@ def test_changing_extra_body_changes_hash() -> None: ModelConfig( alias="m", model="some-model", + provider="some-provider", inference_parameters=ChatCompletionInferenceParams( temperature=0.5, top_p=0.9, max_tokens=128, extra_body={"frequency_penalty": 0.5} ), @@ -302,6 +305,7 @@ def test_skip_health_check_does_not_change_hash() -> None: ModelConfig( alias="m", model="some-model", + provider="some-provider", inference_parameters=ChatCompletionInferenceParams(temperature=0.5, top_p=0.9, max_tokens=128), skip_health_check=True, ) @@ -317,6 +321,7 @@ def test_max_parallel_requests_does_not_change_hash() -> None: ModelConfig( alias="m", model="some-model", + provider="some-provider", inference_parameters=ChatCompletionInferenceParams( temperature=0.5, top_p=0.9, max_tokens=128, max_parallel_requests=32 ), @@ -333,6 +338,7 @@ def test_inference_timeout_does_not_change_hash() -> None: ModelConfig( alias="m", model="some-model", + provider="some-provider", inference_parameters=ChatCompletionInferenceParams( temperature=0.5, top_p=0.9, max_tokens=128, timeout=30 ), diff --git a/packages/data-designer-config/tests/config/test_models.py b/packages/data-designer-config/tests/config/test_models.py index 7bdc6ce91..c49840371 100644 --- a/packages/data-designer-config/tests/config/test_models.py +++ b/packages/data-designer-config/tests/config/test_models.py @@ -4,6 +4,7 @@ import base64 import json import tempfile +import warnings from collections import Counter from pathlib import Path @@ -413,8 +414,8 @@ def test_generation_parameters_max_tokens_validation(): def test_load_model_configs(): stub_model_configs = [ - ModelConfig(alias="test", model="test"), - ModelConfig(alias="test2", model="test2"), + ModelConfig(alias="test", model="test", provider="test-provider"), + ModelConfig(alias="test2", model="test2", provider="test-provider"), ] stub_model_configs_dict_list = [mc.model_dump(mode="json") for mc in stub_model_configs] assert load_model_configs([]) == [] @@ -454,35 +455,61 @@ def test_load_model_configs(): def test_model_config_construction(): # test default construction - model_config = ModelConfig(alias="test", model="test") + model_config = ModelConfig(alias="test", model="test", provider="test-provider") assert model_config.inference_parameters == ChatCompletionInferenceParams() assert model_config.generation_type == GenerationType.CHAT_COMPLETION # test construction with completion inference parameters completion_params = ChatCompletionInferenceParams(temperature=0.5, top_p=0.5, max_tokens=100) - model_config = ModelConfig(alias="test", model="test", inference_parameters=completion_params) + model_config = ModelConfig( + alias="test", model="test", provider="test-provider", inference_parameters=completion_params + ) assert model_config.inference_parameters == completion_params assert model_config.generation_type == GenerationType.CHAT_COMPLETION # test construction with embedding inference parameters embedding_params = EmbeddingInferenceParams(dimensions=100) - model_config = ModelConfig(alias="test", model="test", inference_parameters=embedding_params) + model_config = ModelConfig( + alias="test", model="test", provider="test-provider", inference_parameters=embedding_params + ) assert model_config.inference_parameters == embedding_params assert model_config.generation_type == GenerationType.EMBEDDING # test construction with image inference parameters image_params = ImageInferenceParams(extra_body={"size": "1024x1024", "quality": "hd"}) - model_config = ModelConfig(alias="test", model="test", inference_parameters=image_params) + model_config = ModelConfig(alias="test", model="test", provider="test-provider", inference_parameters=image_params) assert model_config.inference_parameters == image_params assert model_config.generation_type == GenerationType.IMAGE +def test_model_config_provider_none_emits_deprecation_warning(): + """Regression for #589: omitting ``provider=`` (or passing ``provider=None``) + on a ``ModelConfig`` is deprecated; construction must emit a + ``DeprecationWarning`` pointing users at the explicit-provider migration. + """ + with pytest.warns(DeprecationWarning, match="ModelConfig.provider=None is deprecated"): + ModelConfig(alias="legacy", model="legacy-model") + + with pytest.warns(DeprecationWarning, match="ModelConfig.provider=None is deprecated"): + ModelConfig(alias="legacy", model="legacy-model", provider=None) + + +def test_model_config_with_provider_does_not_warn(): + """Pin the post-deprecation happy path: specifying ``provider=`` must not + emit any deprecation warning. + """ + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + ModelConfig(alias="modern", model="modern-model", provider="some-provider") + + def test_model_config_generation_type_from_dict(): # Test that generation_type in dict is used to create the right inference params type model_config = ModelConfig.model_validate( { "alias": "test", "model": "test", + "provider": "test-provider", "inference_parameters": {"generation_type": "embedding", "dimensions": 100}, } ) @@ -493,6 +520,7 @@ def test_model_config_generation_type_from_dict(): { "alias": "test", "model": "test", + "provider": "test-provider", "inference_parameters": {"generation_type": "chat-completion", "temperature": 0.5}, } ) @@ -503,6 +531,7 @@ def test_model_config_generation_type_from_dict(): { "alias": "test", "model": "image-model", + "provider": "test-provider", "inference_parameters": { "generation_type": "image", "extra_body": {"size": "1024x1024", "quality": "hd"}, diff --git a/packages/data-designer-engine/src/data_designer/engine/model_provider.py b/packages/data-designer-engine/src/data_designer/engine/model_provider.py index b34442fd5..c2627df68 100644 --- a/packages/data-designer-engine/src/data_designer/engine/model_provider.py +++ b/packages/data-designer-engine/src/data_designer/engine/model_provider.py @@ -3,6 +3,7 @@ from __future__ import annotations +import warnings from functools import cached_property from pydantic import BaseModel, field_validator, model_validator @@ -16,6 +17,9 @@ class ModelProviderRegistry(BaseModel): providers: list[ModelProvider] default: str | None = None + """Deprecated: registry-level default provider. Will be removed in a future + release; specify ``provider=`` explicitly on each ``ModelConfig`` instead. + See issue #589.""" @field_validator("providers", mode="after") @classmethod @@ -50,6 +54,22 @@ def check_default_exists(self) -> Self: raise ValueError(f"Specified default {self.default!r} not found in providers list") return self + @model_validator(mode="after") + def _warn_on_explicit_default(self) -> Self: + # Fires only when the caller actually passed ``default=`` (not when it's + # left at the field default of None). ``resolve_model_provider_registry`` + # avoids passing ``default=`` in the single-provider case so common + # construction paths stay quiet. See issue #589. + if "default" in self.model_fields_set: + warnings.warn( + "ModelProviderRegistry.default is deprecated and will be removed in a " + "future release. Specify provider= explicitly on each ModelConfig " + "instead of relying on a registry-level default. See issue #589.", + DeprecationWarning, + stacklevel=2, + ) + return self + def get_default_provider_name(self) -> str: return self.default or self.providers[0].name @@ -72,6 +92,15 @@ def resolve_model_provider_registry( ) -> ModelProviderRegistry: if len(model_providers) == 0: raise NoModelProvidersError("At least one model provider must be defined") + # In the single-provider case, the registry's ``get_default_provider_name`` + # falls back to ``providers[0].name`` when ``default`` is unset, so we can + # avoid passing ``default=`` and keep the common construction path quiet + # under the #589 deprecation warning. The multi-provider case still + # requires ``default`` (per ``check_implicit_default``); callers who supply + # multiple providers with no explicit default fall back to first-wins, + # matching the contract pinned in #588. + if len(model_providers) == 1 and default_provider_name is None: + return ModelProviderRegistry(providers=model_providers) return ModelProviderRegistry( providers=model_providers, default=default_provider_name or model_providers[0].name, diff --git a/packages/data-designer-engine/tests/engine/test_model_provider.py b/packages/data-designer-engine/tests/engine/test_model_provider.py index 55f750a76..5e8bea25b 100644 --- a/packages/data-designer-engine/tests/engine/test_model_provider.py +++ b/packages/data-designer-engine/tests/engine/test_model_provider.py @@ -1,6 +1,8 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 +import warnings + import pytest from data_designer.config.mcp import LocalStdioMCPProvider @@ -92,6 +94,49 @@ def test_resolve_model_provider_registry_empty_error() -> None: resolve_model_provider_registry([]) +def test_explicit_default_emits_deprecation_warning(stub_foo_provider: ModelProvider) -> None: + """Regression for #589: passing ``default=`` explicitly to ``ModelProviderRegistry`` + must emit a ``DeprecationWarning``. The registry-level default field is on its + way out; users should specify ``provider=`` per ``ModelConfig`` instead. + """ + with pytest.warns(DeprecationWarning, match="ModelProviderRegistry.default is deprecated"): + ModelProviderRegistry(providers=[stub_foo_provider], default="foo") + + +def test_no_default_does_not_emit_deprecation_warning(stub_foo_provider: ModelProvider) -> None: + """Pin the post-deprecation happy path: omitting ``default=`` (single-provider + case) must NOT emit a warning, since callers haven't opted into the deprecated + field. + """ + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + ModelProviderRegistry(providers=[stub_foo_provider]) + + +def test_resolve_single_provider_quiet_under_deprecation(stub_foo_provider: ModelProvider) -> None: + """Pin the q3 tweak: ``resolve_model_provider_registry`` skips ``default=`` + in the single-provider case so common construction paths stay quiet under + the #589 deprecation warning. + """ + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + registry = resolve_model_provider_registry([stub_foo_provider]) + + assert registry.get_default_provider_name() == "foo" + + +def test_resolve_multi_provider_emits_deprecation_warning( + stub_foo_provider: ModelProvider, stub_bar_provider: ModelProvider +) -> None: + """Multi-provider registries currently require ``default``, so + ``resolve_model_provider_registry`` keeps passing it. That construction + path is the deprecated one users should migrate off; the warning fires + accordingly. + """ + with pytest.warns(DeprecationWarning, match="ModelProviderRegistry.default is deprecated"): + resolve_model_provider_registry([stub_foo_provider, stub_bar_provider]) + + def test_mcp_provider_registry_empty() -> None: """Test MCPProviderRegistry can be created empty.""" registry = MCPProviderRegistry() diff --git a/packages/data-designer/src/data_designer/cli/controllers/provider_controller.py b/packages/data-designer/src/data_designer/cli/controllers/provider_controller.py index 165422436..886bd1c82 100644 --- a/packages/data-designer/src/data_designer/cli/controllers/provider_controller.py +++ b/packages/data-designer/src/data_designer/cli/controllers/provider_controller.py @@ -5,6 +5,7 @@ import copy import re +import warnings from pathlib import Path from typing import TYPE_CHECKING @@ -288,6 +289,15 @@ def _handle_delete_all(self) -> None: def _handle_change_default(self) -> None: """Handle changing the default provider.""" + deprecation_msg = ( + "The 'Change default provider' workflow is deprecated and will be removed " + "in a future release. Specify provider= explicitly on each ModelConfig " + "instead of relying on a registry-level default. See issue #589." + ) + print_warning(deprecation_msg) + warnings.warn(deprecation_msg, DeprecationWarning, stacklevel=2) + console.print() + providers = self.service.list_all() current_default = self.service.get_default() diff --git a/packages/data-designer/src/data_designer/cli/repositories/provider_repository.py b/packages/data-designer/src/data_designer/cli/repositories/provider_repository.py index a2b692fb3..a46684783 100644 --- a/packages/data-designer/src/data_designer/cli/repositories/provider_repository.py +++ b/packages/data-designer/src/data_designer/cli/repositories/provider_repository.py @@ -3,6 +3,7 @@ from __future__ import annotations +import warnings from pathlib import Path from pydantic import BaseModel @@ -35,6 +36,14 @@ def load(self) -> ModelProviderRegistry | None: try: config_dict = load_config_file(self.config_file) + if config_dict.get("default") is not None: + warnings.warn( + f"The 'default:' key in {self.config_file} is deprecated and will be " + "removed in a future release. Remove it and refer to providers by name " + "in your ModelConfig entries. See issue #589.", + DeprecationWarning, + stacklevel=2, + ) return ModelProviderRegistry.model_validate(config_dict) except Exception: return None diff --git a/packages/data-designer/tests/cli/controllers/test_provider_controller.py b/packages/data-designer/tests/cli/controllers/test_provider_controller.py index fc83ad45a..ba95bcd77 100644 --- a/packages/data-designer/tests/cli/controllers/test_provider_controller.py +++ b/packages/data-designer/tests/cli/controllers/test_provider_controller.py @@ -208,6 +208,21 @@ def test_run_changes_default_provider( assert controller_with_providers.service.get_default() == "test-provider-2" +@patch("data_designer.cli.controllers.provider_controller.select_with_arrows") +def test_handle_change_default_emits_deprecation_warning( + mock_select: MagicMock, + controller_with_providers: ProviderController, +) -> None: + """Regression for #589: entering the 'Change default provider' workflow + must emit a ``DeprecationWarning`` so users see the migration nudge before + setting another value that's also slated for removal. + """ + mock_select.side_effect = ["change_default", "test-provider-2"] + + with pytest.warns(DeprecationWarning, match="'Change default provider' workflow is deprecated"): + controller_with_providers.run() + + @patch("data_designer.cli.controllers.provider_controller.confirm_action", return_value=False) @patch("data_designer.cli.controllers.provider_controller.select_with_arrows") def test_run_respects_delete_cancellation( diff --git a/packages/data-designer/tests/cli/repositories/test_provider_repository.py b/packages/data-designer/tests/cli/repositories/test_provider_repository.py index 0becfb54a..8c3d5d995 100644 --- a/packages/data-designer/tests/cli/repositories/test_provider_repository.py +++ b/packages/data-designer/tests/cli/repositories/test_provider_repository.py @@ -1,8 +1,11 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 +import warnings from pathlib import Path +import pytest + from data_designer.cli.repositories.provider_repository import ModelProviderRegistry, ProviderRepository from data_designer.config.models import ModelProvider from data_designer.config.utils.constants import MODEL_PROVIDERS_FILE_NAME @@ -35,3 +38,42 @@ def test_save(tmp_path: Path, stub_model_providers: list[ModelProvider]): repository.save(ModelProviderRegistry(providers=stub_model_providers, default=stub_model_providers[0].name)) assert repository.load() is not None assert repository.load().providers == stub_model_providers + + +def test_load_with_yaml_default_emits_deprecation_warning( + tmp_path: Path, stub_model_providers: list[ModelProvider] +) -> None: + """Regression for #589: when the on-disk providers YAML carries a non-None + ``default:`` key, ``ProviderRepository.load`` must emit a + ``DeprecationWarning`` so users see the migration nudge regardless of which + entry point reads the file. + """ + providers_file_path = tmp_path / MODEL_PROVIDERS_FILE_NAME + save_config_file( + providers_file_path, + ModelProviderRegistry(providers=stub_model_providers, default=stub_model_providers[0].name).model_dump(), + ) + repository = ProviderRepository(tmp_path) + + with pytest.warns(DeprecationWarning, match="'default:' key.*is deprecated"): + registry = repository.load() + assert registry is not None + assert registry.default == stub_model_providers[0].name + + +def test_load_without_yaml_default_does_not_warn(tmp_path: Path, stub_model_providers: list[ModelProvider]) -> None: + """Pin the post-deprecation happy path: a YAML without a ``default:`` key + must load cleanly with no ``DeprecationWarning``. + """ + providers_file_path = tmp_path / MODEL_PROVIDERS_FILE_NAME + save_config_file( + providers_file_path, + ModelProviderRegistry(providers=stub_model_providers).model_dump(exclude_none=True), + ) + repository = ProviderRepository(tmp_path) + + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + registry = repository.load() + assert registry is not None + assert registry.default is None From 17a48acc0aa9e99a8d3c985fa407d0bbe8b8113c Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Fri, 1 May 2026 12:19:26 -0600 Subject: [PATCH 2/4] fix(models): address PR #594 review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Greptile P1: ProviderRepository.load emitted its DeprecationWarning inside a `try/except Exception` block. Under `filterwarnings("error", DeprecationWarning)` the warn would raise, the except would swallow it, and `load()` would silently return None (losing the registry). Move the warn outside the catch-all so the strict-warning path no longer drops valid configs. Greptile P2 / johnnygreco: `_warn_on_implicit_provider` and `_warn_on_explicit_default` use `stacklevel=2`, which lands inside pydantic v2's validator dispatch rather than at the user's `ModelConfig(...)` / `ModelProviderRegistry(...)` call. That broke both attribution (the source line was unhelpful) and Python's once-per-location dedup (every call collapsed to the same pydantic-internal key, suppressing all but the first warning). Introduce `data_designer.config.utils.warning_helpers.warn_at_caller`, which walks past the helper, validator, and any pydantic frames to find the user's call site and emits via `warnings.warn_explicit` with the user frame's `__warningregistry__`. Keeps attribution accurate and dedup keyed on the user's (filename, lineno). johnnygreco: align the `provider_repository.py` warning copy with the sibling site in `default_model_settings.py` ("specify provider= explicitly on each ModelConfig instead") so both YAML-default warning sites give the same migration instruction. The previous wording pointed users at "ModelConfig entries" inside `model_providers.yaml`, where ModelConfig entries don't actually live. johnnygreco: dedup the cascade in `DataDesigner.__init__`. With `model_providers=None` and a YAML `default:`, the user previously saw two DeprecationWarnings for the same root cause — `get_default_provider_name()` warns about the YAML key, then `resolve_model_provider_registry(...)` re-warns from `_warn_on_explicit_default`. Suppress the registry-level duplicate in the YAML-fallback branch via `warnings.catch_warnings()` so users see exactly one warning per user action. johnnygreco: tighten `_warn_on_explicit_default` to fire only when `default is not None`. Passing `default=None` explicitly is semantically equivalent to omitting it (caller is opting *out* of a registry-level default), and shouldn't trigger the deprecation nudge. johnnygreco: add a `model_validate({...})` regression test for `ModelConfig` so the deserialization path (legacy on-disk configs) is pinned alongside the construction path. Tests: - Update `test_load_exists` and `test_save` to omit `default=` so the roundtrip stops exercising the deprecated YAML-default path unguarded (Greptile note). - Wrap `test_resolve_model_provider_registry_with_explicit_default`, `test_get_provider`, and `test_init_user_supplied_providers_preserve_first_wins_over_yaml_default` in `pytest.warns` so the suite stays green under `-W error::DeprecationWarning` (Greptile note). - Add `test_explicit_default_none_does_not_emit_deprecation_warning` to pin the tightened predicate. - Add `test_init_yaml_default_emits_single_deprecation_warning` to pin the cascade-dedup behavior. Refs #589 Made-with: Cursor --- .../src/data_designer/config/models.py | 13 ++-- .../config/utils/warning_helpers.py | 67 +++++++++++++++++++ .../tests/config/test_models.py | 11 +++ .../data_designer/engine/model_provider.py | 20 +++--- .../tests/engine/test_model_provider.py | 23 +++++-- .../cli/repositories/provider_repository.py | 26 ++++--- .../data_designer/interface/data_designer.py | 17 ++++- .../repositories/test_provider_repository.py | 10 ++- .../tests/interface/test_data_designer.py | 64 +++++++++++++++++- 9 files changed, 221 insertions(+), 30 deletions(-) create mode 100644 packages/data-designer-config/src/data_designer/config/utils/warning_helpers.py diff --git a/packages/data-designer-config/src/data_designer/config/models.py b/packages/data-designer-config/src/data_designer/config/models.py index 35d724447..9e3d8c44c 100644 --- a/packages/data-designer-config/src/data_designer/config/models.py +++ b/packages/data-designer-config/src/data_designer/config/models.py @@ -5,7 +5,6 @@ import json import logging -import warnings from abc import ABC, abstractmethod from enum import Enum from pathlib import Path @@ -32,6 +31,7 @@ load_image_path_to_base64, ) from data_designer.config.utils.io_helpers import smart_load_yaml +from data_designer.config.utils.warning_helpers import warn_at_caller logger = logging.getLogger(__name__) @@ -542,12 +542,17 @@ def _convert_inference_parameters(cls, value: Any) -> Any: @model_validator(mode="after") def _warn_on_implicit_provider(self) -> Self: if self.provider is None: - msg = ( + # Use ``warn_at_caller`` so the warning is attributed to the user's + # ``ModelConfig(...)`` / ``model_validate(...)`` call rather than a + # pydantic-internal frame. Without this, every call dedupes to the + # same pydantic line and only the first emission is shown. See + # PR #594 review. + warn_at_caller( f"ModelConfig.provider=None is deprecated and will be required in a future release. " f"Specify provider= explicitly on ModelConfig(alias={self.alias!r}, ...). " - "See issue #589." + "See issue #589.", + DeprecationWarning, ) - warnings.warn(msg, DeprecationWarning, stacklevel=2) return self diff --git a/packages/data-designer-config/src/data_designer/config/utils/warning_helpers.py b/packages/data-designer-config/src/data_designer/config/utils/warning_helpers.py new file mode 100644 index 000000000..f48ea3afe --- /dev/null +++ b/packages/data-designer-config/src/data_designer/config/utils/warning_helpers.py @@ -0,0 +1,67 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Helpers for emitting warnings that attribute correctly to user code. + +Pydantic v2 dispatches ``@model_validator`` / ``@field_validator`` callbacks +through several internal frames. ``warnings.warn(stacklevel=N)`` from inside a +validator therefore tends to land inside pydantic's machinery rather than at +the user's ``ModelConfig(...)`` / ``ModelProviderRegistry(...)`` call site. + +That breaks two things: + +1. Attribution — the displayed source location is unhelpful. +2. Deduplication — Python's default once-per-location dedup key is + ``(category, module, lineno)``. When every call resolves to the same + pydantic-internal line, every warning after the first is silently + suppressed regardless of which user file triggered it. + +``warn_at_caller`` walks the stack to the first frame outside pydantic (and +outside this helper / the calling validator) and uses +``warnings.warn_explicit`` to attribute the warning there. +""" + +from __future__ import annotations + +import sys +import warnings + + +def warn_at_caller(message: str, category: type[Warning]) -> None: + """Emit ``message`` attributed to the first non-pydantic frame above the caller. + + Intended to be invoked from inside a pydantic validator. The walk skips this + helper's own frame and the calling validator's frame, then walks past any + pydantic-internal frames until it finds the user's call site. + + The user frame's ``__warningregistry__`` is passed to + ``warnings.warn_explicit`` so Python's built-in once-per-location dedup keys + on the *user's* (filename, lineno) rather than an internal pydantic frame. + That matches how ``warnings.warn`` would dedup if ``stacklevel`` could + correctly point at the user. + + We deliberately do *not* pass ``module_globals`` — it's only used for + ``linecache`` source-line display, and for scripts run with ``python -c`` + (where the user frame's ``__loader__`` is ``BuiltinImporter`` for + ``__main__``) the lookup raises ``ImportError("'__main__' is not a built-in + module")``. Skipping ``module_globals`` keeps the warning path robust at + the cost of an empty source line in the formatted output. + """ + # Skip frame 0 (warn_at_caller) and frame 1 (the validator that called us). + frame = sys._getframe(2) if hasattr(sys, "_getframe") else None + while frame is not None: + module_name = frame.f_globals.get("__name__", "") + if not module_name.startswith("pydantic"): + warnings.warn_explicit( + message, + category, + frame.f_code.co_filename, + frame.f_lineno, + module=module_name, + registry=frame.f_globals.setdefault("__warningregistry__", {}), + ) + return + frame = frame.f_back + + # Fallback: never escaped pydantic frames (or no frame access). Use stacklevel. + warnings.warn(message, category, stacklevel=3) diff --git a/packages/data-designer-config/tests/config/test_models.py b/packages/data-designer-config/tests/config/test_models.py index c49840371..c5bacd818 100644 --- a/packages/data-designer-config/tests/config/test_models.py +++ b/packages/data-designer-config/tests/config/test_models.py @@ -494,6 +494,17 @@ def test_model_config_provider_none_emits_deprecation_warning(): ModelConfig(alias="legacy", model="legacy-model", provider=None) +def test_model_config_provider_none_via_model_validate_emits_deprecation_warning(): + """Regression for #589 / PR #594 review: deserialising legacy on-disk configs + via ``ModelConfig.model_validate(...)`` must surface the same + ``DeprecationWarning`` as direct construction. Both paths funnel through + the same validator today, so this pin protects against a future refactor + that, e.g., only runs the validator on construction and not on revalidation. + """ + with pytest.warns(DeprecationWarning, match="ModelConfig.provider=None is deprecated"): + ModelConfig.model_validate({"alias": "legacy", "model": "legacy-model"}) + + def test_model_config_with_provider_does_not_warn(): """Pin the post-deprecation happy path: specifying ``provider=`` must not emit any deprecation warning. diff --git a/packages/data-designer-engine/src/data_designer/engine/model_provider.py b/packages/data-designer-engine/src/data_designer/engine/model_provider.py index c2627df68..d3d8dc7be 100644 --- a/packages/data-designer-engine/src/data_designer/engine/model_provider.py +++ b/packages/data-designer-engine/src/data_designer/engine/model_provider.py @@ -3,7 +3,6 @@ from __future__ import annotations -import warnings from functools import cached_property from pydantic import BaseModel, field_validator, model_validator @@ -11,6 +10,7 @@ from data_designer.config.mcp import MCPProviderT from data_designer.config.models import ModelProvider +from data_designer.config.utils.warning_helpers import warn_at_caller from data_designer.engine.errors import NoModelProvidersError, UnknownProviderError @@ -56,17 +56,21 @@ def check_default_exists(self) -> Self: @model_validator(mode="after") def _warn_on_explicit_default(self) -> Self: - # Fires only when the caller actually passed ``default=`` (not when it's - # left at the field default of None). ``resolve_model_provider_registry`` - # avoids passing ``default=`` in the single-provider case so common - # construction paths stay quiet. See issue #589. - if "default" in self.model_fields_set: - warnings.warn( + # Fires only when the caller actually passed a non-None ``default=``. + # The ``model_fields_set`` guard distinguishes "caller opted into the + # deprecated field" from "field at its default value of None", and the + # ``self.default is not None`` clause additionally lets callers + # explicitly opt *out* via ``default=None`` without tripping the + # warning. ``resolve_model_provider_registry`` avoids passing + # ``default=`` in the single-provider case so common construction paths + # stay quiet. ``warn_at_caller`` keeps attribution and dedup correct + # under pydantic's validator dispatch. See issue #589 / PR #594 review. + if "default" in self.model_fields_set and self.default is not None: + warn_at_caller( "ModelProviderRegistry.default is deprecated and will be removed in a " "future release. Specify provider= explicitly on each ModelConfig " "instead of relying on a registry-level default. See issue #589.", DeprecationWarning, - stacklevel=2, ) return self diff --git a/packages/data-designer-engine/tests/engine/test_model_provider.py b/packages/data-designer-engine/tests/engine/test_model_provider.py index 5e8bea25b..bbcd8b663 100644 --- a/packages/data-designer-engine/tests/engine/test_model_provider.py +++ b/packages/data-designer-engine/tests/engine/test_model_provider.py @@ -58,10 +58,14 @@ def test_no_duplicate_provider_names(stub_foo_provider: ModelProvider): def test_get_provider(stub_foo_provider: ModelProvider, stub_bar_provider: ModelProvider): - registry = ModelProviderRegistry( - providers=[stub_foo_provider, stub_bar_provider], - default="foo", - ) + # Multi-provider construction with an explicit default exercises the #589 + # deprecation path; wrap so this test stays green if the project ever runs + # with ``-W error::DeprecationWarning``. + with pytest.warns(DeprecationWarning, match="ModelProviderRegistry.default is deprecated"): + registry = ModelProviderRegistry( + providers=[stub_foo_provider, stub_bar_provider], + default="foo", + ) assert registry.get_provider(None) == stub_foo_provider assert registry.get_provider("foo") == stub_foo_provider @@ -82,8 +86,15 @@ def test_resolve_model_provider_registry(stub_foo_provider: ModelProvider) -> No def test_resolve_model_provider_registry_with_explicit_default( stub_foo_provider: ModelProvider, stub_bar_provider: ModelProvider ) -> None: - """Test resolve_model_provider_registry with explicit default.""" - registry = resolve_model_provider_registry([stub_foo_provider, stub_bar_provider], default_provider_name="bar") + """Test resolve_model_provider_registry with explicit default. + + The multi-provider/explicit-default path is the deprecated one (see #589), + so the construction emits a ``DeprecationWarning``. Wrap the call in + ``pytest.warns`` so this test stays green if the project ever runs under + ``-W error::DeprecationWarning``. + """ + with pytest.warns(DeprecationWarning, match="ModelProviderRegistry.default is deprecated"): + registry = resolve_model_provider_registry([stub_foo_provider, stub_bar_provider], default_provider_name="bar") assert registry.get_default_provider_name() == "bar" diff --git a/packages/data-designer/src/data_designer/cli/repositories/provider_repository.py b/packages/data-designer/src/data_designer/cli/repositories/provider_repository.py index a46684783..1be627575 100644 --- a/packages/data-designer/src/data_designer/cli/repositories/provider_repository.py +++ b/packages/data-designer/src/data_designer/cli/repositories/provider_repository.py @@ -36,14 +36,24 @@ def load(self) -> ModelProviderRegistry | None: try: config_dict = load_config_file(self.config_file) - if config_dict.get("default") is not None: - warnings.warn( - f"The 'default:' key in {self.config_file} is deprecated and will be " - "removed in a future release. Remove it and refer to providers by name " - "in your ModelConfig entries. See issue #589.", - DeprecationWarning, - stacklevel=2, - ) + except Exception: + return None + + # Emit the deprecation warning *outside* the validation try/except below. + # ``DeprecationWarning`` is an ``Exception`` subclass, so under + # ``filterwarnings("error", DeprecationWarning)`` a warn raised inside + # the catch-all would be silently swallowed and ``load`` would drop the + # registry. See PR #594 review. + if config_dict.get("default") is not None: + warnings.warn( + f"The 'default:' key in {self.config_file} is deprecated and will " + "be removed in a future release. Remove it and specify provider= " + "explicitly on each ModelConfig instead. See issue #589.", + DeprecationWarning, + stacklevel=2, + ) + + try: return ModelProviderRegistry.model_validate(config_dict) except Exception: return None diff --git a/packages/data-designer/src/data_designer/interface/data_designer.py b/packages/data-designer/src/data_designer/interface/data_designer.py index 09ab43b0a..503a63269 100644 --- a/packages/data-designer/src/data_designer/interface/data_designer.py +++ b/packages/data-designer/src/data_designer/interface/data_designer.py @@ -4,6 +4,7 @@ from __future__ import annotations import logging +import warnings from pathlib import Path from typing import TYPE_CHECKING @@ -163,7 +164,21 @@ def __init__( self._model_providers = self._resolve_model_providers(model_providers) default_provider_name = None self._mcp_providers = mcp_providers or [] - self._model_provider_registry = resolve_model_provider_registry(self._model_providers, default_provider_name) + # When the YAML carries a default, ``get_default_provider_name`` already + # nudged the user with a ``DeprecationWarning``. Building the registry + # below would re-fire ``ModelProviderRegistry._warn_on_explicit_default`` + # for the same root cause, so suppress that second warning. See PR #594 + # review. + with warnings.catch_warnings(): + if default_provider_name is not None: + warnings.filterwarnings( + "ignore", + message="ModelProviderRegistry.default is deprecated", + category=DeprecationWarning, + ) + self._model_provider_registry = resolve_model_provider_registry( + self._model_providers, default_provider_name + ) self._seed_reader_registry = SeedReaderRegistry(readers=seed_readers or DEFAULT_SEED_READERS) @property diff --git a/packages/data-designer/tests/cli/repositories/test_provider_repository.py b/packages/data-designer/tests/cli/repositories/test_provider_repository.py index 8c3d5d995..647050916 100644 --- a/packages/data-designer/tests/cli/repositories/test_provider_repository.py +++ b/packages/data-designer/tests/cli/repositories/test_provider_repository.py @@ -23,10 +23,14 @@ def test_load_does_not_exist(): def test_load_exists(tmp_path: Path, stub_model_providers: list[ModelProvider]): + # Roundtrip test for the load/save cycle. We deliberately leave ``default`` + # unset so this test does not exercise the deprecated YAML ``default:`` path + # — that path is covered by ``test_load_with_yaml_default_emits_deprecation_warning`` + # below. See issue #589. providers_file_path = tmp_path / MODEL_PROVIDERS_FILE_NAME save_config_file( providers_file_path, - ModelProviderRegistry(providers=stub_model_providers, default=stub_model_providers[0].name).model_dump(), + ModelProviderRegistry(providers=stub_model_providers).model_dump(exclude_none=True), ) repository = ProviderRepository(tmp_path) assert repository.load() is not None @@ -34,8 +38,10 @@ def test_load_exists(tmp_path: Path, stub_model_providers: list[ModelProvider]): def test_save(tmp_path: Path, stub_model_providers: list[ModelProvider]): + # As above, leave ``default`` unset so the roundtrip stays clear of the + # YAML-default deprecation. See issue #589. repository = ProviderRepository(tmp_path) - repository.save(ModelProviderRegistry(providers=stub_model_providers, default=stub_model_providers[0].name)) + repository.save(ModelProviderRegistry(providers=stub_model_providers)) assert repository.load() is not None assert repository.load().providers == stub_model_providers diff --git a/packages/data-designer/tests/interface/test_data_designer.py b/packages/data-designer/tests/interface/test_data_designer.py index f3bc44044..af9e17a87 100644 --- a/packages/data-designer/tests/interface/test_data_designer.py +++ b/packages/data-designer/tests/interface/test_data_designer.py @@ -6,6 +6,7 @@ import contextlib import json import logging +import warnings from datetime import datetime from pathlib import Path from typing import Any @@ -464,7 +465,13 @@ def test_init_user_supplied_providers_preserve_first_wins_over_yaml_default( ), ] - with patch.object(dd_mod, "get_default_provider_name", return_value="second-provider"): + # Multi-provider construction (user-supplied list of length > 1) still + # passes ``default=`` to ``ModelProviderRegistry`` — that's the deprecated + # path under #589 — so the registry-level deprecation fires here. + with ( + patch.object(dd_mod, "get_default_provider_name", return_value="second-provider"), + pytest.warns(DeprecationWarning, match="ModelProviderRegistry.default is deprecated"), + ): data_designer = DataDesigner( artifact_path=stub_artifact_path, model_providers=user_providers, @@ -513,6 +520,61 @@ def test_init_no_user_providers_uses_yaml_default( assert data_designer.model_provider_registry.get_default_provider_name() == "yaml-second" +def test_init_yaml_default_emits_single_deprecation_warning( + stub_artifact_path: Path, + stub_managed_assets_path: Path, +) -> None: + """Regression for PR #594 review: when ``DataDesigner()`` falls back to the + YAML's ``providers:`` and ``default:``, the user should see a single + ``DeprecationWarning`` (the YAML one) rather than a duplicate cascade where + ``ModelProviderRegistry._warn_on_explicit_default`` also fires for the same + root cause. See issue #589. + """ + yaml_providers = [ + ModelProvider( + name="yaml-first", + endpoint="https://yaml-first.example.com/v1", + api_key="yaml-first-key", + ), + ModelProvider( + name="yaml-second", + endpoint="https://yaml-second.example.com/v1", + api_key="yaml-second-key", + ), + ] + + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always", DeprecationWarning) + with ( + patch.object(dd_mod, "get_default_providers", return_value=yaml_providers), + patch.object(dd_mod, "get_default_provider_name") as mock_get_default, + ): + mock_get_default.side_effect = lambda: ( + warnings.warn( + "The 'default:' key in /fake/path is deprecated and will " + "be removed in a future release. Remove it and specify provider= " + "explicitly on each ModelConfig instead. See issue #589.", + DeprecationWarning, + stacklevel=2, + ) + or "yaml-second" + ) + DataDesigner( + artifact_path=stub_artifact_path, + secret_resolver=PlaintextResolver(), + managed_assets_path=stub_managed_assets_path, + ) + + deprecation_messages = [str(w.message) for w in caught if issubclass(w.category, DeprecationWarning)] + yaml_default_warnings = [m for m in deprecation_messages if "'default:' key" in m] + registry_default_warnings = [m for m in deprecation_messages if "ModelProviderRegistry.default is deprecated" in m] + assert len(yaml_default_warnings) == 1, deprecation_messages + assert registry_default_warnings == [], ( + "Registry-level deprecation should be suppressed in the YAML-fallback path " + "to avoid two warnings for the same root cause." + ) + + def test_run_config_setting_persists(stub_artifact_path, stub_model_providers): """Test that run config setting persists across multiple calls.""" data_designer = DataDesigner(artifact_path=stub_artifact_path, model_providers=stub_model_providers) From 247fa3000df1abab04e627a80a6c7ae4c8751c1e Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Tue, 5 May 2026 10:03:08 -0600 Subject: [PATCH 3/4] fix(models): make deprecation warnings visible under default filters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit andreatgretel (PR #594): the YAML-default warning in `get_default_provider_name` and the registry-default warning emitted from inside DataDesigner helpers were attributing to data_designer library frames, not user code. Python's default filter chain includes `ignore::DeprecationWarning`, so library-attributed entries are silenced — meaning a normal `DataDesigner()` call with a YAML `default:` set showed nothing, and `resolve_model_provider_registry` warnings were similarly invisible. Two related changes: 1. `warn_at_caller`: extend the default skip-list from `("pydantic",)` to `("pydantic", "pydantic_core", "data_designer")` so the walk escapes both pydantic's validator-dispatch frames and data_designer helper frames before attributing. Also tighten the prefix predicate to exact-or-dotted-prefix matching (`name == p or name.startswith(p + ".")`) so e.g. `pydantic_helpers` is not falsely matched as part of `pydantic` (johnnygreco nit). Allow callers to pass a custom `skip_prefixes` for flexibility. Drop the "skip frame 0+1 unconditionally" guard now that prefix matching covers it. 2. `get_default_provider_name`: switch from `warnings.warn(stacklevel=2)` to `warn_at_caller`. The previous stacklevel pointed into `default_model_settings.py`, which is a library file → silenced under default filters. Verified the fix empirically with `python -W default`: warning is now attributed to the user's call site and rendered. johnnygreco (PR #594): add the missing `test_explicit_default_none_does_not_emit_deprecation_warning` regression for the `self.default is not None` predicate landed in the prior round. Tests: - New `test_warning_helpers.py` pins prefix-matching precision (rejects `pydantic_helpers` / `data_designer_other`), default skip-list contents, attribution past skip-prefix frames, and per-call-site dedup behavior. - `test_get_default_provider_name_warning_attributes_to_user_frame` pins andreatgretel's repro for the YAML-default site. - `test_explicit_default_warning_attributes_to_user_frame` pins the multi-frame case: construction goes through `resolve_model_provider_registry`, so the walk has to escape both pydantic and data_designer before landing on the test file. - `test_explicit_default_none_does_not_emit_deprecation_warning` pins johnnygreco's predicate-tightening regression. 3,124 tests pass (540 config + 1,923 engine + 653 interface; +10 net from this round). Refs #589 Made-with: Cursor --- .../config/default_model_settings.py | 12 ++- .../config/utils/warning_helpers.py | 86 ++++++++++++----- .../config/test_default_model_settings.py | 26 +++++ .../config/utils/test_warning_helpers.py | 95 +++++++++++++++++++ .../tests/engine/test_model_provider.py | 37 ++++++++ 5 files changed, 229 insertions(+), 27 deletions(-) create mode 100644 packages/data-designer-config/tests/config/utils/test_warning_helpers.py diff --git a/packages/data-designer-config/src/data_designer/config/default_model_settings.py b/packages/data-designer-config/src/data_designer/config/default_model_settings.py index 84878417b..8a0366733 100644 --- a/packages/data-designer-config/src/data_designer/config/default_model_settings.py +++ b/packages/data-designer-config/src/data_designer/config/default_model_settings.py @@ -5,7 +5,6 @@ import logging import os -import warnings from functools import lru_cache from pathlib import Path from typing import Any, Literal @@ -25,6 +24,7 @@ PREDEFINED_PROVIDERS_MODEL_MAP, ) from data_designer.config.utils.io_helpers import load_config_file, save_config_file +from data_designer.config.utils.warning_helpers import warn_at_caller logger = logging.getLogger(__name__) @@ -104,12 +104,18 @@ def get_default_provider_name() -> str | None: """ default = _get_default_providers_file_content(MODEL_PROVIDERS_FILE_PATH).get("default") if default is not None: - warnings.warn( + # ``warn_at_caller`` (rather than ``warnings.warn(stacklevel=2)``) so the + # warning attributes to the user's call site rather than this library + # module. The only real call path is ``DataDesigner.__init__``, which + # is itself a ``data_designer`` frame; under default Python filters, + # library-attributed ``DeprecationWarning`` entries are silenced + # (``ignore::DeprecationWarning``), so library attribution = invisible + # warning. See PR #594 review. + warn_at_caller( f"The 'default:' key in {MODEL_PROVIDERS_FILE_PATH} is deprecated and will " "be removed in a future release. Remove it and specify provider= explicitly " "on each ModelConfig instead. See issue #589.", DeprecationWarning, - stacklevel=2, ) return default diff --git a/packages/data-designer-config/src/data_designer/config/utils/warning_helpers.py b/packages/data-designer-config/src/data_designer/config/utils/warning_helpers.py index f48ea3afe..45bbbdecb 100644 --- a/packages/data-designer-config/src/data_designer/config/utils/warning_helpers.py +++ b/packages/data-designer-config/src/data_designer/config/utils/warning_helpers.py @@ -3,22 +3,30 @@ """Helpers for emitting warnings that attribute correctly to user code. -Pydantic v2 dispatches ``@model_validator`` / ``@field_validator`` callbacks -through several internal frames. ``warnings.warn(stacklevel=N)`` from inside a -validator therefore tends to land inside pydantic's machinery rather than at -the user's ``ModelConfig(...)`` / ``ModelProviderRegistry(...)`` call site. +Library-internal warnings (typically emitted from a pydantic ``@model_validator`` +or from a helper function) need to be attributed to the *user's* call site, not +to the library frame that happened to fire them. Two reasons: -That breaks two things: +1. Attribution — a source location pointing at library internals isn't + actionable. +2. Visibility under default filters — Python's default ``DeprecationWarning`` + filter is:: -1. Attribution — the displayed source location is unhelpful. -2. Deduplication — Python's default once-per-location dedup key is + default::DeprecationWarning:__main__ + ignore::DeprecationWarning + + Library-attributed ``DeprecationWarning`` entries fall under the second + filter and are silenced. Attributing to user code is what gets the warning + actually shown. + +3. Deduplication — Python's once-per-location dedup key is ``(category, module, lineno)``. When every call resolves to the same - pydantic-internal line, every warning after the first is silently - suppressed regardless of which user file triggered it. + library-internal line, every warning after the first is silently suppressed + regardless of which user file triggered it. -``warn_at_caller`` walks the stack to the first frame outside pydantic (and -outside this helper / the calling validator) and uses -``warnings.warn_explicit`` to attribute the warning there. +``warn_at_caller`` walks the stack past frames whose module belongs to a known +internal package (pydantic, data_designer) and uses ``warnings.warn_explicit`` +to attribute the warning at the first user frame. """ from __future__ import annotations @@ -26,19 +34,50 @@ import sys import warnings +DEFAULT_INTERNAL_PREFIXES: tuple[str, ...] = ("pydantic", "pydantic_core", "data_designer") +"""Modules whose frames are skipped when walking up to the user's call site. + +Matching is exact-or-dotted-prefix (see ``_module_in_prefixes``), so +``pydantic_helpers`` is *not* treated as part of ``pydantic``.""" + + +def _module_in_prefixes(module_name: str, prefixes: tuple[str, ...]) -> bool: + """Return True if ``module_name`` belongs to one of the prefix-rooted packages. + + Uses exact-equality plus dotted-prefix matching so that, e.g., + ``pydantic_helpers`` is NOT treated as part of the ``pydantic`` package + while ``pydantic.fields`` is. Same for ``data_designer`` vs. a hypothetical + ``data_designer_other``. + """ + return any(module_name == prefix or module_name.startswith(prefix + ".") for prefix in prefixes) + + +def warn_at_caller( + message: str, + category: type[Warning], + *, + skip_prefixes: tuple[str, ...] = DEFAULT_INTERNAL_PREFIXES, +) -> None: + """Emit ``message`` attributed to the first frame outside ``skip_prefixes``. + + Intended for warnings whose root cause is the user's call site but whose + emission point is library code (a pydantic validator, an internal helper, + etc.). The walk starts above this helper's own frame and skips every frame + whose module belongs to a package in ``skip_prefixes`` until it reaches a + user frame. -def warn_at_caller(message: str, category: type[Warning]) -> None: - """Emit ``message`` attributed to the first non-pydantic frame above the caller. + The default skip set covers: - Intended to be invoked from inside a pydantic validator. The walk skips this - helper's own frame and the calling validator's frame, then walks past any - pydantic-internal frames until it finds the user's call site. + * ``pydantic`` / ``pydantic_core`` — so warnings emitted from + ``@model_validator`` callbacks escape pydantic's dispatch frames. + * ``data_designer`` — so warnings emitted from a registry / model-config + built deep inside a DataDesigner helper still attribute to the outermost + user call. Without this, attribution lands on a library file and Python's + default ``DeprecationWarning`` filter silences the warning entirely. The user frame's ``__warningregistry__`` is passed to ``warnings.warn_explicit`` so Python's built-in once-per-location dedup keys - on the *user's* (filename, lineno) rather than an internal pydantic frame. - That matches how ``warnings.warn`` would dedup if ``stacklevel`` could - correctly point at the user. + on the *user's* (filename, lineno) rather than an internal frame. We deliberately do *not* pass ``module_globals`` — it's only used for ``linecache`` source-line display, and for scripts run with ``python -c`` @@ -47,11 +86,10 @@ def warn_at_caller(message: str, category: type[Warning]) -> None: module")``. Skipping ``module_globals`` keeps the warning path robust at the cost of an empty source line in the formatted output. """ - # Skip frame 0 (warn_at_caller) and frame 1 (the validator that called us). - frame = sys._getframe(2) if hasattr(sys, "_getframe") else None + frame = sys._getframe(1) if hasattr(sys, "_getframe") else None while frame is not None: module_name = frame.f_globals.get("__name__", "") - if not module_name.startswith("pydantic"): + if not _module_in_prefixes(module_name, skip_prefixes): warnings.warn_explicit( message, category, @@ -63,5 +101,5 @@ def warn_at_caller(message: str, category: type[Warning]) -> None: return frame = frame.f_back - # Fallback: never escaped pydantic frames (or no frame access). Use stacklevel. + # Fallback: never escaped library frames (or no frame access). Use stacklevel. warnings.warn(message, category, stacklevel=3) diff --git a/packages/data-designer-config/tests/config/test_default_model_settings.py b/packages/data-designer-config/tests/config/test_default_model_settings.py index 445984745..1c144b7dd 100644 --- a/packages/data-designer-config/tests/config/test_default_model_settings.py +++ b/packages/data-designer-config/tests/config/test_default_model_settings.py @@ -167,6 +167,32 @@ def test_get_default_provider_name_without_default_key(tmp_path: Path): assert get_default_provider_name() is None +def test_get_default_provider_name_warning_attributes_to_user_frame(tmp_path: Path): + """Regression for PR #594 review (andreatgretel): the YAML-default warning + must attribute to the user's call site, not to ``default_model_settings.py``. + Python's default filter ignores library-attributed ``DeprecationWarning`` + entries, so the previous ``stacklevel=2`` attribution rendered the warning + invisible under default filters on the only real call path + (``DataDesigner.__init__``). See issue #589. + """ + providers_file_path = tmp_path / "providers.yaml" + providers_file_path.write_text( + json.dumps(dict(providers=[p.model_dump() for p in get_builtin_model_providers()], default="nvidia")) + ) + with patch("data_designer.config.default_model_settings.MODEL_PROVIDERS_FILE_PATH", new=providers_file_path): + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always", DeprecationWarning) + assert get_default_provider_name() == "nvidia" + + matches = [w for w in caught if "'default:' key" in str(w.message)] + assert len(matches) == 1, [str(w.message) for w in caught] + assert matches[0].filename == __file__, ( + f"Warning attributed to {matches[0].filename!r} (line {matches[0].lineno}) " + f"instead of the test file. Library-attributed DeprecationWarnings are " + f"silenced under default filters." + ) + + def test_get_default_provider_name_path_does_not_exist(): with patch("data_designer.config.default_model_settings.MODEL_PROVIDERS_FILE_PATH", new=Path("non_existent_path")): with pytest.raises(FileNotFoundError, match=r"Default model providers file not found at 'non_existent_path'"): diff --git a/packages/data-designer-config/tests/config/utils/test_warning_helpers.py b/packages/data-designer-config/tests/config/utils/test_warning_helpers.py new file mode 100644 index 000000000..9df72c588 --- /dev/null +++ b/packages/data-designer-config/tests/config/utils/test_warning_helpers.py @@ -0,0 +1,95 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import warnings + +from data_designer.config.utils.warning_helpers import _module_in_prefixes, warn_at_caller + + +def test_module_in_prefixes_exact_match(): + assert _module_in_prefixes("pydantic", ("pydantic",)) is True + + +def test_module_in_prefixes_dotted_submodule(): + assert _module_in_prefixes("pydantic.fields", ("pydantic",)) is True + assert _module_in_prefixes("data_designer.config.models", ("data_designer",)) is True + + +def test_module_in_prefixes_rejects_prefix_collision(): + """Regression for PR #594 review (johnnygreco): ``startswith`` matching + naively on the prefix would silently treat ``pydantic_helpers`` as part of + the ``pydantic`` package. Anchor on exact-or-dotted-prefix instead. + """ + assert _module_in_prefixes("pydantic_helpers", ("pydantic",)) is False + assert _module_in_prefixes("pydanticfoo", ("pydantic",)) is False + assert _module_in_prefixes("data_designer_other", ("data_designer",)) is False + + +def test_warn_at_caller_attributes_to_direct_caller(): + """When called from a non-skipped module, the warning attributes to the + caller's frame. + """ + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + warn_at_caller("hello", DeprecationWarning) # line anchored below + + assert len(caught) == 1 + assert caught[0].filename == __file__ + assert "hello" in str(caught[0].message) + + +def test_warn_at_caller_skips_skip_prefix_frames(): + """The walk should escape any frame whose module is listed in + ``skip_prefixes`` and attribute to the first frame outside them. We + simulate a library frame by ``exec``-ing a helper with a fake module name + in its globals; calling that helper produces a frame whose + ``f_globals["__name__"]`` is the fake name, mirroring how a real library + frame would appear during the walk. + """ + library_globals: dict[str, object] = { + "__name__": "fake_library.dispatch", + "warn_at_caller": warn_at_caller, + "DeprecationWarning": DeprecationWarning, + } + exec( + "def emit():\n warn_at_caller('from-library', DeprecationWarning, skip_prefixes=('fake_library',))\n", + library_globals, + ) + emit = library_globals["emit"] + + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + emit() + + assert len(caught) == 1 + assert caught[0].filename == __file__, f"Expected attribution at {__file__!r}, got {caught[0].filename!r}" + + +def test_warn_at_caller_default_skips_pydantic_and_data_designer(): + """Default ``skip_prefixes`` should cover both pydantic and data_designer + so warnings emitted from validators inside DataDesigner internals attribute + to the user, not to either library. + """ + from data_designer.config.utils.warning_helpers import DEFAULT_INTERNAL_PREFIXES + + assert "pydantic" in DEFAULT_INTERNAL_PREFIXES + assert "data_designer" in DEFAULT_INTERNAL_PREFIXES + + +def test_warn_at_caller_dedup_keys_on_user_call_site(): + """Python's once-per-location dedup keys on (text, category, lineno) inside + the *attributing* frame's ``__warningregistry__``. With proper user + attribution, two distinct call sites in the user's file each emit a + warning under ``default`` filtering, while a repeated call at the same + site emits only the first. + """ + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("default", DeprecationWarning) + warn_at_caller("dedup-test", DeprecationWarning) # site A + warn_at_caller("dedup-test", DeprecationWarning) # site B + + linenos = {w.lineno for w in caught} + assert len(caught) == 2, [str(w.message) for w in caught] + assert len(linenos) == 2, "Each call site should produce a distinct dedup key" diff --git a/packages/data-designer-engine/tests/engine/test_model_provider.py b/packages/data-designer-engine/tests/engine/test_model_provider.py index bbcd8b663..72006f824 100644 --- a/packages/data-designer-engine/tests/engine/test_model_provider.py +++ b/packages/data-designer-engine/tests/engine/test_model_provider.py @@ -124,6 +124,43 @@ def test_no_default_does_not_emit_deprecation_warning(stub_foo_provider: ModelPr ModelProviderRegistry(providers=[stub_foo_provider]) +def test_explicit_default_none_does_not_emit_deprecation_warning(stub_foo_provider: ModelProvider) -> None: + """Pin the predicate tightening from PR #594 review: passing ``default=None`` + explicitly is semantically equivalent to omitting it (caller is opting *out* + of a registry-level default), so the deprecation must NOT fire. + """ + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + ModelProviderRegistry(providers=[stub_foo_provider], default=None) + + +def test_explicit_default_warning_attributes_to_user_frame( + stub_foo_provider: ModelProvider, stub_bar_provider: ModelProvider +) -> None: + """Regression for PR #594 review (andreatgretel): the ``default=`` deprecation + warning must attribute to the *user's* call site, not the pydantic-internal + or ``data_designer`` library frame that emits it. Library-attributed + ``DeprecationWarning`` entries are silenced under Python's default + ``ignore::DeprecationWarning`` filter, so attribution determines whether + the warning is actually visible. + + Construction goes through ``resolve_model_provider_registry`` so the walk + has to escape both pydantic (validator dispatch) and ``data_designer`` + (the helper that builds the registry) before landing on the test frame. + """ + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always", DeprecationWarning) + resolve_model_provider_registry([stub_foo_provider, stub_bar_provider], default_provider_name="bar") + + matches = [w for w in caught if "ModelProviderRegistry.default is deprecated" in str(w.message)] + assert len(matches) == 1, [str(w.message) for w in caught] + assert matches[0].filename == __file__, ( + f"Warning attributed to {matches[0].filename!r} (line {matches[0].lineno}) " + f"instead of the test file. Library-attributed DeprecationWarnings are " + f"silenced under default filters." + ) + + def test_resolve_single_provider_quiet_under_deprecation(stub_foo_provider: ModelProvider) -> None: """Pin the q3 tweak: ``resolve_model_provider_registry`` skips ``default=`` in the single-provider case so common construction paths stay quiet under From 1c7c0f8c58d6452dc7ac0349b7e809da98185ffc Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Tue, 5 May 2026 12:50:59 -0600 Subject: [PATCH 4/4] fix(models): apply warn_at_caller to remaining deprecation sites MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit greptile-apps (PR #594, r3189904028): `ProviderRepository.load`'s YAML-default `DeprecationWarning` was using `warnings.warn(stacklevel=2)`, which attributes to whichever data_designer frame called `load()` — controllers, services, list/reset commands, agent introspection. Every real call path lands on `data_designer.cli.*`, which falls under Python's default `ignore::DeprecationWarning` filter and is silenced. Audit found two more sites with the same problem: - `DatasetBuilder._resolve_async_compatibility` (`allow_resize` / issue #552) — was using `stacklevel=4` to walk past `_resolve_async_compatibility -> build/build_preview -> interface -> user`. Brittle: any added frame (decorator, async wrapping, the `try/except DeprecationWarning: raise` boundary) shifts attribution silently. The existing test passed only because it used `simplefilter("always") + record=True`, which records warnings regardless of attribution. - `ProviderController._handle_change_default` — was using `stacklevel=2`, which lands on the menu dispatcher in the same controller module. `print_warning` already shows the message visually, but programmatic observers (`pytest.warns`, `filterwarnings("error", ...)`) saw a library-attributed entry that default filters silenced. All three migrated to `warn_at_caller` (the helper from 247fa30) so attribution lands on the user's call site regardless of internal chain shape. `data_designer` is already in `DEFAULT_INTERNAL_PREFIXES`, so the walk escapes the entire library in one pass. Added attribution regression tests at each site asserting `warning.filename == __file__`. A future regression to `warnings.warn(stacklevel=N)` now fails CI instead of silently silencing the user-facing nudge: - `test_load_with_yaml_default_attributes_warning_to_caller` (test_provider_repository.py) - `test_resolve_async_compatibility` extended with the same assertion - `test_handle_change_default_emits_deprecation_warning` rewritten from `pytest.warns(...)` to a `catch_warnings(record=True)` block that filters for the message and asserts `filename == __file__` (`pytest.warns` does not check attribution, so the rewrite is required to actually catch the regression). 3,125 tests pass (548 config + 1,923 engine + 654 interface). Refs #589 --- .../dataset_builders/dataset_builder.py | 14 ++++++++-- .../test_async_builder_integration.py | 8 ++++++ .../cli/controllers/provider_controller.py | 12 ++++++-- .../cli/repositories/provider_repository.py | 11 +++++--- .../controllers/test_provider_controller.py | 20 ++++++++++++- .../repositories/test_provider_repository.py | 28 +++++++++++++++++++ 6 files changed, 84 insertions(+), 9 deletions(-) diff --git a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/dataset_builder.py b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/dataset_builder.py index 96470977c..97b87e852 100644 --- a/packages/data-designer-engine/src/data_designer/engine/dataset_builders/dataset_builder.py +++ b/packages/data-designer-engine/src/data_designer/engine/dataset_builders/dataset_builder.py @@ -9,7 +9,6 @@ import os import time import uuid -import warnings from pathlib import Path from typing import TYPE_CHECKING, Any, Callable @@ -23,6 +22,7 @@ ProcessorConfig, ProcessorType, ) +from data_designer.config.utils.warning_helpers import warn_at_caller from data_designer.config.version import get_library_version from data_designer.engine.column_generators.generators.base import ( ColumnGenerator, @@ -319,7 +319,17 @@ def _resolve_async_compatibility(self) -> bool: "use workflow chaining instead (see issue #552)." ) logger.warning(f"⚠️ {msg}") - warnings.warn(msg, DeprecationWarning, stacklevel=4) + # ``warn_at_caller`` rather than ``warnings.warn(stacklevel=N)`` so + # attribution lands on the user's call site instead of an internal + # ``DatasetBuilder.build`` / ``data_designer.interface`` frame. + # The exact internal-frame depth from this method up to user code + # depends on which entry point invoked the builder (build vs. + # build_preview, sync vs. async wrapping), so a hard-coded + # ``stacklevel`` is brittle; ``warn_at_caller`` walks past every + # ``data_designer.*`` frame regardless of chain shape. Library + # attribution would also be silenced under Python's default + # ``ignore::DeprecationWarning`` filter. See PR #594 review. + warn_at_caller(msg, DeprecationWarning) return False return True diff --git a/packages/data-designer-engine/tests/engine/dataset_builders/test_async_builder_integration.py b/packages/data-designer-engine/tests/engine/dataset_builders/test_async_builder_integration.py index 846089095..dab109dbb 100644 --- a/packages/data-designer-engine/tests/engine/dataset_builders/test_async_builder_integration.py +++ b/packages/data-designer-engine/tests/engine/dataset_builders/test_async_builder_integration.py @@ -101,6 +101,14 @@ def test_resolve_async_compatibility(configs: list[Mock], expected: bool) -> Non assert len(w) == 1 assert issubclass(w[0].category, DeprecationWarning) assert "allow_resize" in str(w[0].message) + # Regression for PR #594 review: the warning must attribute to the + # caller's frame (this test file), not to a ``data_designer.*`` library + # frame. Library-attributed ``DeprecationWarning`` entries fall under + # Python's default ``ignore::DeprecationWarning`` filter and are + # silenced. A regression to ``warnings.warn(..., stacklevel=N)`` would + # land somewhere inside the engine package and silently break the + # user-facing nudge. + assert w[0].filename == __file__ else: assert len(w) == 0 diff --git a/packages/data-designer/src/data_designer/cli/controllers/provider_controller.py b/packages/data-designer/src/data_designer/cli/controllers/provider_controller.py index 886bd1c82..c72087036 100644 --- a/packages/data-designer/src/data_designer/cli/controllers/provider_controller.py +++ b/packages/data-designer/src/data_designer/cli/controllers/provider_controller.py @@ -5,7 +5,6 @@ import copy import re -import warnings from pathlib import Path from typing import TYPE_CHECKING @@ -28,6 +27,7 @@ print_warning, select_with_arrows, ) +from data_designer.config.utils.warning_helpers import warn_at_caller if TYPE_CHECKING: from data_designer.engine.model_provider import ModelProvider @@ -295,7 +295,15 @@ def _handle_change_default(self) -> None: "instead of relying on a registry-level default. See issue #589." ) print_warning(deprecation_msg) - warnings.warn(deprecation_msg, DeprecationWarning, stacklevel=2) + # ``print_warning`` always shows the user the message in the console, + # but ``warnings.warn`` is what's observable to programmatic callers + # (``pytest.warns``, ``filterwarnings("error", ...)``). With + # ``stacklevel=2`` attribution lands on the menu dispatcher in this + # same module — a ``data_designer.cli.*`` frame — and Python's default + # ``ignore::DeprecationWarning`` filter silences it. ``warn_at_caller`` + # walks past every ``data_designer.*`` frame so the warning attributes + # to the user's call site and stays visible. See PR #594 review. + warn_at_caller(deprecation_msg, DeprecationWarning) console.print() providers = self.service.list_all() diff --git a/packages/data-designer/src/data_designer/cli/repositories/provider_repository.py b/packages/data-designer/src/data_designer/cli/repositories/provider_repository.py index 1be627575..f815ef127 100644 --- a/packages/data-designer/src/data_designer/cli/repositories/provider_repository.py +++ b/packages/data-designer/src/data_designer/cli/repositories/provider_repository.py @@ -3,7 +3,6 @@ from __future__ import annotations -import warnings from pathlib import Path from pydantic import BaseModel @@ -12,6 +11,7 @@ from data_designer.config.models import ModelProvider from data_designer.config.utils.constants import MODEL_PROVIDERS_FILE_NAME from data_designer.config.utils.io_helpers import load_config_file, save_config_file +from data_designer.config.utils.warning_helpers import warn_at_caller class ModelProviderRegistry(BaseModel): @@ -43,14 +43,17 @@ def load(self) -> ModelProviderRegistry | None: # ``DeprecationWarning`` is an ``Exception`` subclass, so under # ``filterwarnings("error", DeprecationWarning)`` a warn raised inside # the catch-all would be silently swallowed and ``load`` would drop the - # registry. See PR #594 review. + # registry. ``warn_at_caller`` (rather than ``warnings.warn(stacklevel=2)``) + # so the warning attributes to the user's call site rather than a + # ``data_designer.cli.*`` frame; under default Python filters, + # library-attributed ``DeprecationWarning`` entries are silenced + # (``ignore::DeprecationWarning``). See PR #594 review. if config_dict.get("default") is not None: - warnings.warn( + warn_at_caller( f"The 'default:' key in {self.config_file} is deprecated and will " "be removed in a future release. Remove it and specify provider= " "explicitly on each ModelConfig instead. See issue #589.", DeprecationWarning, - stacklevel=2, ) try: diff --git a/packages/data-designer/tests/cli/controllers/test_provider_controller.py b/packages/data-designer/tests/cli/controllers/test_provider_controller.py index ba95bcd77..265f393a3 100644 --- a/packages/data-designer/tests/cli/controllers/test_provider_controller.py +++ b/packages/data-designer/tests/cli/controllers/test_provider_controller.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 +import warnings from pathlib import Path from unittest.mock import MagicMock, patch @@ -216,12 +217,29 @@ def test_handle_change_default_emits_deprecation_warning( """Regression for #589: entering the 'Change default provider' workflow must emit a ``DeprecationWarning`` so users see the migration nudge before setting another value that's also slated for removal. + + Also pins the attribution contract from PR #594 review: the warning must + land on the caller's frame (this test file), not on a + ``data_designer.cli.*`` library frame. Library attribution falls under + Python's default ``ignore::DeprecationWarning`` filter and would silently + suppress the user-facing nudge for any caller that isn't using + ``simplefilter("always")``. """ mock_select.side_effect = ["change_default", "test-provider-2"] - with pytest.warns(DeprecationWarning, match="'Change default provider' workflow is deprecated"): + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") controller_with_providers.run() + deprecations = [ + w + for w in caught + if issubclass(w.category, DeprecationWarning) + and "'Change default provider' workflow is deprecated" in str(w.message) + ] + assert len(deprecations) == 1 + assert deprecations[0].filename == __file__ + @patch("data_designer.cli.controllers.provider_controller.confirm_action", return_value=False) @patch("data_designer.cli.controllers.provider_controller.select_with_arrows") diff --git a/packages/data-designer/tests/cli/repositories/test_provider_repository.py b/packages/data-designer/tests/cli/repositories/test_provider_repository.py index 647050916..e62a59ccd 100644 --- a/packages/data-designer/tests/cli/repositories/test_provider_repository.py +++ b/packages/data-designer/tests/cli/repositories/test_provider_repository.py @@ -67,6 +67,34 @@ def test_load_with_yaml_default_emits_deprecation_warning( assert registry.default == stub_model_providers[0].name +def test_load_with_yaml_default_attributes_warning_to_caller( + tmp_path: Path, stub_model_providers: list[ModelProvider] +) -> None: + """Regression for PR #594 review: the YAML-default ``DeprecationWarning`` + must attribute to the *caller's* frame (this test file), not to a + ``data_designer.cli.*`` library frame. Library-attributed + ``DeprecationWarning`` entries fall under Python's default + ``ignore::DeprecationWarning`` filter and are silenced, so attribution at + a library frame == invisible warning. ``warn_at_caller`` keeps this + visible; a regression to ``warnings.warn(stacklevel=2)`` would land on + ``provider_repository.py`` and silently break the user nudge. + """ + providers_file_path = tmp_path / MODEL_PROVIDERS_FILE_NAME + save_config_file( + providers_file_path, + ModelProviderRegistry(providers=stub_model_providers, default=stub_model_providers[0].name).model_dump(), + ) + repository = ProviderRepository(tmp_path) + + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + repository.load() + + deprecations = [w for w in caught if issubclass(w.category, DeprecationWarning)] + assert len(deprecations) == 1 + assert deprecations[0].filename == __file__ + + def test_load_without_yaml_default_does_not_warn(tmp_path: Path, stub_model_providers: list[ModelProvider]) -> None: """Pin the post-deprecation happy path: a YAML without a ``default:`` key must load cleanly with no ``DeprecationWarning``.