NVIDIA-NeMo · nabinchha · May 5, 2026 · Apr 30, 2026 · May 1, 2026 · May 1, 2026
@@ -161,6 +161,7 @@ import data_designer.config as dd
 model = dd.ModelConfig(
     alias="my-model",
     model="nvidia/nemotron-3-nano-30b-a3b",
+    provider="nvidia",
     inference_parameters=dd.ChatCompletionInferenceParams(
         max_parallel_requests=8,
     ),

@@ -67,6 +67,12 @@ data-designer config providers
 
 **Change default provider**: Set which provider is used by default. This option is only available when multiple providers are configured.
 
+!!! warning "Deprecated: 'Change default provider' workflow"
+    The "Change default provider" workflow is **deprecated** and will be removed in a future
+    release alongside the registry-level default. Specify `provider=` explicitly on each
+    `ModelConfig` instead — the workflow now emits a `DeprecationWarning` when entered.
+    See [issue #589](https://github.com/NVIDIA-NeMo/DataDesigner/issues/589).
+
 ## Managing Model Configurations
 
 Run the interactive model configuration command:
@@ -111,7 +117,7 @@ data-designer config list
 This command displays:
 
 - **Model Providers**: All configured providers with their endpoints (API keys are masked)
-- **Default Provider**: The currently selected default provider
+- **Default Provider**: The currently selected default provider _(deprecated; see issue #589)_
 - **Model Configurations**: All configured models with their settings
 
 ## Resetting Configurations

@@ -90,6 +90,13 @@ preview_result.display_sample_record()
 !!! note "Default Providers Always Available"
     When you only specify `model_configs`, the default model providers (NVIDIA, OpenAI, and OpenRouter) are still available. You only need to create custom providers if you want to connect to different endpoints or modify provider settings.
 
+!!! warning "Always specify `provider=` on `ModelConfig`"
+    Leaving `provider` unset (or passing `provider=None`) on `ModelConfig` is **deprecated**.
+    The legacy "implicit default provider" routing — used when `provider` is omitted — emits
+    a `DeprecationWarning` and will be removed in a future release. Always reference the
+    intended provider by name, as the examples below do. See
+    [issue #589](https://github.com/NVIDIA-NeMo/DataDesigner/issues/589).
+
 !!! tip "Mixing Custom and Default Models"
     When you provide custom `model_configs` to `DataDesignerConfigBuilder`, they **replace** the defaults entirely. To use custom model configs in addition to the default configs, use the `add_model_config` method:
 

@@ -107,6 +107,13 @@ Both methods operate on the same files, ensuring consistency across your entire
 !!! warning "API Key Requirements"
     While default model configurations are always available, you need to set the appropriate API key environment variable (`NVIDIA_API_KEY`, `OPENAI_API_KEY`, or `OPENROUTER_API_KEY`) to actually use the corresponding models for data generation. Without a valid API key, any attempt to generate data using that provider's models will fail.
 
+!!! warning "Deprecated: implicit default provider routing"
+    The `default:` key in `~/.data-designer/model_providers.yaml` and the registry-level
+    "default provider" concept are **deprecated** and will be removed in a future release.
+    Specify `provider=` explicitly on every `ModelConfig` instead — the built-in defaults
+    above already do this, and a `DeprecationWarning` is now emitted whenever the legacy
+    routing is exercised. See [issue #589](https://github.com/NVIDIA-NeMo/DataDesigner/issues/589).
+
 !!! tip "Environment Variables"
     Store your API keys in environment variables rather than hardcoding them in your scripts:
 

@@ -167,6 +167,7 @@ dd.ModelConfig(
 dd.ModelConfig(
     alias="dalle",
     model="dall-e-3",
+    provider="openai",
     inference_parameters=dd.ImageInferenceParams(
         extra_body={"size": "1024x1024", "quality": "hd"}
     ),

@@ -6,6 +6,14 @@ Model providers are external services that host and serve models. Data Designer
 
 A `ModelProvider` defines how Data Designer connects to a provider's API endpoint. When you create a `ModelConfig`, you reference a provider by name, and Data Designer uses that provider's settings to make API calls to the appropriate endpoint.
 
+!!! warning "Deprecated: implicit default provider routing"
+    Earlier versions of Data Designer let you omit `provider=` on `ModelConfig` and
+    fall back to a registry-level default — including the `default:` key in
+    `~/.data-designer/model_providers.yaml`. That implicit routing is **deprecated**
+    and will be removed in a future release. Always reference a provider by name on
+    every `ModelConfig`. A `DeprecationWarning` is now emitted when the legacy path
+    is exercised. See [issue #589](https://github.com/NVIDIA-NeMo/DataDesigner/issues/589).
+
 ## ModelProvider Configuration
 
 The `ModelProvider` class has the following fields:

@@ -21,6 +21,7 @@ config_builder = dd.DataDesignerConfigBuilder(
         dd.ModelConfig(
             alias="my-model",
             model="nvidia/nemotron-3-nano-30b-a3b",
+            provider="nvidia",
             inference_parameters=dd.ChatCompletionInferenceParams(temperature=0.7),
         ),
     ]

@@ -24,6 +24,7 @@
     PREDEFINED_PROVIDERS_MODEL_MAP,
 )
 from data_designer.config.utils.io_helpers import load_config_file, save_config_file
+from data_designer.config.utils.warning_helpers import warn_at_caller
 
 logger = logging.getLogger(__name__)
 
@@ -95,7 +96,28 @@ def get_default_providers() -> list[ModelProvider]:
 
 
 def get_default_provider_name() -> str | None:
-    return _get_default_providers_file_content(MODEL_PROVIDERS_FILE_PATH).get("default")
+    """Look up the deprecated ``default:`` provider name in the providers YAML.
+
+    Deprecated: both this function and the ``default:`` key it reads are
+    scheduled for removal in a future release. Pass ``provider=`` explicitly
+    on each ``ModelConfig`` instead. See issue #589.
+
+    Returns:
+        The value stored under ``default`` in the file at
+        ``MODEL_PROVIDERS_FILE_PATH``, or ``None`` when no such value is set.
+        A ``DeprecationWarning`` is emitted whenever the value is not ``None``.
+    """
+    providers_content = _get_default_providers_file_content(MODEL_PROVIDERS_FILE_PATH)
+    default_name = providers_content.get("default")
+    if default_name is None:
+        return None
+
+    # Emit through ``warn_at_caller`` instead of ``warnings.warn(stacklevel=2)``
+    # so the warning is attributed to user code rather than this module.
+    # Python's default filters ignore ``DeprecationWarning`` outside
+    # ``__main__``, so a library-attributed warning would never be displayed.
+    # See PR #594 review.
+    warn_at_caller(
+        f"The 'default:' key in {MODEL_PROVIDERS_FILE_PATH} is deprecated and will "
+        "be removed in a future release. Remove it and specify provider= explicitly "
+        "on each ModelConfig instead. See issue #589.",
+        DeprecationWarning,
+    )
+    return default_name
 
 
 def resolve_seed_default_model_settings() -> None:

@@ -31,6 +31,7 @@
     load_image_path_to_base64,
 )
 from data_designer.config.utils.io_helpers import smart_load_yaml
+from data_designer.config.utils.warning_helpers import warn_at_caller
 
 logger = logging.getLogger(__name__)
 
@@ -503,7 +504,10 @@ class ModelConfig(ConfigBase):
         model: Model identifier (e.g., from build.nvidia.com or other providers).
         inference_parameters: Inference parameters for the model (temperature, top_p, max_tokens, etc.).
             The generation_type is determined by the type of inference_parameters.
-        provider: Optional model provider name if using custom providers.
+        provider: Name of the model provider. Required in a future release. Leaving
+            ``provider`` unset (or ``None``) currently routes through the registry's
+            implicit default and is **deprecated**; specify ``provider=`` explicitly.
+            See issue #589.
         skip_health_check: Whether to skip the health check for this model. Defaults to False.
     """
 
@@ -535,6 +539,22 @@ def _convert_inference_parameters(cls, value: Any) -> Any:
                 return ChatCompletionInferenceParams(**value)
         return value
 
+    @model_validator(mode="after")
+    def _warn_on_implicit_provider(self) -> Self:
+        """Warn that relying on an unset ``provider`` is deprecated.
+
+        Runs as an ``after`` validator and never alters the instance.
+
+        Returns:
+            This same instance.
+        """
+        if self.provider is not None:
+            return self
+
+        # ``warn_at_caller`` attributes the warning to the user's
+        # ``ModelConfig(...)`` / ``model_validate(...)`` call site instead of a
+        # pydantic-internal frame; otherwise every emission would share one
+        # pydantic line and be deduplicated after the first. See PR #594 review.
+        warn_at_caller(
+            f"ModelConfig.provider=None is deprecated and will be required in a future release. "
+            f"Specify provider= explicitly on ModelConfig(alias={self.alias!r}, ...). "
+            "See issue #589.",
+            DeprecationWarning,
+        )
+        return self
+
 
 class ModelProvider(ConfigBase):
     """Configuration for a custom model provider.

@@ -139,6 +139,7 @@ def stub_model_configs() -> list[ModelConfig]:
         ModelConfig(
             alias="stub-model",
             model="stub-model",
+            provider="provider-1",
             inference_parameters=ChatCompletionInferenceParams(
                 temperature=0.9,
                 top_p=0.9,

@@ -0,0 +1,105 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Helpers for emitting warnings that attribute correctly to user code.
+
+Library-internal warnings (typically emitted from a pydantic ``@model_validator``
+or from a helper function) need to be attributed to the *user's* call site, not
+to the library frame that happened to fire them. Three reasons:
+
+1. Attribution — a source location pointing at library internals isn't
+   actionable.
+2. Visibility under default filters — Python's default ``DeprecationWarning``
+   filter is::
+
+       default::DeprecationWarning:__main__
+       ignore::DeprecationWarning
+
+   Library-attributed ``DeprecationWarning`` entries fall under the second
+   filter and are silenced. Attributing to user code is what gets the warning
+   actually shown.
+
+3. Deduplication — under the ``default`` action Python prints a given warning
+   only once per location (module + line number), tracked in that module's
+   ``__warningregistry__``. When every call resolves to the same
+   library-internal line, every repeat after the first is silently suppressed
+   regardless of which user file triggered it.
+
+``warn_at_caller`` walks the stack past frames whose module belongs to a known
+internal package (pydantic, data_designer) and uses ``warnings.warn_explicit``
+to attribute the warning at the first user frame.
+"""
+
+from __future__ import annotations
+
+import sys
+import warnings
+
+DEFAULT_INTERNAL_PREFIXES: tuple[str, ...] = ("pydantic", "pydantic_core", "data_designer")
+"""Modules whose frames are skipped when walking up to the user's call site.
+
+Matching is exact-or-dotted-prefix (see ``_module_in_prefixes``), so
+``pydantic_helpers`` is *not* treated as part of ``pydantic``."""
+
+
+def _module_in_prefixes(module_name: str, prefixes: tuple[str, ...]) -> bool:
+    """Tell whether ``module_name`` lives under any package named in ``prefixes``.
+
+    A module matches when it *is* one of the listed packages or sits below one
+    of them in the dotted hierarchy. Plain string-prefix overlap is not enough:
+    ``pydantic.fields`` belongs to ``pydantic`` but ``pydantic_helpers`` does
+    not, and ``data_designer_other`` is likewise unrelated to ``data_designer``.
+
+    Args:
+        module_name: Dotted module name to classify.
+        prefixes: Root package names to test against.
+
+    Returns:
+        ``True`` on the first match, ``False`` when nothing matches.
+    """
+    for root in prefixes:
+        if module_name == root:
+            return True
+        if module_name.startswith(f"{root}."):
+            return True
+    return False
+
+
+def warn_at_caller(
+    message: str,
+    category: type[Warning],
+    *,
+    skip_prefixes: tuple[str, ...] = DEFAULT_INTERNAL_PREFIXES,
+) -> None:
+    """Issue ``message`` so that it points at the nearest non-internal caller.
+
+    Use this for warnings that are *caused* by user code but *raised* from
+    library code (a pydantic validator, an internal helper, ...). Starting at
+    the frame that called this helper, the stack is climbed until a frame is
+    found whose module is not rooted in one of ``skip_prefixes``; the warning
+    is then reported against that frame's file and line.
+
+    By default the following packages are treated as internal:
+
+    * ``pydantic`` / ``pydantic_core`` — lets warnings raised inside
+      ``@model_validator`` callbacks escape pydantic's dispatch frames.
+    * ``data_designer`` — lets warnings raised deep inside a DataDesigner
+      helper surface at the outermost user call. Attribution to a library
+      file would otherwise be silenced by Python's default
+      ``DeprecationWarning`` filter.
+
+    The chosen frame's ``__warningregistry__`` is handed to
+    ``warnings.warn_explicit`` so that Python's once-per-location bookkeeping
+    is kept against the user's module rather than an internal one.
+
+    ``module_globals`` is intentionally omitted. It only feeds ``linecache``
+    source-line display, and for ``python -c`` scripts (where ``__main__``'s
+    ``__loader__`` is ``BuiltinImporter``) the lookup raises
+    ``ImportError("'__main__' is not a built-in module")``. Leaving it out
+    trades an empty source line in the output for a robust warning path.
+
+    Args:
+        message: Text of the warning.
+        category: Warning class to emit.
+        skip_prefixes: Root package names whose frames are passed over while
+            searching for the user's frame.
+    """
+    frame_getter = getattr(sys, "_getframe", None)
+    candidate = frame_getter(1) if frame_getter is not None else None
+
+    # Climb towards the outermost caller, passing over internal frames.
+    while candidate is not None:
+        owner = candidate.f_globals.get("__name__", "")
+        if not _module_in_prefixes(owner, skip_prefixes):
+            break
+        candidate = candidate.f_back
+
+    if candidate is None:
+        # Fallback: never escaped library frames (or no frame access). Use stacklevel.
+        warnings.warn(message, category, stacklevel=3)
+        return
+
+    caller_globals = candidate.f_globals
+    warnings.warn_explicit(
+        message,
+        category,
+        candidate.f_code.co_filename,
+        candidate.f_lineno,
+        module=owner,
+        registry=caller_globals.setdefault("__warningregistry__", {}),
+    )
@@ -868,6 +868,7 @@ def test_add_model_config(stub_empty_builder):
     new_model_config = ModelConfig(
         alias="new-model",
         model="openai/gpt-4",
+        provider="openai",
         inference_parameters=ChatCompletionInferenceParams(
             temperature=0.7,
             top_p=0.95,
@@ -915,6 +916,7 @@ def test_add_model_config_duplicate_alias(stub_empty_builder):
     duplicate_model_config = ModelConfig(
         alias="stub-model",
         model="different/model",
+        provider="some-provider",
         inference_parameters=ChatCompletionInferenceParams(temperature=0.5),
     )
 
@@ -931,11 +933,13 @@ def test_delete_model_config(stub_empty_builder):
     model_config_1 = ModelConfig(
         alias="model-to-delete",
         model="model/delete",
+        provider="some-provider",
         inference_parameters=ChatCompletionInferenceParams(temperature=0.5),
     )
     model_config_2 = ModelConfig(
         alias="model-to-keep",
         model="model/keep",
+        provider="some-provider",
         inference_parameters=ChatCompletionInferenceParams(temperature=0.6),
     )
     stub_empty_builder.add_model_config(model_config_1)

@@ -2,6 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import json
+import warnings
 from pathlib import Path
 from unittest.mock import patch
 
@@ -142,19 +143,54 @@ def test_get_default_providers_path_does_not_exist():
 
 
 def test_get_default_provider_name_with_default_key(tmp_path: Path):
+    """Check that a non-None ``default:`` in the YAML is returned and flagged.
+
+    ``get_default_provider_name`` must return the configured value AND emit a
+    ``DeprecationWarning`` (regression for #589).
+    """
     providers_file_path = tmp_path / "providers.yaml"
     providers_file_path.write_text(
         json.dumps(dict(providers=[p.model_dump() for p in get_builtin_model_providers()], default="nvidia"))
     )
     with patch("data_designer.config.default_model_settings.MODEL_PROVIDERS_FILE_PATH", new=providers_file_path):
-        assert get_default_provider_name() == "nvidia"
+        # ``pytest.warns`` fails the test unless a matching warning is raised inside the block.
+        with pytest.warns(DeprecationWarning, match="'default:' key.*is deprecated"):
+            assert get_default_provider_name() == "nvidia"
 
 
 def test_get_default_provider_name_without_default_key(tmp_path: Path):
+    """Pin the post-deprecation happy path.
+
+    A YAML without ``default:`` must return ``None`` and must NOT emit a
+    ``DeprecationWarning``.
+    """
     providers_file_path = tmp_path / "providers.yaml"
     providers_file_path.write_text(json.dumps({"providers": [p.model_dump() for p in get_builtin_model_providers()]}))
     with patch("data_designer.config.default_model_settings.MODEL_PROVIDERS_FILE_PATH", new=providers_file_path):
-        assert get_default_provider_name() is None
+        with warnings.catch_warnings():
+            # Escalate DeprecationWarning to an exception so any emission fails the test.
+            warnings.simplefilter("error", DeprecationWarning)
+            assert get_default_provider_name() is None
+
+
+def test_get_default_provider_name_warning_attributes_to_user_frame(tmp_path: Path):
+    """Verify the YAML-default warning is attributed to the caller's file.
+
+    Regression for PR #594 review (andreatgretel): the warning must point at
+    the user's call site — here, this test module — and not at
+    ``default_model_settings.py``. Python's default filter ignores
+    library-attributed ``DeprecationWarning`` entries, so the earlier
+    ``stacklevel=2`` attribution made the warning invisible under default
+    filters on the only real call path (``DataDesigner.__init__``).
+    See issue #589.
+    """
+    providers_file_path = tmp_path / "providers.yaml"
+    payload = {"providers": [p.model_dump() for p in get_builtin_model_providers()], "default": "nvidia"}
+    providers_file_path.write_text(json.dumps(payload))
+    path_patch = patch(
+        "data_designer.config.default_model_settings.MODEL_PROVIDERS_FILE_PATH",
+        new=providers_file_path,
+    )
+    with path_patch, warnings.catch_warnings(record=True) as caught:
+        # Record every DeprecationWarning instead of letting filters drop it.
+        warnings.simplefilter("always", DeprecationWarning)
+        assert get_default_provider_name() == "nvidia"
+
+    default_key_warnings = [entry for entry in caught if "'default:' key" in str(entry.message)]
+    assert len(default_key_warnings) == 1, [str(entry.message) for entry in caught]
+    attributed = default_key_warnings[0]
+    assert attributed.filename == __file__, (
+        f"Warning attributed to {attributed.filename!r} (line {attributed.lineno}) "
+        f"instead of the test file. Library-attributed DeprecationWarnings are "
+        f"silenced under default filters."
+    )
 
 
 def test_get_default_provider_name_path_does_not_exist():