From a97c196c3ec8a9b33fc221c5e6d6ffd2a4874809 Mon Sep 17 00:00:00 2001 From: Mike McCarty Date: Wed, 11 Feb 2026 16:05:24 -0500 Subject: [PATCH 1/3] Updated docs --- CLAUDE.md | 13 + dependencies.yaml | 6 + docs/source/api/checks.rst | 265 ++++++++++++++++ docs/source/api/cli.rst | 62 ++++ docs/source/api/debug.rst | 233 ++++++++++++++ docs/source/api/doctor.rst | 141 +++++++++ docs/source/conf.py | 66 +++- docs/source/contributing.rst | 363 ++++++++++++++++++++++ docs/source/cuda_driver.rst | 35 --- docs/source/doctor.rst | 19 -- docs/source/index.rst | 70 ++++- docs/source/installation.rst | 106 +++++++ docs/source/plugin_development.rst | 477 +++++++++++++++++++++++++++++ docs/source/troubleshooting.rst | 395 ++++++++++++++++++++++++ docs/source/user_guide.rst | 293 ++++++++++++++++++ 15 files changed, 2465 insertions(+), 79 deletions(-) create mode 100644 docs/source/api/checks.rst create mode 100644 docs/source/api/cli.rst create mode 100644 docs/source/api/debug.rst create mode 100644 docs/source/api/doctor.rst create mode 100644 docs/source/contributing.rst delete mode 100644 docs/source/cuda_driver.rst delete mode 100644 docs/source/doctor.rst create mode 100644 docs/source/installation.rst create mode 100644 docs/source/plugin_development.rst create mode 100644 docs/source/troubleshooting.rst create mode 100644 docs/source/user_guide.rst diff --git a/CLAUDE.md b/CLAUDE.md index ef8d92f..c17c1b0 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -39,6 +39,19 @@ coverage report coverage html && open htmlcov/index.html ``` +### Documentation + +```bash +# Build HTML documentation +cd docs && make html + +# View documentation +open docs/build/html/index.html + +# Clean build artifacts +cd docs && make clean +``` + ### Linting and Pre-commit ```bash diff --git a/dependencies.yaml b/dependencies.yaml index 63e2900..12cb729 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -75,6 +75,12 @@ dependencies: - output_types: [pyproject, requirements] packages: - 
importlib-metadata >= 4.13.0; python_version < '3.12' + docs: + common: + - output_types: [conda, requirements, pyproject] + packages: + - sphinx + - sphinx-rtd-theme test_python: common: - output_types: [conda, requirements, pyproject] diff --git a/docs/source/api/checks.rst b/docs/source/api/checks.rst new file mode 100644 index 0000000..acf8775 --- /dev/null +++ b/docs/source/api/checks.rst @@ -0,0 +1,265 @@ +.. SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. SPDX-License-Identifier: Apache-2.0 + +Health Checks +============= + +Built-in health check modules for verifying RAPIDS installation requirements. + +GPU Checks +---------- + +.. automodule:: rapids_cli.doctor.checks.gpu + :members: + :undoc-members: + :show-inheritance: + +gpu_check +^^^^^^^^^ + +.. autofunction:: rapids_cli.doctor.checks.gpu.gpu_check + +Verifies that NVIDIA GPUs are available and accessible. + +**Parameters:** + +- ``verbose`` (bool): Enable detailed output + +**Returns:** + +- str: Message indicating number of GPUs detected + +**Raises:** + +- ValueError: If no GPUs are detected +- AssertionError: If GPU count is zero + +**Example:** + +.. code-block:: python + + >>> gpu_check(verbose=True) + 'GPU(s) detected: 2' + +check_gpu_compute_capability +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. autofunction:: rapids_cli.doctor.checks.gpu.check_gpu_compute_capability + +Verifies that all GPUs meet minimum compute capability requirements. + +**Parameters:** + +- ``verbose`` (bool): Enable detailed output + +**Returns:** + +- bool: True if all GPUs meet requirements + +**Raises:** + +- ValueError: If any GPU has insufficient compute capability + +**Required Compute Capability:** + +- Minimum: 7.0 (Volta architecture or newer) +- Supported GPUs: V100, A100, H100, RTX 20xx/30xx/40xx series + +CUDA Driver Checks +------------------ + +.. 
automodule:: rapids_cli.doctor.checks.cuda_driver + :members: + :undoc-members: + :show-inheritance: + +cuda_check +^^^^^^^^^^ + +.. autofunction:: rapids_cli.doctor.checks.cuda_driver.cuda_check + +Verifies CUDA driver availability and retrieves version. + +**Parameters:** + +- ``verbose`` (bool): Enable detailed output + +**Returns:** + +- int: CUDA driver version code (e.g., 12040 for CUDA 12.4) + +**Raises:** + +- ValueError: If CUDA driver version cannot be determined + +**Example:** + +.. code-block:: python + + >>> cuda_check(verbose=True) + 12040 + +Memory Checks +------------- + +.. automodule:: rapids_cli.doctor.checks.memory + :members: + :undoc-members: + :show-inheritance: + +get_system_memory +^^^^^^^^^^^^^^^^^ + +.. autofunction:: rapids_cli.doctor.checks.memory.get_system_memory + +Retrieves total system memory in gigabytes. + +**Parameters:** + +- ``verbose`` (bool): Unused, kept for consistency + +**Returns:** + +- float: Total system memory in GB + +get_gpu_memory +^^^^^^^^^^^^^^ + +.. autofunction:: rapids_cli.doctor.checks.memory.get_gpu_memory + +Calculates total GPU memory across all GPUs in gigabytes. + +**Parameters:** + +- ``verbose`` (bool): Unused, kept for consistency + +**Returns:** + +- float: Total GPU memory in GB + +check_memory_to_gpu_ratio +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. autofunction:: rapids_cli.doctor.checks.memory.check_memory_to_gpu_ratio + +Verifies system-to-GPU memory ratio meets recommendations. + +**Parameters:** + +- ``verbose`` (bool): Enable detailed output + +**Returns:** + +- bool: Always returns True (issues warnings instead of failing) + +**Warnings:** + +Issues warning if ratio is less than 1.8:1 (below recommended 2:1) + +**Recommendation:** + +For optimal performance, especially with Dask: + +- System memory should be at least 2x total GPU memory +- Example: 64GB RAM for 32GB total GPU memory (2x 16GB GPUs) + +NVLink Checks +------------- + +.. 
automodule:: rapids_cli.doctor.checks.nvlink + :members: + :undoc-members: + :show-inheritance: + +check_nvlink_status +^^^^^^^^^^^^^^^^^^^ + +.. autofunction:: rapids_cli.doctor.checks.nvlink.check_nvlink_status + +Checks for NVLink availability on multi-GPU systems. + +**Parameters:** + +- ``verbose`` (bool): Enable detailed output + +**Returns:** + +- bool: False if fewer than 2 GPUs, True if NVLink detected + +**Raises:** + +- ValueError: If NVLink status check fails on multi-GPU system + +**NVLink Benefits:** + +- High-bandwidth GPU-to-GPU communication +- Essential for multi-GPU training and processing +- Significantly faster than PCIe transfers + +**Note:** + +Only relevant for multi-GPU systems with NVLink-capable GPUs. + +Check Function Contract +----------------------- + +All built-in checks follow these conventions: + +Function Signature +^^^^^^^^^^^^^^^^^^ + +.. code-block:: python + + def check_function(verbose=False, **kwargs): + """Brief description of what this check verifies.""" + pass + +**Parameters:** + +- ``verbose`` (bool): Whether to provide detailed output +- ``**kwargs``: Reserved for future compatibility + +Return Values +^^^^^^^^^^^^^ + +**Success:** + +- Return any value (often True or a status string) +- Returning a string provides information for verbose output + +**Failure:** + +- Raise an exception with descriptive error message +- Use ValueError for failed checks +- Provide actionable guidance in error message + +**Warnings:** + +- Use ``warnings.warn()`` for non-fatal issues +- Always set ``stacklevel=2`` for correct source location + +Usage in Custom Checks +----------------------- + +Reference these built-in checks when creating custom checks: + +.. 
code-block:: python + + # Example: Custom memory check based on built-in pattern + from rapids_cli.doctor.checks.memory import get_gpu_memory + + + def my_memory_check(verbose=False, **kwargs): + """Check if GPU has enough memory for my workload.""" + gpu_memory = get_gpu_memory() + + required_gb = 16 + if gpu_memory < required_gb: + raise ValueError( + f"Insufficient GPU memory: {gpu_memory:.1f}GB available, " + f"{required_gb}GB required" + ) + + if verbose: + return f"GPU memory check passed: {gpu_memory:.1f}GB available" + return True diff --git a/docs/source/api/cli.rst b/docs/source/api/cli.rst new file mode 100644 index 0000000..bd916ff --- /dev/null +++ b/docs/source/api/cli.rst @@ -0,0 +1,62 @@ +.. SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. SPDX-License-Identifier: Apache-2.0 + +CLI Module +========== + +The CLI module provides the main command-line interface for RAPIDS CLI using Click. + +.. automodule:: rapids_cli.cli + :members: + :undoc-members: + :show-inheritance: + +Main Commands +------------- + +rapids +^^^^^^ + +.. autofunction:: rapids_cli.cli.rapids + +The main CLI entry point. Provides access to all subcommands. + +doctor +^^^^^^ + +.. autofunction:: rapids_cli.cli.doctor + +Run health checks to verify RAPIDS installation. + +**Options:** + +- ``--verbose``: Enable detailed output +- ``--dry-run``: Show which checks would run without executing them +- ``filters``: Optional filters to run specific checks + +**Exit Codes:** + +- 0: All checks passed +- 1: One or more checks failed + +debug +^^^^^ + +.. autofunction:: rapids_cli.cli.debug + +Gather comprehensive debugging information. 
+ +**Options:** + +- ``--json``: Output in JSON format for machine parsing + +**Output:** + +Returns detailed system information including: + +- Platform and OS details +- GPU and driver information +- CUDA version +- Python configuration +- Installed packages +- Available tools diff --git a/docs/source/api/debug.rst b/docs/source/api/debug.rst new file mode 100644 index 0000000..b4ce4b4 --- /dev/null +++ b/docs/source/api/debug.rst @@ -0,0 +1,233 @@ +.. SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. SPDX-License-Identifier: Apache-2.0 + +Debug Module +============ + +The debug module gathers comprehensive system information for troubleshooting. + +.. automodule:: rapids_cli.debug.debug + :members: + :undoc-members: + :show-inheritance: + +Core Functions +-------------- + +run_debug +^^^^^^^^^ + +.. autofunction:: rapids_cli.debug.debug.run_debug + +Main function for gathering and displaying debug information. + +**Parameters:** + +- ``output_format`` (str): Output format, either "console" or "json" + +**Collected Information:** + +- Date and time +- Platform information +- nvidia-smi output +- NVIDIA driver version +- CUDA version +- CUDA runtime path +- System CUDA toolkit locations +- Python version (full and short) +- Python hash info +- All installed package versions +- pip freeze output +- conda list output (if available) +- conda info output (if available) +- Available development tools +- OS information from /etc/os-release + +gather_cuda_version +^^^^^^^^^^^^^^^^^^^ + +.. autofunction:: rapids_cli.debug.debug.gather_cuda_version + +Retrieves and formats CUDA driver version from pynvml. + +**Returns:** + +- str: CUDA version in format "Major.Minor" or "Major.Minor.Patch" + +**Example:** + +.. code-block:: python + + >>> gather_cuda_version() + '12.4' + +gather_package_versions +^^^^^^^^^^^^^^^^^^^^^^^^ + +.. 
autofunction:: rapids_cli.debug.debug.gather_package_versions + +Collects versions of all installed Python packages. + +**Returns:** + +- dict: Mapping of package names to version strings + +**Example:** + +.. code-block:: python + + >>> versions = gather_package_versions() + >>> versions['rapids-cli'] + '0.1.0' + +gather_command_output +^^^^^^^^^^^^^^^^^^^^^^ + +.. autofunction:: rapids_cli.debug.debug.gather_command_output + +Executes a command and returns its output, with optional fallback. + +**Parameters:** + +- ``command`` (list[str]): Command and arguments to execute +- ``fallback_output`` (str | None): Value to return if command fails + +**Returns:** + +- str | None: Command output or fallback value + +**Example:** + +.. code-block:: python + + >>> gather_command_output(['pip', '--version']) + 'pip 24.0 from /usr/local/lib/python3.10/site-packages/pip (python 3.10)' + + >>> gather_command_output(['nonexistent'], fallback_output='Not installed') + 'Not installed' + +gather_tools +^^^^^^^^^^^^ + +.. autofunction:: rapids_cli.debug.debug.gather_tools + +Gathers version information for common development tools. + +**Returns:** + +- dict: Tool names mapped to version strings or None + +**Checked Tools:** + +- pip +- conda +- uv +- pixi +- g++ +- cmake +- nvcc + +Output Formats +-------------- + +Console Format +^^^^^^^^^^^^^^ + +Human-readable output with Rich formatting: + +.. code-block:: text + + RAPIDS Debug Information + + Date + 2025-02-11 15:30:00 + + Platform + Linux-6.8.0-94-generic-x86_64 + + Driver Version + 550.54.15 + + Cuda Version + 12.4 + + Package Versions + ┌─────────────────┬──────────┐ + │ rapids-cli │ 0.1.0 │ + │ cudf │ 25.02.0 │ + └─────────────────┴──────────┘ + +JSON Format +^^^^^^^^^^^ + +Machine-readable output for automation: + +.. 
code-block:: json + + { + "date": "2025-02-11 15:30:00", + "platform": "Linux-6.8.0-94-generic-x86_64", + "nvidia_smi_output": "...", + "driver_version": "550.54.15", + "cuda_version": "12.4", + "cuda_runtime_path": "/usr/local/cuda/include", + "system_ctk": ["/usr/local/cuda-12.4"], + "python_version_full": "3.13.12 (main, ...)", + "python_version": "3.13.12", + "python_hash_info": "sys.hash_info(...)", + "package_versions": { + "rapids-cli": "0.1.0" + }, + "pip_packages": "...", + "conda_packages": "...", + "conda_info": "...", + "tools": { + "pip": "pip 24.0", + "conda": "conda 24.1.0" + }, + "os_info": { + "NAME": "Ubuntu", + "VERSION": "22.04" + } + } + +Usage Examples +-------------- + +Console Output +^^^^^^^^^^^^^^ + +.. code-block:: python + + from rapids_cli.debug.debug import run_debug + + # Display debug info in console + run_debug(output_format="console") + +JSON Output +^^^^^^^^^^^ + +.. code-block:: python + + import json + from rapids_cli.debug.debug import run_debug + + # Get JSON output + run_debug(output_format="json") + + # Can be captured with redirection + # rapids debug --json > debug.json + +Programmatic Access +^^^^^^^^^^^^^^^^^^^ + +.. code-block:: python + + from rapids_cli.debug.debug import gather_package_versions, gather_cuda_version + + # Get specific information + cuda_ver = gather_cuda_version() + packages = gather_package_versions() + + print(f"CUDA: {cuda_ver}") + print(f"Installed packages: {len(packages)}") diff --git a/docs/source/api/doctor.rst b/docs/source/api/doctor.rst new file mode 100644 index 0000000..16ab6ad --- /dev/null +++ b/docs/source/api/doctor.rst @@ -0,0 +1,141 @@ +.. SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. SPDX-License-Identifier: Apache-2.0 + +Doctor Module +============= + +The doctor module orchestrates health check execution and plugin discovery. + +.. 
automodule:: rapids_cli.doctor.doctor + :members: + :undoc-members: + :show-inheritance: + +Core Functions +-------------- + +doctor_check +^^^^^^^^^^^^ + +.. autofunction:: rapids_cli.doctor.doctor.doctor_check + +The main orchestration function for running health checks. + +**Parameters:** + +- ``verbose`` (bool): Enable detailed output +- ``dry_run`` (bool): Discover checks without executing them +- ``filters`` (list[str] | None): Optional filters to match check paths + +**Returns:** + +- bool: True if all checks passed, False if any failed + +**Process:** + +1. Discovers all registered checks via entry points +2. Filters checks based on provided filters +3. Executes each check and captures results +4. Collects warnings from checks +5. Displays results and returns success status + +CheckResult +^^^^^^^^^^^ + +.. autoclass:: rapids_cli.doctor.doctor.CheckResult + :members: + +Data class representing the result of a single check execution. + +**Attributes:** + +- ``name`` (str): Name of the check function +- ``description`` (str): First line of check's docstring +- ``status`` (bool): True if check passed, False if failed +- ``value`` (str | None): Optional return value for verbose output +- ``error`` (Exception | None): Exception if check failed +- ``warnings`` (list[WarningMessage] | None): Any warnings issued during check + +Plugin Discovery +---------------- + +The doctor module discovers plugins using Python entry points: + +.. code-block:: python + + from importlib.metadata import entry_points + + for ep in entry_points(group="rapids_doctor_check"): + check_fn = ep.load() + # Execute check + +Entry Point Group +^^^^^^^^^^^^^^^^^ + +Plugins register in the ``rapids_doctor_check`` group: + +.. code-block:: toml + + [project.entry-points.rapids_doctor_check] + my_check = "my_package.checks:my_check_function" + +Check Execution Flow +-------------------- + +1. 
**Discovery Phase** + + - Scan entry points for ``rapids_doctor_check`` group + - Load check functions + - Apply filters if specified + +2. **Execution Phase** + + - Run each check with ``verbose`` parameter + - Capture warnings using ``warnings.catch_warnings()`` + - Catch exceptions for failed checks + - Store results in CheckResult objects + +3. **Reporting Phase** + + - Display warnings + - Show verbose output if requested + - List failed checks with error messages + - Return overall success status + +Error Handling +-------------- + +The doctor module handles several error scenarios: + +**Import Errors** + +Failed imports during discovery are suppressed with ``contextlib.suppress``: + +.. code-block:: python + + with contextlib.suppress(AttributeError, ImportError): + check_fn = ep.load() + +**Check Exceptions** + +Exceptions raised by checks are caught and stored: + +.. code-block:: python + + try: + value = check_fn(verbose=verbose) + status = True + except Exception as e: + error = e + status = False + +**Warnings** + +Python warnings are captured and displayed: + +.. code-block:: python + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + value = check_fn(verbose=verbose) + caught_warnings = w diff --git a/docs/source/conf.py b/docs/source/conf.py index 3b59adc..2303b4a 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -1,5 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. -# All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # Configuration file for the Sphinx documentation builder. 
# @@ -13,32 +12,73 @@ import os import sys -sys.path.insert(0, os.path.abspath("../rapids_cli")) +sys.path.insert(0, os.path.abspath("../../")) project = "RAPIDS CLI" -copyright = "2024, NVIDIA RAPIDS" +copyright = "2025-2026, NVIDIA CORPORATION & AFFILIATES" author = "NVIDIA RAPIDS" -release = "2024" + +# The short X.Y version +version = "0.1" +# The full version, including alpha/beta/rc tags +release = "0.1.0" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration -extensions = [] +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.viewcode", + "sphinx.ext.napoleon", # For Google and NumPy style docstrings + "sphinx.ext.intersphinx", # Link to other project docs + "sphinx.ext.todo", # Support for todo items +] templates_path = ["_templates"] exclude_patterns = [] +# Napoleon settings for Google-style docstrings +napoleon_google_docstring = True +napoleon_numpy_docstring = False +napoleon_include_init_with_doc = True +napoleon_include_private_with_doc = False +napoleon_include_special_with_doc = True +napoleon_use_admonition_for_examples = True +napoleon_use_admonition_for_notes = True +napoleon_use_admonition_for_references = True +napoleon_use_ivar = False +napoleon_use_param = True +napoleon_use_rtype = True + +# Autodoc settings +autodoc_default_options = { + "members": True, + "member-order": "bysource", + "special-members": "__init__", + "undoc-members": True, + "exclude-members": "__weakref__", +} + +# Intersphinx mapping +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), +} # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output -html_theme = "alabaster" +html_theme = "sphinx_rtd_theme" html_static_path = ["_static"] -extensions = [ - "sphinx.ext.autodoc", - "sphinx.ext.autosummary", - 
"sphinx.ext.viewcode", - "sphinx.ext.napoleon", # For Google and NumPy style docstrings -] +html_theme_options = { + "navigation_depth": 4, + "collapse_navigation": False, + "sticky_navigation": True, + "includehidden": True, +} + +# Add any paths that contain custom static files (such as style sheets) +html_logo = None +html_favicon = None diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst new file mode 100644 index 0000000..9d721f3 --- /dev/null +++ b/docs/source/contributing.rst @@ -0,0 +1,363 @@ +.. SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. SPDX-License-Identifier: Apache-2.0 + +Contributing Guide +================== + +Thank you for your interest in contributing to RAPIDS CLI! This guide will help you get started. + +Getting Started +--------------- + +Prerequisites +^^^^^^^^^^^^^ + +- Python 3.10 or later +- Git +- NVIDIA GPU (for testing) +- NVIDIA drivers and CUDA toolkit + +Development Setup +^^^^^^^^^^^^^^^^^ + +1. Fork and clone the repository: + + .. code-block:: bash + + git clone https://github.com/YOUR_USERNAME/rapids-cli.git + cd rapids-cli + +2. Create a development environment: + + .. code-block:: bash + + # Using conda (recommended) + conda create -n rapids-cli-dev python=3.10 + conda activate rapids-cli-dev + + # Or using venv + python -m venv venv + source venv/bin/activate + +3. Install in editable mode with test dependencies: + + .. code-block:: bash + + pip install -e .[test] + +4. Install pre-commit hooks: + + .. code-block:: bash + + pre-commit install + +Development Workflow +-------------------- + +Making Changes +^^^^^^^^^^^^^^ + +1. Create a feature branch: + + .. code-block:: bash + + git checkout -b feature/your-feature-name + +2. Make your changes following the code style guidelines + +3. Add tests for your changes + +4. Run tests locally: + + .. code-block:: bash + + pytest + +5. Run linting checks: + + .. 
code-block:: bash + + pre-commit run --all-files + +Code Style +---------- + +The project uses several linting tools to maintain code quality: + +Formatting +^^^^^^^^^^ + +- **Black**: Code formatting (120 char line length) +- **isort**: Import sorting + +Linting +^^^^^^^ + +- **Ruff**: Fast Python linter (replaces flake8, pylint, etc.) +- **mypy**: Static type checking + +Run formatters and linters: + +.. code-block:: bash + + # Format code + black . + + # Check with ruff + ruff check --fix . + + # Type check + mypy rapids_cli/ + +Docstrings +^^^^^^^^^^ + +Use Google-style docstrings: + +.. code-block:: python + + def my_function(param1: str, param2: int) -> bool: + """Brief description of the function. + + Longer description if needed. + + Args: + param1: Description of param1. + param2: Description of param2. + + Returns: + Description of return value. + + Raises: + ValueError: Description of when this is raised. + + Example: + >>> my_function("test", 42) + True + """ + pass + +Testing +------- + +Writing Tests +^^^^^^^^^^^^^ + +- Place tests in ``rapids_cli/tests/`` +- Use pytest for testing +- Mock external dependencies (pynvml, subprocess calls, etc.) +- Aim for high coverage (95%+ required) + +Test Structure: + +.. code-block:: python + + # rapids_cli/tests/test_my_feature.py + from unittest.mock import patch, MagicMock + import pytest + + from rapids_cli.my_module import my_function + + + def test_my_function_success(): + """Test that my_function works in normal case.""" + result = my_function("input") + assert result == "expected" + + + def test_my_function_failure(): + """Test that my_function handles errors correctly.""" + with pytest.raises(ValueError, match="error message"): + my_function("invalid") + + + def test_my_function_with_mock(): + """Test my_function with mocked dependencies.""" + with patch("pynvml.nvmlInit") as mock_init: + result = my_function() + mock_init.assert_called_once() + +Running Tests +^^^^^^^^^^^^^ + +.. 
code-block:: bash + + # Run all tests + pytest + + # Run specific test file + pytest rapids_cli/tests/test_doctor.py + + # Run with coverage + pytest --cov=rapids_cli + + # Run specific test + pytest rapids_cli/tests/test_doctor.py::test_doctor_check_all_pass + +Pull Request Process +-------------------- + +1. Ensure all tests pass and coverage is maintained + +2. Update documentation if needed + +3. Sign your commits: + + .. code-block:: bash + + git commit -s -m "Your commit message" + +4. Push to your fork: + + .. code-block:: bash + + git push origin feature/your-feature-name + +5. Create a pull request on GitHub + +6. Address review feedback + +Commit Messages +^^^^^^^^^^^^^^^ + +Follow conventional commit format: + +.. code-block:: text + + : + + + + Signed-off-by: Your Name + +Types: + +- ``feat``: New feature +- ``fix``: Bug fix +- ``docs``: Documentation changes +- ``test``: Adding or updating tests +- ``refactor``: Code refactoring +- ``ci``: CI/CD changes +- ``chore``: Maintenance tasks + +Example: + +.. code-block:: text + + feat: add support for filtering checks by package name + + This allows users to run only specific checks by providing + filter arguments to the doctor command. + + Signed-off-by: Jane Doe + +Documentation +------------- + +Building Documentation +^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: bash + + cd docs + make html + + # View in browser + open build/html/index.html + +Documentation lives in ``docs/source/`` and uses Sphinx with reStructuredText. + +Adding New Documentation +^^^^^^^^^^^^^^^^^^^^^^^^ + +1. Create ``.rst`` file in ``docs/source/`` + +2. Add to table of contents in ``index.rst`` + +3. Build and verify: + + .. code-block:: bash + + cd docs + make html + +Reporting Issues +---------------- + +When reporting bugs: + +1. Check if issue already exists + +2. Provide minimal reproduction example + +3. Include debug output: + + .. code-block:: bash + + rapids debug --json > debug_info.json + +4. 
Include: + + - RAPIDS CLI version + - Python version + - OS and driver versions + - Expected vs actual behavior + +Feature Requests +^^^^^^^^^^^^^^^^ + +For feature requests: + +1. Describe the use case + +2. Explain why existing features don't work + +3. Provide example usage + +4. Consider contributing the feature! + +Code Review Guidelines +---------------------- + +For Reviewers +^^^^^^^^^^^^^ + +- Check that tests cover new functionality +- Verify documentation is updated +- Ensure code style is consistent +- Look for potential edge cases +- Validate error messages are helpful + +For Contributors +^^^^^^^^^^^^^^^^ + +- Respond to feedback promptly +- Ask questions if feedback is unclear +- Keep PRs focused on single concern +- Update based on reviews + +Release Process +--------------- + +Releases are managed by maintainers: + +1. Version is managed via git tags +2. CI automatically builds packages +3. Packages published to PyPI and conda-forge + +Community +--------- + +- GitHub Discussions: Q&A and ideas +- Slack: Real-time chat at rapids.ai/community +- Issues: Bug reports and features + +License +------- + +By contributing, you agree that your contributions will be licensed under the Apache 2.0 License. + +Thank You! +---------- + +Every contribution helps make RAPIDS CLI better. Thank you for your time and effort! diff --git a/docs/source/cuda_driver.rst b/docs/source/cuda_driver.rst deleted file mode 100644 index a4d69d6..0000000 --- a/docs/source/cuda_driver.rst +++ /dev/null @@ -1,35 +0,0 @@ -.. _cuda_driver: - -CUDA Driver Checks -================== - -This module provides functions to check the availability of CUDA, retrieve CUDA version, -and verify compatibility between the CUDA toolkit and NVIDIA drivers. - -Functions ---------- -.. autofunction:: rapids_cli.doctor.checks.cuda_driver.cuda_check - - Checks if CUDA is available on the system by initializing the NVML and retrieving the CUDA driver version. 
- - :return: True if CUDA is available, False otherwise. - -.. autofunction:: rapids_cli.doctor.checks.cuda_driver.get_cuda_version - - Retrieves the version of the installed CUDA toolkit. - - :return: A string representing the CUDA version or None if CUDA is not found. - -.. autofunction:: rapids_cli.doctor.checks.cuda_driver.get_driver_version - - Retrieves the installed NVIDIA driver version. - - :return: A string representing the NVIDIA driver version or None if the driver is not found. - -.. autofunction:: rapids_cli.doctor.checks.cuda_driver.check_driver_compatibility - - Checks the compatibility between the installed CUDA version and the NVIDIA driver version. - - This function prints whether the installed versions are compatible with RAPIDS. - - :return: None diff --git a/docs/source/doctor.rst b/docs/source/doctor.rst deleted file mode 100644 index 33be8d8..0000000 --- a/docs/source/doctor.rst +++ /dev/null @@ -1,19 +0,0 @@ -.. _doctor: - -Doctor -========= - -Overview of Doctor. - -.. automodule:: doctor - :members: - :undoc-members: - :show-inheritance: - -Submodules ----------- - -.. automodule:: doctor.checks - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/index.rst b/docs/source/index.rst index 94df6bc..9c27bfa 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,26 +1,72 @@ -.. RAPIDS CLI documentation master file, created by - sphinx-quickstart on Fri Oct 25 10:50:48 2024. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. +.. SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. SPDX-License-Identifier: Apache-2.0 -RAPIDS CLI documentation +RAPIDS CLI Documentation ======================== -Add your content using ``reStructuredText`` syntax. See the -`reStructuredText `_ -documentation for details. 
+The RAPIDS CLI is a command-line tool for performing common RAPIDS operations, +primarily focused on health checks (``rapids doctor``) and debugging (``rapids debug``). +It uses a plugin system that allows RAPIDS libraries to register their own health checks +via Python entry points. +.. image:: https://img.shields.io/badge/python-3.10+-blue.svg + :target: https://www.python.org/downloads/ + :alt: Python Version + +.. image:: https://img.shields.io/badge/license-Apache%202.0-blue.svg + :target: https://github.com/rapidsai/rapids-cli/blob/main/LICENSE + :alt: License + +Quick Start +----------- + +Install the RAPIDS CLI: + +.. code-block:: bash + + pip install rapids-cli + +Run health checks: + +.. code-block:: bash + + rapids doctor + +Gather debugging information: + +.. code-block:: bash + + rapids debug --json + +Documentation Contents +---------------------- .. toctree:: :maxdepth: 2 - :caption: Contents: + :caption: User Guide + + installation + user_guide + troubleshooting - doctor - cuda_driver +.. toctree:: + :maxdepth: 2 + :caption: Developer Guide + + plugin_development + contributing + +.. toctree:: + :maxdepth: 2 + :caption: API Reference + api/cli + api/doctor + api/debug + api/checks Indices and tables -=================== +================== * :ref:`genindex` * :ref:`modindex` diff --git a/docs/source/installation.rst b/docs/source/installation.rst new file mode 100644 index 0000000..213d474 --- /dev/null +++ b/docs/source/installation.rst @@ -0,0 +1,106 @@ +.. SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. SPDX-License-Identifier: Apache-2.0 + +Installation +============ + +Requirements +------------ + +- Python 3.10 or later +- NVIDIA GPU (for running health checks) +- NVIDIA drivers installed +- CUDA toolkit (optional, for full functionality) + +Installation Methods +-------------------- + +From PyPI +^^^^^^^^^ + +The simplest way to install RAPIDS CLI is via pip: + +.. 
code-block:: bash + + pip install rapids-cli + +From Conda +^^^^^^^^^^ + +You can also install via conda: + +.. code-block:: bash + + conda install -c rapidsai -c conda-forge rapids-cli + +From Source +^^^^^^^^^^^ + +For development or to get the latest features: + +.. code-block:: bash + + git clone https://github.com/rapidsai/rapids-cli.git + cd rapids-cli + pip install -e . + +With Test Dependencies +^^^^^^^^^^^^^^^^^^^^^^ + +To run tests locally: + +.. code-block:: bash + + pip install -e .[test] + +Verification +------------ + +Verify the installation by running: + +.. code-block:: bash + + rapids --help + +You should see the RAPIDS CLI help message with available commands. + +Quick Test +^^^^^^^^^^ + +Run a quick health check to verify everything is working: + +.. code-block:: bash + + rapids doctor --verbose + +This will check your GPU availability, CUDA installation, and system configuration. + +Upgrading +--------- + +To upgrade to the latest version: + +.. code-block:: bash + + pip install --upgrade rapids-cli + +Or with conda: + +.. code-block:: bash + + conda update rapids-cli + +Uninstalling +------------ + +To uninstall RAPIDS CLI: + +.. code-block:: bash + + pip uninstall rapids-cli + +Or with conda: + +.. code-block:: bash + + conda remove rapids-cli diff --git a/docs/source/plugin_development.rst b/docs/source/plugin_development.rst new file mode 100644 index 0000000..f4aa2ae --- /dev/null +++ b/docs/source/plugin_development.rst @@ -0,0 +1,477 @@ +.. SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. SPDX-License-Identifier: Apache-2.0 + +Plugin Development Guide +======================== + +The RAPIDS CLI uses a plugin system based on Python entry points to allow external packages +to register their own health checks. This guide shows you how to create plugins for your +RAPIDS library. 
+ +Overview +-------- + +Plugins are discovered automatically through Python entry points in the ``rapids_doctor_check`` +group. When ``rapids doctor`` runs, it discovers all registered checks and executes them. + +Quick Start +----------- + +Here's a minimal example of adding a check to your RAPIDS package: + +1. Create a check function in your package: + + .. code-block:: python + + # my_rapids_package/health_checks.py + + + def my_package_check(verbose=False, **kwargs): + """Check that my_rapids_package is working correctly.""" + import my_rapids_package + + # Perform your check + result = my_rapids_package.test_function() + + if not result: + raise ValueError("my_rapids_package self-test failed") + + return "my_rapids_package is working correctly" + +2. Register the check in your ``pyproject.toml``: + + .. code-block:: toml + + [project.entry-points.rapids_doctor_check] + my_package_check = "my_rapids_package.health_checks:my_package_check" + +3. Install your package and test: + + .. code-block:: bash + + pip install -e . + rapids doctor --verbose + +Check Function Contract +----------------------- + +Your check function must follow these conventions: + +Function Signature +^^^^^^^^^^^^^^^^^^ + +.. code-block:: python + + def my_check(verbose=False, **kwargs): + """Check description goes here.""" + pass + +- Accept ``verbose`` parameter (boolean, default False) +- Accept ``**kwargs`` for forward compatibility +- Provide a clear docstring (first line is used in output) + +Return Values +^^^^^^^^^^^^^ + +**Success**: Return successfully (any return value) + +.. code-block:: python + + def check_success(verbose=False, **kwargs): + """This check always passes.""" + # Option 1: Return None (implicit) + return + + + def check_with_info(verbose=False, **kwargs): + """This check passes with info.""" + # Option 2: Return a string for verbose output + return "GPU 0: Tesla V100, 32GB memory" + +**Failure**: Raise an exception with a helpful message + +.. 
code-block:: python + + def check_failure(verbose=False, **kwargs): + """This check fails with helpful message.""" + if not some_condition(): + raise ValueError( + "Check failed: XYZ is not configured correctly. " + "To fix this, run: sudo apt-get install xyz" + ) + +**Warnings**: Use ``warnings.warn()`` for non-fatal issues + +.. code-block:: python + + import warnings + + + def check_with_warning(verbose=False, **kwargs): + """This check passes but issues a warning.""" + if not optimal_condition(): + warnings.warn( + "Suboptimal configuration detected. " "Performance may be degraded.", + stacklevel=2, + ) + return True + +Examples +-------- + +Example 1: Basic Import Check +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Check that your package can be imported: + +.. code-block:: python + + def import_check(verbose=False, **kwargs): + """Check that my_package can be imported.""" + try: + import my_package + except ImportError as e: + raise ImportError( + "my_package not found. Install with: pip install my_package" + ) from e + + if verbose: + return f"my_package version {my_package.__version__}" + return True + +Example 2: GPU Memory Check +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Check GPU memory requirements: + +.. code-block:: python + + import pynvml + + + def gpu_memory_check(verbose=False, **kwargs): + """Check that GPU has sufficient memory for my_package.""" + pynvml.nvmlInit() + + required_memory_gb = 8 + handle = pynvml.nvmlDeviceGetHandleByIndex(0) + memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle) + available_gb = memory_info.total / (1024**3) + + if available_gb < required_memory_gb: + raise ValueError( + f"Insufficient GPU memory: {available_gb:.1f}GB available, " + f"{required_memory_gb}GB required" + ) + + if verbose: + return f"GPU memory: {available_gb:.1f}GB available" + return True + +Example 3: Dependency Version Check +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Check that dependencies meet version requirements: + +.. 
code-block:: python + + import warnings + from packaging import version + + + def dependency_version_check(verbose=False, **kwargs): + """Check that dependencies meet minimum version requirements.""" + import numpy + import pandas + + min_numpy = "1.20.0" + min_pandas = "1.3.0" + + if version.parse(numpy.__version__) < version.parse(min_numpy): + raise ValueError( + f"NumPy {min_numpy}+ required, found {numpy.__version__}. " + f"Upgrade with: pip install 'numpy>={min_numpy}'" + ) + + if version.parse(pandas.__version__) < version.parse(min_pandas): + warnings.warn( + f"Pandas {min_pandas}+ recommended for best performance. " + f"Found {pandas.__version__}.", + stacklevel=2, + ) + + if verbose: + return f"NumPy {numpy.__version__}, Pandas {pandas.__version__}" + return True + +Example 4: Functional Test +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Run a simple functional test: + +.. code-block:: python + + def functional_check(verbose=False, **kwargs): + """Run a simple functional test.""" + import my_package + import numpy as np + + try: + # Create test data + data = np.random.rand(100, 10) + + # Run simple operation + result = my_package.process(data) + + # Verify result + assert result.shape == (100, 10), "Unexpected output shape" + assert not np.isnan(result).any(), "NaN values in output" + + except Exception as e: + raise RuntimeError( + f"Functional test failed: {e}. " "This may indicate a GPU or driver issue." + ) from e + + if verbose: + return "Functional test passed: basic operations working" + return True + +Best Practices +-------------- + +Clear Error Messages +^^^^^^^^^^^^^^^^^^^^ + +Always provide actionable error messages: + +.. code-block:: python + + # Bad: Unclear what to do + raise ValueError("Check failed") + + # Good: Clear action to fix + raise ValueError( + "CUDA 11.2+ required but CUDA 10.2 found. " + "Upgrade CUDA: https://developer.nvidia.com/cuda-downloads" + ) + +Performance +^^^^^^^^^^^ + +Keep checks fast (< 1 second each): + +.. 
code-block:: python + + # Bad: Slow check + def slow_check(verbose=False, **kwargs): + """This check is too slow.""" + result = expensive_computation() # Takes 30 seconds + return result + + + # Good: Fast check + def fast_check(verbose=False, **kwargs): + """This check is appropriately fast.""" + # Just verify configuration, don't run full workload + config = load_config() + validate_config(config) + return True + +Verbose Output +^^^^^^^^^^^^^^ + +Provide useful information in verbose mode: + +.. code-block:: python + + def informative_check(verbose=False, **kwargs): + """Check with informative output.""" + gpu_count = get_gpu_count() + gpu_memory = get_total_gpu_memory() + + if gpu_count == 0: + raise ValueError("No GPUs found") + + if verbose: + return f"Found {gpu_count} GPU(s) " f"with {gpu_memory:.1f}GB total memory" + return True + +Graceful Degradation +^^^^^^^^^^^^^^^^^^^^ + +Handle optional dependencies gracefully: + +.. code-block:: python + + def optional_dependency_check(verbose=False, **kwargs): + """Check that works with optional dependencies.""" + try: + import optional_package + + has_optional = True + except ImportError: + has_optional = False + + if not has_optional: + import warnings + + warnings.warn( + "optional_package not found. " "Some features will be disabled.", + stacklevel=2, + ) + + # Continue with check anyway + return True + +Testing Your Plugin +------------------- + +Test Plugin Discovery +^^^^^^^^^^^^^^^^^^^^^ + +Verify your check is discovered: + +.. code-block:: bash + + rapids doctor --verbose --dry-run | grep my_check + +Test Plugin Execution +^^^^^^^^^^^^^^^^^^^^^ + +Run your check: + +.. code-block:: bash + + rapids doctor --verbose my_package + +Unit Testing +^^^^^^^^^^^^ + +Test your check function directly: + +.. 
code-block:: python + + # test_health_checks.py + import pytest + from my_package.health_checks import my_check, my_check_with_bad_config + + + def test_my_check_success(): + """Test that check passes in normal conditions.""" + result = my_check(verbose=True) + assert result is not None + + + def test_my_check_failure(): + """Test that check fails appropriately.""" + with pytest.raises(ValueError, match="expected error"): + my_check_with_bad_config(verbose=False) + +Advanced Topics +--------------- + +Multiple Checks per Package +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Register multiple checks: + +.. code-block:: toml + + [project.entry-points.rapids_doctor_check] + my_pkg_import = "my_package.checks:import_check" + my_pkg_gpu = "my_package.checks:gpu_check" + my_pkg_functional = "my_package.checks:functional_check" + +Check Dependencies +^^^^^^^^^^^^^^^^^^ + +If checks have dependencies, handle them gracefully: + +.. code-block:: python + + import warnings + + + def dependent_check(verbose=False, **kwargs): + """This check depends on GPU check passing.""" + # Don't fail if dependencies aren't met + try: + import pynvml + + pynvml.nvmlInit() + except Exception: + warnings.warn("GPU not available, skipping dependent check", stacklevel=2) + return True + + # Rest of check + return True + +Environment-Specific Checks +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Adapt checks to different environments: + +.. code-block:: python + + import os + + + def environment_aware_check(verbose=False, **kwargs): + """Check that adapts to environment.""" + is_ci = os.environ.get("CI") == "true" + + if is_ci: + # Skip expensive checks in CI + return "Skipped in CI environment" + + # Run full check + run_expensive_validation() + return True + +Troubleshooting +--------------- + +Check Not Discovered +^^^^^^^^^^^^^^^^^^^^ + +If your check isn't showing up: + +1. Verify entry point is correct: + + .. code-block:: bash + + python -c "from importlib.metadata import entry_points; print([ep for ep in entry_points(group='rapids_doctor_check')])" + +2.
Reinstall your package: + + .. code-block:: bash + + pip install -e . --force-reinstall --no-deps + +3. Check for import errors: + + .. code-block:: python + + python -c "from my_package.checks import my_check" + +Check Always Fails +^^^^^^^^^^^^^^^^^^ + +Debug the check directly: + +.. code-block:: python + + from my_package.checks import my_check + + try: + result = my_check(verbose=True) + print(f"Success: {result}") + except Exception as e: + print(f"Failed: {e}") + import traceback + + traceback.print_exc() + +Resources +--------- + +- Entry points documentation: https://packaging.python.org/specifications/entry-points/ +- RAPIDS CLI repository: https://github.com/rapidsai/rapids-cli +- Example plugins: See built-in checks in ``rapids_cli/doctor/checks/`` diff --git a/docs/source/troubleshooting.rst b/docs/source/troubleshooting.rst new file mode 100644 index 0000000..014f39f --- /dev/null +++ b/docs/source/troubleshooting.rst @@ -0,0 +1,395 @@ +.. SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. SPDX-License-Identifier: Apache-2.0 + +Troubleshooting +=============== + +This guide helps you resolve common issues with the RAPIDS CLI. + +Common Issues +------------- + +No GPUs Detected +^^^^^^^^^^^^^^^^ + +**Symptom**: ``rapids doctor`` reports "No available GPUs detected" + +**Solutions**: + +1. Verify NVIDIA drivers are installed: + + .. code-block:: bash + + nvidia-smi + + If this fails, install NVIDIA drivers: + + .. code-block:: bash + + # Ubuntu/Debian + sudo apt-get install nvidia-driver-550 + +2. Check that GPU is visible to Python: + + .. code-block:: bash + + python -c "import pynvml; pynvml.nvmlInit(); print(pynvml.nvmlDeviceGetCount())" + +3. Verify you're not in a container without GPU access: + + .. code-block:: bash + + # Docker needs --gpus all flag + docker run --gpus all ... 
+ +CUDA Version Mismatch +^^^^^^^^^^^^^^^^^^^^^ + +**Symptom**: ``rapids doctor`` reports CUDA version incompatibility + +**Solutions**: + +1. Check your CUDA driver version: + + .. code-block:: bash + + nvidia-smi | grep "CUDA Version" + +2. Install compatible RAPIDS packages: + + .. code-block:: bash + + # For CUDA 11.x + pip install cudf-cu11 cuml-cu11 + + # For CUDA 12.x + pip install cudf-cu12 cuml-cu12 + +3. Update NVIDIA drivers if needed: + + .. code-block:: bash + + # Check https://docs.rapids.ai/install for requirements + sudo apt-get update && sudo apt-get upgrade nvidia-driver + +Low Memory Warning +^^^^^^^^^^^^^^^^^^ + +**Symptom**: Warning about system memory to GPU memory ratio + +**Context**: RAPIDS recommends 2:1 ratio of system RAM to GPU memory for optimal performance + +**Solutions**: + +1. This is a warning, not an error. RAPIDS will still work. + +2. For better performance, consider: + + - Adding more system RAM + - Using data chunking strategies + - Processing smaller batches + +3. For Dask workloads, adjust worker memory limits: + + .. code-block:: python + + from dask_cuda import LocalCUDACluster + + cluster = LocalCUDACluster( + device_memory_limit="8GB", # Limit per worker + memory_limit="16GB", # System memory per worker + ) + +NVLink Not Found +^^^^^^^^^^^^^^^^ + +**Symptom**: ``rapids doctor`` reports NVLink is not available + +**Context**: NVLink is only available on multi-GPU systems with NVLink-capable GPUs + +**Solutions**: + +1. If you have only one GPU, this is expected. NVLink is not needed. + +2. For multi-GPU systems without NVLink: + + - RAPIDS will work but inter-GPU transfers will be slower + - Consider PCIe topology optimization + +3. Verify NVLink status: + + .. 
code-block:: bash + + nvidia-smi nvlink --status + +Insufficient Compute Capability +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +**Symptom**: "GPU requires compute capability 7.0 or higher" + +**Context**: RAPIDS requires GPU compute capability 7.0+ (Volta architecture or newer) + +**Solutions**: + +1. Check your GPU compute capability: + + .. code-block:: bash + + rapids debug | grep "GPU" + +2. Supported GPUs include: + + - Tesla V100, A100, H100 + - RTX 20xx, 30xx, 40xx series + - GTX 1660 and above + +3. If your GPU is too old, you'll need to upgrade hardware. + +Check Discovery Issues +^^^^^^^^^^^^^^^^^^^^^^ + +**Symptom**: Custom checks not discovered by ``rapids doctor`` + +**Solutions**: + +1. Verify entry point registration: + + .. code-block:: bash + + python -c "from importlib.metadata import entry_points; \ + print([ep.name for ep in entry_points(group='rapids_doctor_check')])" + +2. Reinstall package with entry points: + + .. code-block:: bash + + pip install -e . --force-reinstall + +3. Check for import errors: + + .. code-block:: bash + + rapids doctor --verbose + + Look for "Failed to import" messages. + +Import Errors +^^^^^^^^^^^^^ + +**Symptom**: "ModuleNotFoundError" when running checks + +**Solutions**: + +1. Verify package is installed: + + .. code-block:: bash + + pip list | grep rapids + +2. Check Python environment: + + .. code-block:: bash + + which python + python --version + +3. Ensure you're in the correct virtual environment: + + .. code-block:: bash + + # Conda + conda activate rapids-env + + # venv + source venv/bin/activate + +Permission Errors +^^^^^^^^^^^^^^^^^ + +**Symptom**: "Permission denied" when accessing GPU + +**Solutions**: + +1. Add user to video/render groups: + + .. code-block:: bash + + sudo usermod -a -G video $USER + sudo usermod -a -G render $USER + + # Log out and back in for changes to take effect + +2. Check device permissions: + + .. code-block:: bash + + ls -l /dev/nvidia* + +3. 
For containers, ensure proper device mounting: + + .. code-block:: bash + + docker run --gpus all --device=/dev/nvidia0 ... + +Debugging Tips +-------------- + +Enable Verbose Mode +^^^^^^^^^^^^^^^^^^^ + +Always start with verbose output: + +.. code-block:: bash + + rapids doctor --verbose + +This shows: + +- Which checks are discovered +- Detailed error messages +- Stack traces for failures + +Gather Debug Information +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Collect comprehensive system information: + +.. code-block:: bash + + rapids debug --json > debug_info.json + +Share this file when reporting issues. + +Test Individual Components +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Test NVIDIA stack components: + +.. code-block:: bash + + # Test nvidia-smi + nvidia-smi + + # Test pynvml (Python binding) + python -c "import pynvml; pynvml.nvmlInit(); print('OK')" + + # Test CUDA (cuda-python binding) + python -c "import cuda; print(cuda.__version__)" + +Check Environment Variables +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Verify CUDA-related environment variables: + +.. code-block:: bash + + echo $CUDA_HOME + echo $LD_LIBRARY_PATH + echo $PATH + +Run in Isolation +^^^^^^^^^^^^^^^^ + +Test in a clean environment: + +.. code-block:: bash + + # Create fresh environment + conda create -n test-rapids python=3.10 + conda activate test-rapids + + # Install only RAPIDS CLI + pip install rapids-cli + + # Test + rapids doctor + +Enable Python Warnings +^^^^^^^^^^^^^^^^^^^^^^ + +See all warnings: + +.. code-block:: bash + + python -W always -m rapids_cli.cli doctor + +Performance Issues +------------------ + +Slow Check Execution +^^^^^^^^^^^^^^^^^^^^ + +If checks are slow: + +1. Use ``--dry-run`` to verify discovery without execution: + + .. code-block:: bash + + rapids doctor --dry-run + +2. Profile individual checks: + + .. code-block:: python + + import time + from my_package.checks import my_check + + start = time.time() + my_check(verbose=True) + print(f"Check took {time.time() - start:.2f}s") + +3.
Optimize slow checks (keep under 1 second each) + +High Memory Usage +^^^^^^^^^^^^^^^^^ + +If ``rapids doctor`` uses too much memory: + +1. This is unexpected - report as a bug + +2. Workaround: Run checks individually: + + .. code-block:: bash + + rapids doctor package1 + rapids doctor package2 + +Reporting Issues +---------------- + +When reporting issues, include: + +1. Output of ``rapids debug --json`` + +2. Complete error messages from ``rapids doctor --verbose`` + +3. Steps to reproduce + +4. Expected vs actual behavior + +5. Environment details: + + .. code-block:: bash + + rapids debug > environment.txt + python --version + pip list > packages.txt + +Submit issues at: https://github.com/rapidsai/rapids-cli/issues + +Getting Help +------------ + +- GitHub Issues: https://github.com/rapidsai/rapids-cli/issues +- RAPIDS Slack: https://rapids.ai/community +- Documentation: https://docs.rapids.ai +- Stack Overflow: Tag questions with ``rapids`` and ``rapids-cli`` + +Known Limitations +----------------- + +- Windows support is experimental +- WSL2 requires special GPU setup +- Some checks require sudo access +- Docker containers need ``--gpus all`` flag +- Remote GPU monitoring not supported diff --git a/docs/source/user_guide.rst b/docs/source/user_guide.rst new file mode 100644 index 0000000..11da9ad --- /dev/null +++ b/docs/source/user_guide.rst @@ -0,0 +1,293 @@ +.. SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. SPDX-License-Identifier: Apache-2.0 + +User Guide +========== + +This guide provides detailed information on using the RAPIDS CLI. + +Overview +-------- + +The RAPIDS CLI provides two main commands: + +- ``rapids doctor`` - Health checks for your RAPIDS installation +- ``rapids debug`` - Gather debugging information about your system + +rapids doctor +------------- + +The ``doctor`` command performs health checks to ensure your RAPIDS environment is properly configured. 
+ +Basic Usage +^^^^^^^^^^^ + +Run all health checks: + +.. code-block:: bash + + rapids doctor + +This will check: + +- GPU availability and compatibility +- CUDA driver version +- System memory to GPU memory ratio +- NVLink status (for multi-GPU systems) +- Any checks registered by installed RAPIDS packages + +Verbose Output +^^^^^^^^^^^^^^ + +Get detailed information about each check: + +.. code-block:: bash + + rapids doctor --verbose + +This shows: + +- Which checks are discovered +- Detailed output from each check +- Additional diagnostic information + +Dry Run +^^^^^^^ + +See which checks would run without actually executing them: + +.. code-block:: bash + + rapids doctor --dry-run + +This is useful for: + +- Verifying plugin discovery +- Debugging check registration issues +- Understanding what will be checked + +Filtering Checks +^^^^^^^^^^^^^^^^ + +Run only specific checks by filtering: + +.. code-block:: bash + + # Run only cuDF-related checks + rapids doctor cudf + + # Run multiple filtered checks + rapids doctor cudf cuml + +The filter matches any part of the check's module path. + +Exit Codes +^^^^^^^^^^ + +The ``doctor`` command returns: + +- ``0`` - All checks passed +- ``1`` - One or more checks failed + +This makes it suitable for use in scripts and CI/CD pipelines: + +.. code-block:: bash + + if rapids doctor; then + echo "Environment is ready!" + else + echo "Environment has issues!" + exit 1 + fi + +rapids debug +------------ + +The ``debug`` command gathers comprehensive information about your system for troubleshooting. + +Basic Usage +^^^^^^^^^^^ + +Generate a debug report: + +.. code-block:: bash + + rapids debug + +This displays: + +- Platform information +- NVIDIA driver version +- CUDA version +- Python version and configuration +- Installed package versions +- System tools (pip, conda, cmake, etc.) +- OS information + +JSON Output +^^^^^^^^^^^ + +Get machine-readable output: + +.. 
code-block:: bash + + rapids debug --json + +This is useful for: + +- Automated debugging scripts +- Parsing in other tools +- Sharing debug information programmatically + +The JSON output includes all information in a structured format: + +.. code-block:: json + + { + "date": "2025-02-11 15:30:00", + "platform": "Linux-6.8.0-94-generic-x86_64", + "driver_version": "550.54.15", + "cuda_version": "12.4", + "python_version": "3.13.12", + "package_versions": { + "rapids-cli": "0.1.0", + ... + }, + ... + } + +Saving Debug Output +^^^^^^^^^^^^^^^^^^^ + +Save debug information to a file: + +.. code-block:: bash + + rapids debug --json > debug_info.json + +This file can be: + +- Shared with support teams +- Attached to bug reports +- Used for comparison across environments + +Common Workflows +---------------- + +Pre-Installation Check +^^^^^^^^^^^^^^^^^^^^^^ + +Before installing RAPIDS, verify your system meets requirements: + +.. code-block:: bash + + # Install just the CLI first + pip install rapids-cli + + # Check system compatibility + rapids doctor --verbose + +The checks will tell you if your GPU, drivers, and CUDA are suitable for RAPIDS. + +Post-Installation Verification +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +After installing RAPIDS packages, verify everything works: + +.. code-block:: bash + + # Install RAPIDS + pip install cudf-cu12 cuml-cu12 + + # Verify installation + rapids doctor + + # If issues occur, gather debug info + rapids debug --json > debug_info.json + +CI/CD Integration +^^^^^^^^^^^^^^^^^ + +Use RAPIDS CLI in your CI/CD pipelines: + +.. code-block:: yaml + + # GitHub Actions example + - name: Verify RAPIDS Environment + run: | + pip install rapids-cli + rapids doctor --verbose || exit 1 + + - name: Save Debug Info on Failure + if: failure() + run: rapids debug --json > ${{ github.workspace }}/debug.json + +Troubleshooting Workflow +^^^^^^^^^^^^^^^^^^^^^^^^^ + +When encountering issues: + +1. Run verbose health check: + + .. 
code-block:: bash + + rapids doctor --verbose + +2. Review warning messages and failures + +3. Gather full debug information: + + .. code-block:: bash + + rapids debug > debug_output.txt + +4. Check troubleshooting guide (see :doc:`troubleshooting`) + +5. Report issues with debug output + +Best Practices +-------------- + +Regular Health Checks +^^^^^^^^^^^^^^^^^^^^^ + +Run ``rapids doctor`` regularly to catch configuration drift: + +.. code-block:: bash + + # Add to your shell profile + alias rapids-check='rapids doctor && echo "✓ RAPIDS environment healthy"' + +Environment Documentation +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Document your environment with debug output: + +.. code-block:: bash + + # Save baseline configuration + rapids debug --json > baseline_env.json + + # Later, compare environments + rapids debug --json > current_env.json + diff baseline_env.json current_env.json + +Automated Monitoring +^^^^^^^^^^^^^^^^^^^^ + +Monitor RAPIDS environments automatically: + +.. code-block:: bash + + #!/bin/bash + # daily_rapids_check.sh + + if ! rapids doctor; then + rapids debug --json | mail -s "RAPIDS Health Check Failed" admin@example.com + fi + +Add to cron: + +.. 
code-block:: bash + + 0 9 * * * /path/to/daily_rapids_check.sh From 005391c1e14b7d7cd0f3f44ef09986c1f477dc20 Mon Sep 17 00:00:00 2001 From: Mike McCarty Date: Wed, 11 Feb 2026 16:20:12 -0500 Subject: [PATCH 2/3] updated docs to match the deployment docs --- dependencies.yaml | 3 +- docs/source/api/checks.rst | 231 +-------------- docs/source/api/cli.rst | 56 +--- docs/source/api/debug.rst | 238 ++-------------- docs/source/api/doctor.rst | 147 ++-------- docs/source/conf.py | 73 +++-- docs/source/contributing.rst | 363 ------------------------ docs/source/index.rst | 25 +- docs/source/installation.rst | 106 ------- docs/source/plugin_development.rst | 439 +++++------------------------ docs/source/troubleshooting.rst | 369 +++--------------------- docs/source/user_guide.rst | 254 +++-------------- rapids_cli/doctor/doctor.py | 26 +- 13 files changed, 251 insertions(+), 2079 deletions(-) delete mode 100644 docs/source/contributing.rst delete mode 100644 docs/source/installation.rst diff --git a/dependencies.yaml b/dependencies.yaml index 12cb729..fdda2a2 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -79,8 +79,9 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: + - pydata-sphinx-theme - sphinx - - sphinx-rtd-theme + - sphinx-copybutton test_python: common: - output_types: [conda, requirements, pyproject] diff --git a/docs/source/api/checks.rst b/docs/source/api/checks.rst index acf8775..363e26f 100644 --- a/docs/source/api/checks.rst +++ b/docs/source/api/checks.rst @@ -4,7 +4,10 @@ Health Checks ============= -Built-in health check modules for verifying RAPIDS installation requirements. +Built-in health check modules registered via the ``rapids_doctor_check`` +entry point group in ``pyproject.toml``. + +All check functions follow the contract described in :doc:`../plugin_development`. GPU Checks ---------- @@ -14,57 +17,6 @@ GPU Checks :undoc-members: :show-inheritance: -gpu_check -^^^^^^^^^ - -.. 
autofunction:: rapids_cli.doctor.checks.gpu.gpu_check - -Verifies that NVIDIA GPUs are available and accessible. - -**Parameters:** - -- ``verbose`` (bool): Enable detailed output - -**Returns:** - -- str: Message indicating number of GPUs detected - -**Raises:** - -- ValueError: If no GPUs are detected -- AssertionError: If GPU count is zero - -**Example:** - -.. code-block:: python - - >>> gpu_check(verbose=True) - 'GPU(s) detected: 2' - -check_gpu_compute_capability -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. autofunction:: rapids_cli.doctor.checks.gpu.check_gpu_compute_capability - -Verifies that all GPUs meet minimum compute capability requirements. - -**Parameters:** - -- ``verbose`` (bool): Enable detailed output - -**Returns:** - -- bool: True if all GPUs meet requirements - -**Raises:** - -- ValueError: If any GPU has insufficient compute capability - -**Required Compute Capability:** - -- Minimum: 7.0 (Volta architecture or newer) -- Supported GPUs: V100, A100, H100, RTX 20xx/30xx/40xx series - CUDA Driver Checks ------------------ @@ -73,32 +25,6 @@ CUDA Driver Checks :undoc-members: :show-inheritance: -cuda_check -^^^^^^^^^^ - -.. autofunction:: rapids_cli.doctor.checks.cuda_driver.cuda_check - -Verifies CUDA driver availability and retrieves version. - -**Parameters:** - -- ``verbose`` (bool): Enable detailed output - -**Returns:** - -- int: CUDA driver version code (e.g., 12040 for CUDA 12.4) - -**Raises:** - -- ValueError: If CUDA driver version cannot be determined - -**Example:** - -.. code-block:: python - - >>> cuda_check(verbose=True) - 12040 - Memory Checks ------------- @@ -107,62 +33,6 @@ Memory Checks :undoc-members: :show-inheritance: -get_system_memory -^^^^^^^^^^^^^^^^^ - -.. autofunction:: rapids_cli.doctor.checks.memory.get_system_memory - -Retrieves total system memory in gigabytes. 
- -**Parameters:** - -- ``verbose`` (bool): Unused, kept for consistency - -**Returns:** - -- float: Total system memory in GB - -get_gpu_memory -^^^^^^^^^^^^^^ - -.. autofunction:: rapids_cli.doctor.checks.memory.get_gpu_memory - -Calculates total GPU memory across all GPUs in gigabytes. - -**Parameters:** - -- ``verbose`` (bool): Unused, kept for consistency - -**Returns:** - -- float: Total GPU memory in GB - -check_memory_to_gpu_ratio -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. autofunction:: rapids_cli.doctor.checks.memory.check_memory_to_gpu_ratio - -Verifies system-to-GPU memory ratio meets recommendations. - -**Parameters:** - -- ``verbose`` (bool): Enable detailed output - -**Returns:** - -- bool: Always returns True (issues warnings instead of failing) - -**Warnings:** - -Issues warning if ratio is less than 1.8:1 (below recommended 2:1) - -**Recommendation:** - -For optimal performance, especially with Dask: - -- System memory should be at least 2x total GPU memory -- Example: 64GB RAM for 32GB total GPU memory (2x 16GB GPUs) - NVLink Checks ------------- @@ -170,96 +40,3 @@ NVLink Checks :members: :undoc-members: :show-inheritance: - -check_nvlink_status -^^^^^^^^^^^^^^^^^^^ - -.. autofunction:: rapids_cli.doctor.checks.nvlink.check_nvlink_status - -Checks for NVLink availability on multi-GPU systems. - -**Parameters:** - -- ``verbose`` (bool): Enable detailed output - -**Returns:** - -- bool: False if fewer than 2 GPUs, True if NVLink detected - -**Raises:** - -- ValueError: If NVLink status check fails on multi-GPU system - -**NVLink Benefits:** - -- High-bandwidth GPU-to-GPU communication -- Essential for multi-GPU training and processing -- Significantly faster than PCIe transfers - -**Note:** - -Only relevant for multi-GPU systems with NVLink-capable GPUs. - -Check Function Contract ------------------------ - -All built-in checks follow these conventions: - -Function Signature -^^^^^^^^^^^^^^^^^^ - -.. 
code-block:: python - - def check_function(verbose=False, **kwargs): - """Brief description of what this check verifies.""" - pass - -**Parameters:** - -- ``verbose`` (bool): Whether to provide detailed output -- ``**kwargs``: Reserved for future compatibility - -Return Values -^^^^^^^^^^^^^ - -**Success:** - -- Return any value (often True or a status string) -- Returning a string provides information for verbose output - -**Failure:** - -- Raise an exception with descriptive error message -- Use ValueError for failed checks -- Provide actionable guidance in error message - -**Warnings:** - -- Use ``warnings.warn()`` for non-fatal issues -- Always set ``stacklevel=2`` for correct source location - -Usage in Custom Checks ------------------------ - -Reference these built-in checks when creating custom checks: - -.. code-block:: python - - # Example: Custom memory check based on built-in pattern - from rapids_cli.doctor.checks.memory import get_gpu_memory - - - def my_memory_check(verbose=False, **kwargs): - """Check if GPU has enough memory for my workload.""" - gpu_memory = get_gpu_memory() - - required_gb = 16 - if gpu_memory < required_gb: - raise ValueError( - f"Insufficient GPU memory: {gpu_memory:.1f}GB available, " - f"{required_gb}GB required" - ) - - if verbose: - return f"GPU memory check passed: {gpu_memory:.1f}GB available" - return True diff --git a/docs/source/api/cli.rst b/docs/source/api/cli.rst index bd916ff..1580d51 100644 --- a/docs/source/api/cli.rst +++ b/docs/source/api/cli.rst @@ -4,59 +4,13 @@ CLI Module ========== -The CLI module provides the main command-line interface for RAPIDS CLI using Click. +The ``rapids_cli.cli`` module defines the main CLI entry point and subcommands +using `rich-click `_. + +The CLI is registered as a console script called ``rapids`` via the +``[project.scripts]`` entry in ``pyproject.toml``. .. 
automodule:: rapids_cli.cli :members: :undoc-members: :show-inheritance: - -Main Commands -------------- - -rapids -^^^^^^ - -.. autofunction:: rapids_cli.cli.rapids - -The main CLI entry point. Provides access to all subcommands. - -doctor -^^^^^^ - -.. autofunction:: rapids_cli.cli.doctor - -Run health checks to verify RAPIDS installation. - -**Options:** - -- ``--verbose``: Enable detailed output -- ``--dry-run``: Show which checks would run without executing them -- ``filters``: Optional filters to run specific checks - -**Exit Codes:** - -- 0: All checks passed -- 1: One or more checks failed - -debug -^^^^^ - -.. autofunction:: rapids_cli.cli.debug - -Gather comprehensive debugging information. - -**Options:** - -- ``--json``: Output in JSON format for machine parsing - -**Output:** - -Returns detailed system information including: - -- Platform and OS details -- GPU and driver information -- CUDA version -- Python configuration -- Installed packages -- Available tools diff --git a/docs/source/api/debug.rst b/docs/source/api/debug.rst index b4ce4b4..9c50567 100644 --- a/docs/source/api/debug.rst +++ b/docs/source/api/debug.rst @@ -4,230 +4,26 @@ Debug Module ============ -The debug module gathers comprehensive system information for troubleshooting. +The ``rapids_cli.debug.debug`` module gathers system and environment information +for troubleshooting RAPIDS installations. -.. automodule:: rapids_cli.debug.debug - :members: - :undoc-members: - :show-inheritance: - -Core Functions --------------- - -run_debug -^^^^^^^^^ - -.. autofunction:: rapids_cli.debug.debug.run_debug - -Main function for gathering and displaying debug information. - -**Parameters:** - -- ``output_format`` (str): Output format, either "console" or "json" +:func:`run_debug` is the main entry point. 
It collects: -**Collected Information:** - -- Date and time -- Platform information -- nvidia-smi output -- NVIDIA driver version -- CUDA version -- CUDA runtime path -- System CUDA toolkit locations -- Python version (full and short) -- Python hash info +- Platform and OS details (from ``platform`` and ``/etc/os-release``) +- NVIDIA driver and CUDA versions (via ``pynvml``) +- CUDA runtime path (via ``cuda-pathfinder``) +- System CUDA toolkit locations (globbing ``/usr/local/cuda*``) +- Python version and hash info - All installed package versions -- pip freeze output -- conda list output (if available) -- conda info output (if available) -- Available development tools -- OS information from /etc/os-release - -gather_cuda_version -^^^^^^^^^^^^^^^^^^^ - -.. autofunction:: rapids_cli.debug.debug.gather_cuda_version - -Retrieves and formats CUDA driver version from pynvml. - -**Returns:** - -- str: CUDA version in format "Major.Minor" or "Major.Minor.Patch" - -**Example:** - -.. code-block:: python - - >>> gather_cuda_version() - '12.4' - -gather_package_versions -^^^^^^^^^^^^^^^^^^^^^^^^ - -.. autofunction:: rapids_cli.debug.debug.gather_package_versions - -Collects versions of all installed Python packages. - -**Returns:** - -- dict: Mapping of package names to version strings - -**Example:** - -.. code-block:: python - - >>> versions = gather_package_versions() - >>> versions['rapids-cli'] - '0.1.0' - -gather_command_output -^^^^^^^^^^^^^^^^^^^^^^ - -.. autofunction:: rapids_cli.debug.debug.gather_command_output - -Executes a command and returns its output, with optional fallback. - -**Parameters:** - -- ``command`` (list[str]): Command and arguments to execute -- ``fallback_output`` (str | None): Value to return if command fails - -**Returns:** - -- str | None: Command output or fallback value - -**Example:** - -.. 
code-block:: python - - >>> gather_command_output(['pip', '--version']) - 'pip 24.0 from /usr/local/lib/python3.10/site-packages/pip (python 3.10)' - - >>> gather_command_output(['nonexistent'], fallback_output='Not installed') - 'Not installed' +- pip freeze and conda list output +- Tool versions: pip, conda, uv, pixi, g++, cmake, nvcc -gather_tools -^^^^^^^^^^^^ +Output is either a Rich-formatted console table or JSON (``--json``). -.. autofunction:: rapids_cli.debug.debug.gather_tools +API +--- -Gathers version information for common development tools. - -**Returns:** - -- dict: Tool names mapped to version strings or None - -**Checked Tools:** - -- pip -- conda -- uv -- pixi -- g++ -- cmake -- nvcc - -Output Formats --------------- - -Console Format -^^^^^^^^^^^^^^ - -Human-readable output with Rich formatting: - -.. code-block:: text - - RAPIDS Debug Information - - Date - 2025-02-11 15:30:00 - - Platform - Linux-6.8.0-94-generic-x86_64 - - Driver Version - 550.54.15 - - Cuda Version - 12.4 - - Package Versions - ┌─────────────────┬──────────┐ - │ rapids-cli │ 0.1.0 │ - │ cudf │ 25.02.0 │ - └─────────────────┴──────────┘ - -JSON Format -^^^^^^^^^^^ - -Machine-readable output for automation: - -.. code-block:: json - - { - "date": "2025-02-11 15:30:00", - "platform": "Linux-6.8.0-94-generic-x86_64", - "nvidia_smi_output": "...", - "driver_version": "550.54.15", - "cuda_version": "12.4", - "cuda_runtime_path": "/usr/local/cuda/include", - "system_ctk": ["/usr/local/cuda-12.4"], - "python_version_full": "3.13.12 (main, ...)", - "python_version": "3.13.12", - "python_hash_info": "sys.hash_info(...)", - "package_versions": { - "rapids-cli": "0.1.0" - }, - "pip_packages": "...", - "conda_packages": "...", - "conda_info": "...", - "tools": { - "pip": "pip 24.0", - "conda": "conda 24.1.0" - }, - "os_info": { - "NAME": "Ubuntu", - "VERSION": "22.04" - } - } - -Usage Examples --------------- - -Console Output -^^^^^^^^^^^^^^ - -.. 
code-block:: python - - from rapids_cli.debug.debug import run_debug - - # Display debug info in console - run_debug(output_format="console") - -JSON Output -^^^^^^^^^^^ - -.. code-block:: python - - import json - from rapids_cli.debug.debug import run_debug - - # Get JSON output - run_debug(output_format="json") - - # Can be captured with redirection - # rapids debug --json > debug.json - -Programmatic Access -^^^^^^^^^^^^^^^^^^^ - -.. code-block:: python - - from rapids_cli.debug.debug import gather_package_versions, gather_cuda_version - - # Get specific information - cuda_ver = gather_cuda_version() - packages = gather_package_versions() - - print(f"CUDA: {cuda_ver}") - print(f"Installed packages: {len(packages)}") +.. automodule:: rapids_cli.debug.debug + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/doctor.rst b/docs/source/api/doctor.rst index 16ab6ad..f4bd73d 100644 --- a/docs/source/api/doctor.rst +++ b/docs/source/api/doctor.rst @@ -4,138 +4,35 @@ Doctor Module ============= -The doctor module orchestrates health check execution and plugin discovery. +The ``rapids_cli.doctor.doctor`` module orchestrates health check discovery +and execution. -.. automodule:: rapids_cli.doctor.doctor - :members: - :undoc-members: - :show-inheritance: - -Core Functions --------------- - -doctor_check -^^^^^^^^^^^^ - -.. autofunction:: rapids_cli.doctor.doctor.doctor_check - -The main orchestration function for running health checks. - -**Parameters:** - -- ``verbose`` (bool): Enable detailed output -- ``dry_run`` (bool): Discover checks without executing them -- ``filters`` (list[str] | None): Optional filters to match check paths - -**Returns:** - -- bool: True if all checks passed, False if any failed - -**Process:** - -1. Discovers all registered checks via entry points -2. Filters checks based on provided filters -3. Executes each check and captures results -4. Collects warnings from checks -5. 
Displays results and returns success status - -CheckResult -^^^^^^^^^^^ - -.. autoclass:: rapids_cli.doctor.doctor.CheckResult - :members: - -Data class representing the result of a single check execution. - -**Attributes:** - -- ``name`` (str): Name of the check function -- ``description`` (str): First line of check's docstring -- ``status`` (bool): True if check passed, False if failed -- ``value`` (str | None): Optional return value for verbose output -- ``error`` (Exception | None): Exception if check failed -- ``warnings`` (list[WarningMessage] | None): Any warnings issued during check - -Plugin Discovery ----------------- - -The doctor module discovers plugins using Python entry points: - -.. code-block:: python - - from importlib.metadata import entry_points - - for ep in entry_points(group="rapids_doctor_check"): - check_fn = ep.load() - # Execute check - -Entry Point Group -^^^^^^^^^^^^^^^^^ - -Plugins register in the ``rapids_doctor_check`` group: - -.. code-block:: toml - - [project.entry-points.rapids_doctor_check] - my_check = "my_package.checks:my_check_function" +Checks are discovered via Python entry points in the ``rapids_doctor_check`` +group. Each check function is called with ``verbose`` as a keyword argument. +Results are collected into :class:`CheckResult` objects that track pass/fail +status, return values, errors, and warnings. Check Execution Flow -------------------- -1. **Discovery Phase** - - - Scan entry points for ``rapids_doctor_check`` group - - Load check functions - - Apply filters if specified - -2. **Execution Phase** - - - Run each check with ``verbose`` parameter - - Capture warnings using ``warnings.catch_warnings()`` - - Catch exceptions for failed checks - - Store results in CheckResult objects - -3. 
**Reporting Phase** - - - Display warnings - - Show verbose output if requested - - List failed checks with error messages - - Return overall success status - -Error Handling --------------- - -The doctor module handles several error scenarios: +1. **Discovery**: Scan ``rapids_doctor_check`` entry points and load check + functions. ``ImportError`` and ``AttributeError`` during loading are + silently suppressed via ``contextlib.suppress``. -**Import Errors** +2. **Filtering**: If filter arguments are provided, only checks whose + ``ep.value`` contains a filter substring are kept. -Failed imports during discovery are suppressed with ``contextlib.suppress``: +3. **Execution**: Each check runs inside ``warnings.catch_warnings(record=True)`` + so warnings are captured. Exceptions are caught and stored rather than + propagated. -.. code-block:: python +4. **Reporting**: Warnings are printed, verbose output is shown for passing + checks, and failed checks are listed with their error messages. - with contextlib.suppress(AttributeError, ImportError): - check_fn = ep.load() +API +--- -**Check Exceptions** - -Exceptions raised by checks are caught and stored: - -.. code-block:: python - - try: - value = check_fn(verbose=verbose) - status = True - except Exception as e: - error = e - status = False - -**Warnings** - -Python warnings are captured and displayed: - -.. code-block:: python - - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - value = check_fn(verbose=verbose) - caught_warnings = w +.. 
automodule:: rapids_cli.doctor.doctor + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/conf.py b/docs/source/conf.py index 2303b4a..b4df851 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -5,80 +5,77 @@ # For the full list of built-in configuration values, see the documentation: # https://www.sphinx-doc.org/en/master/usage/configuration.html -# -- Project information ----------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information - - +import datetime import os import sys sys.path.insert(0, os.path.abspath("../../")) +# -- Project information ----------------------------------------------------- project = "RAPIDS CLI" -copyright = "2025-2026, NVIDIA CORPORATION & AFFILIATES" -author = "NVIDIA RAPIDS" - -# The short X.Y version -version = "0.1" -# The full version, including alpha/beta/rc tags -release = "0.1.0" +html_title = "RAPIDS CLI" +copyright = f"{datetime.date.today().year}, NVIDIA" +author = "NVIDIA" # -- General configuration --------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration extensions = [ "sphinx.ext.autodoc", - "sphinx.ext.autosummary", "sphinx.ext.viewcode", - "sphinx.ext.napoleon", # For Google and NumPy style docstrings - "sphinx.ext.intersphinx", # Link to other project docs - "sphinx.ext.todo", # Support for todo items + "sphinx.ext.napoleon", + "sphinx.ext.intersphinx", + "sphinx_copybutton", ] templates_path = ["_templates"] exclude_patterns = [] +copybutton_prompt_text = r">>> |\.\.\. 
|\$ |In \[\d*\]: | {2,5}\.\.\.: | {5,8}: " +copybutton_prompt_is_regexp = True + # Napoleon settings for Google-style docstrings napoleon_google_docstring = True napoleon_numpy_docstring = False -napoleon_include_init_with_doc = True -napoleon_include_private_with_doc = False -napoleon_include_special_with_doc = True -napoleon_use_admonition_for_examples = True -napoleon_use_admonition_for_notes = True -napoleon_use_admonition_for_references = True -napoleon_use_ivar = False -napoleon_use_param = True -napoleon_use_rtype = True # Autodoc settings autodoc_default_options = { "members": True, "member-order": "bysource", - "special-members": "__init__", "undoc-members": True, - "exclude-members": "__weakref__", } -# Intersphinx mapping intersphinx_mapping = { "python": ("https://docs.python.org/3", None), } # -- Options for HTML output ------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output -html_theme = "sphinx_rtd_theme" -html_static_path = ["_static"] +html_theme = "pydata_sphinx_theme" html_theme_options = { - "navigation_depth": 4, - "collapse_navigation": False, - "sticky_navigation": True, - "includehidden": True, + "header_links_before_dropdown": 7, + "icon_links": [], + "logo": { + "link": "https://docs.rapids.ai/", + }, + "github_url": "https://github.com/rapidsai/rapids-cli", + "show_toc_level": 1, + "navbar_align": "right", +} + +html_sidebars = { + "**": ["sidebar-nav-bs", "sidebar-ethical-ads"], } -# Add any paths that contain custom static files (such as style sheets) -html_logo = None -html_favicon = None +html_static_path = ["_static"] + + +def setup(app): + app.add_css_file("https://docs.rapids.ai/assets/css/custom.css") + app.add_css_file( + "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.9.0/css/all.min.css" + ) + app.add_js_file( + "https://docs.rapids.ai/assets/js/custom.js", loading_method="defer" + ) diff --git a/docs/source/contributing.rst 
b/docs/source/contributing.rst deleted file mode 100644 index 9d721f3..0000000 --- a/docs/source/contributing.rst +++ /dev/null @@ -1,363 +0,0 @@ -.. SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -.. SPDX-License-Identifier: Apache-2.0 - -Contributing Guide -================== - -Thank you for your interest in contributing to RAPIDS CLI! This guide will help you get started. - -Getting Started ---------------- - -Prerequisites -^^^^^^^^^^^^^ - -- Python 3.10 or later -- Git -- NVIDIA GPU (for testing) -- NVIDIA drivers and CUDA toolkit - -Development Setup -^^^^^^^^^^^^^^^^^ - -1. Fork and clone the repository: - - .. code-block:: bash - - git clone https://github.com/YOUR_USERNAME/rapids-cli.git - cd rapids-cli - -2. Create a development environment: - - .. code-block:: bash - - # Using conda (recommended) - conda create -n rapids-cli-dev python=3.10 - conda activate rapids-cli-dev - - # Or using venv - python -m venv venv - source venv/bin/activate - -3. Install in editable mode with test dependencies: - - .. code-block:: bash - - pip install -e .[test] - -4. Install pre-commit hooks: - - .. code-block:: bash - - pre-commit install - -Development Workflow --------------------- - -Making Changes -^^^^^^^^^^^^^^ - -1. Create a feature branch: - - .. code-block:: bash - - git checkout -b feature/your-feature-name - -2. Make your changes following the code style guidelines - -3. Add tests for your changes - -4. Run tests locally: - - .. code-block:: bash - - pytest - -5. Run linting checks: - - .. code-block:: bash - - pre-commit run --all-files - -Code Style ----------- - -The project uses several linting tools to maintain code quality: - -Formatting -^^^^^^^^^^ - -- **Black**: Code formatting (120 char line length) -- **isort**: Import sorting - -Linting -^^^^^^^ - -- **Ruff**: Fast Python linter (replaces flake8, pylint, etc.) -- **mypy**: Static type checking - -Run formatters and linters: - -.. 
code-block:: bash - - # Format code - black . - - # Check with ruff - ruff check --fix . - - # Type check - mypy rapids_cli/ - -Docstrings -^^^^^^^^^^ - -Use Google-style docstrings: - -.. code-block:: python - - def my_function(param1: str, param2: int) -> bool: - """Brief description of the function. - - Longer description if needed. - - Args: - param1: Description of param1. - param2: Description of param2. - - Returns: - Description of return value. - - Raises: - ValueError: Description of when this is raised. - - Example: - >>> my_function("test", 42) - True - """ - pass - -Testing -------- - -Writing Tests -^^^^^^^^^^^^^ - -- Place tests in ``rapids_cli/tests/`` -- Use pytest for testing -- Mock external dependencies (pynvml, subprocess calls, etc.) -- Aim for high coverage (95%+ required) - -Test Structure: - -.. code-block:: python - - # rapids_cli/tests/test_my_feature.py - from unittest.mock import patch, MagicMock - import pytest - - from rapids_cli.my_module import my_function - - - def test_my_function_success(): - """Test that my_function works in normal case.""" - result = my_function("input") - assert result == "expected" - - - def test_my_function_failure(): - """Test that my_function handles errors correctly.""" - with pytest.raises(ValueError, match="error message"): - my_function("invalid") - - - def test_my_function_with_mock(): - """Test my_function with mocked dependencies.""" - with patch("pynvml.nvmlInit") as mock_init: - result = my_function() - mock_init.assert_called_once() - -Running Tests -^^^^^^^^^^^^^ - -.. code-block:: bash - - # Run all tests - pytest - - # Run specific test file - pytest rapids_cli/tests/test_doctor.py - - # Run with coverage - pytest --cov=rapids_cli - - # Run specific test - pytest rapids_cli/tests/test_doctor.py::test_doctor_check_all_pass - -Pull Request Process --------------------- - -1. Ensure all tests pass and coverage is maintained - -2. Update documentation if needed - -3. Sign your commits: - - .. 
code-block:: bash - - git commit -s -m "Your commit message" - -4. Push to your fork: - - .. code-block:: bash - - git push origin feature/your-feature-name - -5. Create a pull request on GitHub - -6. Address review feedback - -Commit Messages -^^^^^^^^^^^^^^^ - -Follow conventional commit format: - -.. code-block:: text - - : - - - - Signed-off-by: Your Name - -Types: - -- ``feat``: New feature -- ``fix``: Bug fix -- ``docs``: Documentation changes -- ``test``: Adding or updating tests -- ``refactor``: Code refactoring -- ``ci``: CI/CD changes -- ``chore``: Maintenance tasks - -Example: - -.. code-block:: text - - feat: add support for filtering checks by package name - - This allows users to run only specific checks by providing - filter arguments to the doctor command. - - Signed-off-by: Jane Doe - -Documentation -------------- - -Building Documentation -^^^^^^^^^^^^^^^^^^^^^^ - -.. code-block:: bash - - cd docs - make html - - # View in browser - open build/html/index.html - -Documentation lives in ``docs/source/`` and uses Sphinx with reStructuredText. - -Adding New Documentation -^^^^^^^^^^^^^^^^^^^^^^^^ - -1. Create ``.rst`` file in ``docs/source/`` - -2. Add to table of contents in ``index.rst`` - -3. Build and verify: - - .. code-block:: bash - - cd docs - make html - -Reporting Issues ----------------- - -When reporting bugs: - -1. Check if issue already exists - -2. Provide minimal reproduction example - -3. Include debug output: - - .. code-block:: bash - - rapids debug --json > debug_info.json - -4. Include: - - - RAPIDS CLI version - - Python version - - OS and driver versions - - Expected vs actual behavior - -Feature Requests -^^^^^^^^^^^^^^^^ - -For feature requests: - -1. Describe the use case - -2. Explain why existing features don't work - -3. Provide example usage - -4. Consider contributing the feature! 
- -Code Review Guidelines ----------------------- - -For Reviewers -^^^^^^^^^^^^^ - -- Check that tests cover new functionality -- Verify documentation is updated -- Ensure code style is consistent -- Look for potential edge cases -- Validate error messages are helpful - -For Contributors -^^^^^^^^^^^^^^^^ - -- Respond to feedback promptly -- Ask questions if feedback is unclear -- Keep PRs focused on single concern -- Update based on reviews - -Release Process ---------------- - -Releases are managed by maintainers: - -1. Version is managed via git tags -2. CI automatically builds packages -3. Packages published to PyPI and conda-forge - -Community ---------- - -- GitHub Discussions: Q&A and ideas -- Slack: Real-time chat at rapids.ai/community -- Issues: Bug reports and features - -License -------- - -By contributing, you agree that your contributions will be licensed under the Apache 2.0 License. - -Thank You! ----------- - -Every contribution helps make RAPIDS CLI better. Thank you for your time and effort! diff --git a/docs/source/index.rst b/docs/source/index.rst index 9c27bfa..474bf39 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -9,43 +9,23 @@ primarily focused on health checks (``rapids doctor``) and debugging (``rapids d It uses a plugin system that allows RAPIDS libraries to register their own health checks via Python entry points. -.. image:: https://img.shields.io/badge/python-3.10+-blue.svg - :target: https://www.python.org/downloads/ - :alt: Python Version - -.. image:: https://img.shields.io/badge/license-Apache%202.0-blue.svg - :target: https://github.com/rapidsai/rapids-cli/blob/main/LICENSE - :alt: License - Quick Start ----------- -Install the RAPIDS CLI: - .. code-block:: bash pip install rapids-cli -Run health checks: - -.. code-block:: bash - + # Run health checks rapids doctor -Gather debugging information: - -.. 
code-block:: bash - + # Gather system info for debugging rapids debug --json -Documentation Contents ----------------------- - .. toctree:: :maxdepth: 2 :caption: User Guide - installation user_guide troubleshooting @@ -54,7 +34,6 @@ Documentation Contents :caption: Developer Guide plugin_development - contributing .. toctree:: :maxdepth: 2 diff --git a/docs/source/installation.rst b/docs/source/installation.rst deleted file mode 100644 index 213d474..0000000 --- a/docs/source/installation.rst +++ /dev/null @@ -1,106 +0,0 @@ -.. SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -.. SPDX-License-Identifier: Apache-2.0 - -Installation -============ - -Requirements ------------- - -- Python 3.10 or later -- NVIDIA GPU (for running health checks) -- NVIDIA drivers installed -- CUDA toolkit (optional, for full functionality) - -Installation Methods --------------------- - -From PyPI -^^^^^^^^^ - -The simplest way to install RAPIDS CLI is via pip: - -.. code-block:: bash - - pip install rapids-cli - -From Conda -^^^^^^^^^^ - -You can also install via conda: - -.. code-block:: bash - - conda install -c rapidsai -c conda-forge rapids-cli - -From Source -^^^^^^^^^^^ - -For development or to get the latest features: - -.. code-block:: bash - - git clone https://github.com/rapidsai/rapids-cli.git - cd rapids-cli - pip install -e . - -With Test Dependencies -^^^^^^^^^^^^^^^^^^^^^^ - -To run tests locally: - -.. code-block:: bash - - pip install -e .[test] - -Verification ------------- - -Verify the installation by running: - -.. code-block:: bash - - rapids --help - -You should see the RAPIDS CLI help message with available commands. - -Quick Test -^^^^^^^^^^ - -Run a quick health check to verify everything is working: - -.. code-block:: bash - - rapids doctor --verbose - -This will check your GPU availability, CUDA installation, and system configuration. - -Upgrading ---------- - -To upgrade to the latest version: - -.. 
code-block:: bash - - pip install --upgrade rapids-cli - -Or with conda: - -.. code-block:: bash - - conda update rapids-cli - -Uninstalling ------------- - -To uninstall RAPIDS CLI: - -.. code-block:: bash - - pip uninstall rapids-cli - -Or with conda: - -.. code-block:: bash - - conda remove rapids-cli diff --git a/docs/source/plugin_development.rst b/docs/source/plugin_development.rst index f4aa2ae..f93b069 100644 --- a/docs/source/plugin_development.rst +++ b/docs/source/plugin_development.rst @@ -1,148 +1,78 @@ .. SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. .. SPDX-License-Identifier: Apache-2.0 -Plugin Development Guide -======================== +Plugin Development +================== -The RAPIDS CLI uses a plugin system based on Python entry points to allow external packages -to register their own health checks. This guide shows you how to create plugins for your -RAPIDS library. - -Overview --------- - -Plugins are discovered automatically through Python entry points in the ``rapids_doctor_check`` -group. When ``rapids doctor`` runs, it discovers all registered checks and executes them. +Any package can add checks to ``rapids doctor`` by exposing a function via a +Python entry point in the ``rapids_doctor_check`` group. Quick Start ----------- -Here's a minimal example of adding a check to your RAPIDS package: - -1. Create a check function in your package: +1. Create a check function: .. code-block:: python - # my_rapids_package/health_checks.py - + # my_package/health_checks.py - def my_package_check(verbose=False, **kwargs): - """Check that my_rapids_package is working correctly.""" - import my_rapids_package - # Perform your check - result = my_rapids_package.test_function() + def my_check(verbose=False, **kwargs): + """Check that my_package is working correctly.""" + try: + import my_package + except ImportError as e: + raise ImportError( + "my_package not found. 
Install with: pip install my_package" + ) from e - if not result: - raise ValueError("my_rapids_package self-test failed") + if verbose: + return f"my_package {my_package.__version__} is available" - return "my_rapids_package is working correctly" - -2. Register the check in your ``pyproject.toml``: +2. Register it in ``pyproject.toml``: .. code-block:: toml [project.entry-points.rapids_doctor_check] - my_package_check = "my_rapids_package.health_checks:my_package_check" + my_check = "my_package.health_checks:my_check" -3. Install your package and test: +3. Install and verify: .. code-block:: bash pip install -e . - rapids doctor --verbose + rapids doctor --verbose --dry-run Check Function Contract ----------------------- -Your check function must follow these conventions: - -Function Signature -^^^^^^^^^^^^^^^^^^ +Signature +^^^^^^^^^ .. code-block:: python def my_check(verbose=False, **kwargs): - """Check description goes here.""" - pass + """First line of docstring is shown in output.""" + ... -- Accept ``verbose`` parameter (boolean, default False) -- Accept ``**kwargs`` for forward compatibility -- Provide a clear docstring (first line is used in output) +- Accept ``verbose`` (bool) and ``**kwargs`` for forward compatibility. +- The first line of the docstring is used as the check description in output. +- New keyword arguments may be added in the future but will never be removed, + so ``**kwargs`` ensures your check won't break. Return Values ^^^^^^^^^^^^^ -**Success**: Return successfully (any return value) - -.. code-block:: python - - def check_success(verbose=False, **kwargs): - """This check always passes.""" - # Option 1: Return None (implicit) - return - - - def check_with_info(verbose=False, **kwargs): - """This check passes with info.""" - # Option 2: Return a string for verbose output - return "GPU 0: Tesla V100, 32GB memory" - -**Failure**: Raise an exception with a helpful message - -.. 
code-block:: python - - def check_failure(verbose=False, **kwargs): - """This check fails with helpful message.""" - if not some_condition(): - raise ValueError( - "Check failed: XYZ is not configured correctly. " - "To fix this, run: sudo apt-get install xyz" - ) - -**Warnings**: Use ``warnings.warn()`` for non-fatal issues - -.. code-block:: python - - import warnings - - - def check_with_warning(verbose=False, **kwargs): - """This check passes but issues a warning.""" - if not optimal_condition(): - warnings.warn( - "Suboptimal configuration detected. " "Performance may be degraded.", - stacklevel=2, - ) - return True +- **Pass**: Return any value. Returning a string provides extra info shown in + ``--verbose`` mode. +- **Fail**: Raise an exception. The message should tell the user how to fix it. +- **Warn**: Call ``warnings.warn("message", stacklevel=2)`` for non-fatal issues. + Warnings are captured and displayed but do not cause the check to fail. Examples -------- -Example 1: Basic Import Check -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Check that your package can be imported: - -.. code-block:: python - - def import_check(verbose=False, **kwargs): - """Check that my_package can be imported.""" - try: - import my_package - except ImportError as e: - raise ImportError( - "my_package not found. Install with: pip install my_package" - ) from e - - if verbose: - return f"my_package version {my_package.__version__}" - return True - -Example 2: GPU Memory Check -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Check GPU memory requirements: +GPU memory requirement check: .. 
code-block:: python @@ -150,328 +80,97 @@ Check GPU memory requirements: def gpu_memory_check(verbose=False, **kwargs): - """Check that GPU has sufficient memory for my_package.""" + """Check that GPU has at least 8GB memory.""" pynvml.nvmlInit() - - required_memory_gb = 8 handle = pynvml.nvmlDeviceGetHandleByIndex(0) - memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle) - available_gb = memory_info.total / (1024**3) + mem = pynvml.nvmlDeviceGetMemoryInfo(handle) + available_gb = mem.total / (1024**3) - if available_gb < required_memory_gb: + if available_gb < 8: raise ValueError( - f"Insufficient GPU memory: {available_gb:.1f}GB available, " - f"{required_memory_gb}GB required" + f"Insufficient GPU memory: {available_gb:.1f}GB available, 8GB required" ) if verbose: - return f"GPU memory: {available_gb:.1f}GB available" - return True - -Example 3: Dependency Version Check -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + return f"GPU memory: {available_gb:.1f}GB" -Check that dependencies meet version requirements: +Non-fatal warning: .. code-block:: python import warnings - from packaging import version - - - def dependency_version_check(verbose=False, **kwargs): - """Check that dependencies meet minimum version requirements.""" - import numpy - import pandas - min_numpy = "1.20.0" - min_pandas = "1.3.0" - if version.parse(numpy.__version__) < version.parse(min_numpy): - raise ValueError( - f"NumPy {min_numpy}+ required, found {numpy.__version__}. " - f"Upgrade with: pip install 'numpy>={min_numpy}'" - ) - - if version.parse(pandas.__version__) < version.parse(min_pandas): + def config_check(verbose=False, **kwargs): + """Check optional configuration.""" + if not optimal_condition(): warnings.warn( - f"Pandas {min_pandas}+ recommended for best performance. " - f"Found {pandas.__version__}.", + "Suboptimal configuration detected. 
Performance may be degraded.", stacklevel=2, ) - if verbose: - return f"NumPy {numpy.__version__}, Pandas {pandas.__version__}" - return True - -Example 4: Functional Test -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Run a simple functional test: - -.. code-block:: python - - def functional_check(verbose=False, **kwargs): - """Run a simple functional test.""" - import my_package - import numpy as np - - try: - # Create test data - data = np.random.rand(100, 10) - - # Run simple operation - result = my_package.process(data) - - # Verify result - assert result.shape == (100, 10), "Unexpected output shape" - assert not np.isnan(result).any(), "NaN values in output" - - except Exception as e: - raise RuntimeError( - f"Functional test failed: {e}. " "This may indicate a GPU or driver issue." - ) from e - - if verbose: - return "Functional test passed: basic operations working" - return True - -Best Practices --------------- - -Clear Error Messages -^^^^^^^^^^^^^^^^^^^^ - -Always provide actionable error messages: - -.. code-block:: python - - # Bad: Unclear what to do - raise ValueError("Check failed") - - # Good: Clear action to fix - raise ValueError( - "CUDA 11.2+ required but CUDA 10.2 found. " - "Upgrade CUDA: https://developer.nvidia.com/cuda-downloads" - ) - -Performance -^^^^^^^^^^^ - -Keep checks fast (< 1 second each): - -.. code-block:: python - - # Bad: Slow check - def slow_check(verbose=False, **kwargs): - """This check is too slow.""" - result = expensive_computation() # Takes 30 seconds - return result - - - # Good: Fast check - def fast_check(verbose=False, **kwargs): - """This check is appropriately fast.""" - # Just verify configuration, don't run full workload - config = load_config() - validate_config(config) - return True - -Verbose Output -^^^^^^^^^^^^^^ +Multiple checks from one package: -Provide useful information in verbose mode: - -.. 
code-block:: python - - def informative_check(verbose=False, **kwargs): - """Check with informative output.""" - gpu_count = get_gpu_count() - gpu_memory = get_total_gpu_memory() - - if gpu_count == 0: - raise ValueError("No GPUs found") - - if verbose: - return f"Found {gpu_count} GPU(s) " f"with {gpu_memory:.1f}GB total memory" - return True - -Graceful Degradation -^^^^^^^^^^^^^^^^^^^^ - -Handle optional dependencies gracefully: - -.. code-block:: python - - def optional_dependency_check(verbose=False, **kwargs): - """Check that works with optional dependencies.""" - try: - import optional_package - - has_optional = True - except ImportError: - has_optional = False - - if not has_optional: - import warnings - - warnings.warn( - "optional_package not found. " "Some features will be disabled.", - stacklevel=2, - ) +.. code-block:: toml - # Continue with check anyway - return True + [project.entry-points.rapids_doctor_check] + my_pkg_import = "my_package.checks:import_check" + my_pkg_gpu = "my_package.checks:gpu_check" + my_pkg_functional = "my_package.checks:functional_check" Testing Your Plugin ------------------- -Test Plugin Discovery -^^^^^^^^^^^^^^^^^^^^^ - -Verify your check is discovered: +Verify discovery: .. code-block:: bash rapids doctor --verbose --dry-run | grep my_check -Test Plugin Execution -^^^^^^^^^^^^^^^^^^^^^ - -Run your check: +Run only your checks: .. code-block:: bash rapids doctor --verbose my_package -Unit Testing -^^^^^^^^^^^^ - -Test your check function directly: +Unit test with mocks (following the pattern in ``rapids_cli/tests/``): .. 
code-block:: python - # test_health_checks.py + from unittest.mock import patch + import pytest + from my_package.health_checks import my_check def test_my_check_success(): - """Test that check passes in normal conditions.""" result = my_check(verbose=True) assert result is not None def test_my_check_failure(): - """Test that check fails appropriately.""" with pytest.raises(ValueError, match="expected error"): - my_check_with_bad_config(verbose=False) - -Advanced Topics ---------------- - -Multiple Checks per Package -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Register multiple checks: - -.. code-block:: toml - - [project.entry-points.rapids_doctor_check] - my_pkg_import = "my_package.checks:import_check" - my_pkg_gpu = "my_package.checks:gpu_check" - my_pkg_functional = "my_package.checks:functional_check" - -Check Dependencies -^^^^^^^^^^^^^^^^^^ - -If checks have dependencies, handle them gracefully: - -.. code-block:: python - - def dependent_check(verbose=False, **kwargs): - """This check depends on GPU check passing.""" - # Don't fail if dependencies aren't met - try: - import pynvml - - pynvml.nvmlInit() - except Exception: - warnings.warn("GPU not available, skipping dependent check", stacklevel=2) - return True - - # Rest of check - return True - -Environment-Specific Checks -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Adapt checks to different environments: - -.. code-block:: python - - import os - - - def environment_aware_check(verbose=False, **kwargs): - """Check that adapts to environment.""" - is_ci = os.environ.get("CI") == "true" - - if is_ci: - # Skip expensive checks in CI - return "Skipped in CI environment" - - # Run full check - run_expensive_validation() - return True + my_check(verbose=False) Troubleshooting --------------- -Check Not Discovered -^^^^^^^^^^^^^^^^^^^^ +**Check not discovered**: Verify the entry point name is in the output of: -If your check isn't showing up: - -1. Verify entry point is correct: - - .. 
code-block:: bash - - python -c "from importlib.metadata import entry_points; print([ep for ep in entry_points(group='rapids_doctor_check')])" - -2. Reinstall your package: - - .. code-block:: bash - - pip install -e . --force-reinstall --no-deps - -3. Check for import errors: - - .. code-block:: python - - python -c "from my_package.checks import my_check" - -Check Always Fails -^^^^^^^^^^^^^^^^^^ - -Debug the check directly: +.. code-block:: bash -.. code-block:: python + python -c "from importlib.metadata import entry_points; \ + print([ep.name for ep in entry_points(group='rapids_doctor_check')])" - from my_package.checks import my_check +If missing, reinstall with ``pip install -e . --force-reinstall --no-deps``. - try: - result = my_check(verbose=True) - print(f"Success: {result}") - except Exception as e: - print(f"Failed: {e}") - import traceback +**Import errors are silent**: The doctor module uses ``contextlib.suppress`` +to skip checks that fail to import. Test your import directly: - traceback.print_exc() +.. code-block:: bash -Resources ---------- + python -c "from my_package.health_checks import my_check" -- Entry points documentation: https://packaging.python.org/specifications/entry-points/ -- RAPIDS CLI repository: https://github.com/rapidsai/rapids-cli -- Example plugins: See built-in checks in ``rapids_cli/doctor/checks/`` +See the built-in checks in ``rapids_cli/doctor/checks/`` for reference +implementations. diff --git a/docs/source/troubleshooting.rst b/docs/source/troubleshooting.rst index 014f39f..5da7f2c 100644 --- a/docs/source/troubleshooting.rst +++ b/docs/source/troubleshooting.rst @@ -4,17 +4,10 @@ Troubleshooting =============== -This guide helps you resolve common issues with the RAPIDS CLI. - -Common Issues -------------- - No GPUs Detected -^^^^^^^^^^^^^^^^ - -**Symptom**: ``rapids doctor`` reports "No available GPUs detected" +---------------- -**Solutions**: +``rapids doctor`` reports "No available GPUs detected". 1. 
Verify NVIDIA drivers are installed: @@ -22,32 +15,31 @@ No GPUs Detected nvidia-smi - If this fails, install NVIDIA drivers: - - .. code-block:: bash - - # Ubuntu/Debian - sudo apt-get install nvidia-driver-550 - -2. Check that GPU is visible to Python: +2. Check that GPU is visible from Python: .. code-block:: bash python -c "import pynvml; pynvml.nvmlInit(); print(pynvml.nvmlDeviceGetCount())" -3. Verify you're not in a container without GPU access: +3. If running in a container, ensure GPU passthrough is enabled: .. code-block:: bash - # Docker needs --gpus all flag docker run --gpus all ... -CUDA Version Mismatch -^^^^^^^^^^^^^^^^^^^^^ +Insufficient Compute Capability +-------------------------------- + +"GPU requires compute capability 7 or higher". + +RAPIDS requires Volta-generation GPUs or newer (compute capability 7.0+). +Supported GPUs include V100, A100, H100, and RTX 20xx/30xx/40xx series. +See https://developer.nvidia.com/cuda-gpus for a full list. -**Symptom**: ``rapids doctor`` reports CUDA version incompatibility +CUDA Version Issues +------------------- -**Solutions**: +"Unable to look up CUDA version". 1. Check your CUDA driver version: @@ -55,341 +47,66 @@ CUDA Version Mismatch nvidia-smi | grep "CUDA Version" -2. Install compatible RAPIDS packages: +2. Ensure RAPIDS packages match your CUDA version: .. code-block:: bash - # For CUDA 11.x - pip install cudf-cu11 cuml-cu11 - # For CUDA 12.x - pip install cudf-cu12 cuml-cu12 - -3. Update NVIDIA drivers if needed: + pip install cudf-cu12 - .. code-block:: bash - - # Check https://docs.rapids.ai/install for requirements - sudo apt-get update && sudo apt-get upgrade nvidia-driver + # For CUDA 11.x + pip install cudf-cu11 Low Memory Warning -^^^^^^^^^^^^^^^^^^ - -**Symptom**: Warning about system memory to GPU memory ratio - -**Context**: RAPIDS recommends 2:1 ratio of system RAM to GPU memory for optimal performance - -**Solutions**: - -1. This is a warning, not an error. RAPIDS will still work. 
- -2. For better performance, consider: - - - Adding more system RAM - - Using data chunking strategies - - Processing smaller batches - -3. For Dask workloads, adjust worker memory limits: - - .. code-block:: python - - from dask_cuda import LocalCUDACluster - - cluster = LocalCUDACluster( - device_memory_limit="8GB", # Limit per worker - memory_limit="16GB", # System memory per worker - ) - -NVLink Not Found -^^^^^^^^^^^^^^^^ - -**Symptom**: ``rapids doctor`` reports NVLink is not available - -**Context**: NVLink is only available on multi-GPU systems with NVLink-capable GPUs - -**Solutions**: - -1. If you have only one GPU, this is expected. NVLink is not needed. - -2. For multi-GPU systems without NVLink: - - - RAPIDS will work but inter-GPU transfers will be slower - - Consider PCIe topology optimization - -3. Verify NVLink status: - - .. code-block:: bash - - nvidia-smi nvlink --status - -Insufficient Compute Capability -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -**Symptom**: "GPU requires compute capability 7.0 or higher" - -**Context**: RAPIDS requires GPU compute capability 7.0+ (Volta architecture or newer) - -**Solutions**: - -1. Check your GPU compute capability: - - .. code-block:: bash - - rapids debug | grep "GPU" - -2. Supported GPUs include: - - - Tesla V100, A100, H100 - - RTX 20xx, 30xx, 40xx series - - GTX 1660 and above +------------------ -3. If your GPU is too old, you'll need to upgrade hardware. +"System Memory to total GPU Memory ratio not at least 2:1 ratio." -Check Discovery Issues -^^^^^^^^^^^^^^^^^^^^^^ +This is a warning, not a failure. RAPIDS recommends system RAM be at least +twice total GPU memory for optimal performance, particularly with Dask. +RAPIDS will still function with a lower ratio. -**Symptom**: Custom checks not discovered by ``rapids doctor`` +Custom Checks Not Discovered +----------------------------- -**Solutions**: +If ``rapids doctor --verbose`` doesn't show your custom check: -1. Verify entry point registration: +1. 
Verify the entry point is registered: .. code-block:: bash python -c "from importlib.metadata import entry_points; \ - print([ep.name for ep in entry_points(group='rapids_doctor_check')])" - -2. Reinstall package with entry points: - - .. code-block:: bash - - pip install -e . --force-reinstall - -3. Check for import errors: - - .. code-block:: bash - - rapids doctor --verbose - - Look for "Failed to import" messages. - -Import Errors -^^^^^^^^^^^^^ - -**Symptom**: "ModuleNotFoundError" when running checks - -**Solutions**: - -1. Verify package is installed: - - .. code-block:: bash - - pip list | grep rapids - -2. Check Python environment: - - .. code-block:: bash - - which python - python --version + print([ep.name for ep in entry_points(group='rapids_doctor_check')])" -3. Ensure you're in the correct virtual environment: +2. Reinstall the package that provides the check: .. code-block:: bash - # Conda - conda activate rapids-env + pip install -e . --force-reinstall --no-deps - # venv - source venv/bin/activate - -Permission Errors -^^^^^^^^^^^^^^^^^ - -**Symptom**: "Permission denied" when accessing GPU - -**Solutions**: - -1. Add user to video/render groups: +3. Check for import errors by importing the check function directly: .. code-block:: bash - sudo usermod -a -G video $USER - sudo usermod -a -G render $USER + python -c "from my_package.checks import my_check" - # Log out and back in for changes to take effect + Import errors during discovery are silently suppressed + (see ``contextlib.suppress`` in ``doctor.py``). -2. Check device permissions: +General Debugging Steps +----------------------- - .. code-block:: bash - - ls -l /dev/nvidia* - -3. For containers, ensure proper device mounting: +1. Run with verbose output: .. code-block:: bash - docker run --gpus all --device=/dev/nvidia0 ... - -Debugging Tips --------------- - -Enable Verbose Mode -^^^^^^^^^^^^^^^^^^^ - -Always start with verbose output: - -.. 
code-block:: bash - - rapids doctor --verbose - -This shows: - -- Which checks are discovered -- Detailed error messages -- Stack traces for failures - -Gather Debug Information -^^^^^^^^^^^^^^^^^^^^^^^^^ - -Collect comprehensive system information: - -.. code-block:: bash - - rapids debug --json > debug_info.json - -Share this file when reporting issues. - -Test Individual Components -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Test NVIDIA stack components: - -.. code-block:: bash - - # Test nvidia-smi - nvidia-smi - - # Test pynvml (Python binding) - python -c "import pynvml; pynvml.nvmlInit(); print('OK')" - - # Test CUDA - python -c "import cuda; print(cuda.cudaroot)" - -Check Environment Variables -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Verify CUDA-related environment variables: - -.. code-block:: bash - - echo $CUDA_HOME - echo $LD_LIBRARY_PATH - echo $PATH - -Run in Isolation -^^^^^^^^^^^^^^^^ - -Test in a clean environment: - -.. code-block:: bash - - # Create fresh environment - conda create -n test-rapids python=3.10 - conda activate test-rapids - - # Install only RAPIDS CLI - pip install rapids-cli - - # Test - rapids doctor - -Enable Python Warnings -^^^^^^^^^^^^^^^^^^^^^^ - -See all warnings: - -.. code-block:: bash - - python -W all -m rapids_cli.cli doctor - -Performance Issues ------------------- - -Slow Check Execution -^^^^^^^^^^^^^^^^^^^^ - -If checks are slow: - -1. Use ``--dry-run`` to verify discovery without execution: - - .. code-block:: bash - - rapids doctor --dry-run - -2. Profile individual checks: - - .. code-block:: python - - import time - from my_package.checks import my_check - - start = time.time() - my_check(verbose=True) - print(f"Check took {time.time() - start:.2f}s") - -3. Optimize slow checks (keep under 1 second each) - -High Memory Usage -^^^^^^^^^^^^^^^^^ - -If ``rapids doctor`` uses too much memory: - -1. This is unexpected - report as a bug - -2. Workaround: Run checks individually: - - .. 
code-block:: bash - - rapids doctor package1 - rapids doctor package2 - -Reporting Issues ----------------- - -When reporting issues, include: - -1. Output of ``rapids debug --json`` - -2. Complete error messages from ``rapids doctor --verbose`` - -3. Steps to reproduce - -4. Expected vs actual behavior + rapids doctor --verbose -5. Environment details: +2. Gather full environment information: .. code-block:: bash - rapids debug > environment.txt - python --version - pip list > packages.txt - -Submit issues at: https://github.com/rapidsai/rapids-cli/issues - -Getting Help ------------- - -- GitHub Issues: https://github.com/rapidsai/rapids-cli/issues -- RAPIDS Slack: https://rapids.ai/community -- Documentation: https://docs.rapids.ai -- Stack Overflow: Tag questions with ``rapids`` and ``rapids-cli`` - -Known Limitations ------------------ + rapids debug --json > debug_info.json -- Windows support is experimental -- WSL2 requires special GPU setup -- Some checks require sudo access -- Docker containers need ``--gpus all`` flag -- Remote GPU monitoring not supported +3. Report issues at https://github.com/rapidsai/rapids-cli/issues with the + debug output attached. diff --git a/docs/source/user_guide.rst b/docs/source/user_guide.rst index 11da9ad..8656999 100644 --- a/docs/source/user_guide.rst +++ b/docs/source/user_guide.rst @@ -4,215 +4,113 @@ User Guide ========== -This guide provides detailed information on using the RAPIDS CLI. - -Overview --------- - -The RAPIDS CLI provides two main commands: - -- ``rapids doctor`` - Health checks for your RAPIDS installation -- ``rapids debug`` - Gather debugging information about your system +The RAPIDS CLI provides two commands: ``rapids doctor`` for health checks and +``rapids debug`` for gathering system information. rapids doctor ------------- -The ``doctor`` command performs health checks to ensure your RAPIDS environment is properly configured. 
- -Basic Usage -^^^^^^^^^^^ - -Run all health checks: +The ``doctor`` command performs health checks to ensure your RAPIDS environment +is properly configured. .. code-block:: bash rapids doctor -This will check: +Built-in checks verify: -- GPU availability and compatibility +- GPU availability and compute capability (7.0+) - CUDA driver version -- System memory to GPU memory ratio -- NVLink status (for multi-GPU systems) -- Any checks registered by installed RAPIDS packages +- System memory to GPU memory ratio (recommends 2:1 for Dask) +- NVLink status (multi-GPU systems) + +Any installed RAPIDS library can register additional checks via the plugin system +(see :doc:`plugin_development`). Verbose Output ^^^^^^^^^^^^^^ -Get detailed information about each check: +The ``--verbose`` flag shows check discovery details and per-check output: .. code-block:: bash - rapids doctor --verbose - -This shows: - -- Which checks are discovered -- Detailed output from each check -- Additional diagnostic information + $ rapids doctor --verbose + Discovering checks + Found check 'gpu' provided by 'rapids_cli.doctor.checks.gpu:gpu_check' + ... + Discovered 5 checks + Running checks + gpu_check: GPU(s) detected: 2 + All checks passed! Dry Run ^^^^^^^ -See which checks would run without actually executing them: +The ``--dry-run`` flag discovers checks without executing them, useful for +verifying plugin registration: .. code-block:: bash rapids doctor --dry-run -This is useful for: - -- Verifying plugin discovery -- Debugging check registration issues -- Understanding what will be checked +Filtering +^^^^^^^^^ -Filtering Checks -^^^^^^^^^^^^^^^^ - -Run only specific checks by filtering: +Pass filter arguments to run only matching checks. Filters match against +the check's module path: .. 
code-block:: bash # Run only cuDF-related checks rapids doctor cudf - # Run multiple filtered checks + # Run checks from multiple packages rapids doctor cudf cuml -The filter matches any part of the check's module path. - Exit Codes ^^^^^^^^^^ -The ``doctor`` command returns: - -- ``0`` - All checks passed -- ``1`` - One or more checks failed +- ``0``: All checks passed +- ``1``: One or more checks failed -This makes it suitable for use in scripts and CI/CD pipelines: +This makes ``rapids doctor`` suitable for scripting: .. code-block:: bash - if rapids doctor; then - echo "Environment is ready!" - else - echo "Environment has issues!" - exit 1 - fi + rapids doctor || exit 1 rapids debug ------------ -The ``debug`` command gathers comprehensive information about your system for troubleshooting. - -Basic Usage -^^^^^^^^^^^ - -Generate a debug report: +The ``debug`` command gathers comprehensive system information for troubleshooting. .. code-block:: bash rapids debug -This displays: - -- Platform information -- NVIDIA driver version -- CUDA version -- Python version and configuration -- Installed package versions -- System tools (pip, conda, cmake, etc.) -- OS information +Output includes: platform, NVIDIA driver version, CUDA version, CUDA runtime +path, system CTK locations, Python version, all installed package versions, +pip/conda package lists, available tools (pip, conda, uv, pixi, g++, cmake, +nvcc), and OS information. JSON Output ^^^^^^^^^^^ -Get machine-readable output: - -.. code-block:: bash - - rapids debug --json - -This is useful for: - -- Automated debugging scripts -- Parsing in other tools -- Sharing debug information programmatically - -The JSON output includes all information in a structured format: - -.. 
code-block:: json - - { - "date": "2025-02-11 15:30:00", - "platform": "Linux-6.8.0-94-generic-x86_64", - "driver_version": "550.54.15", - "cuda_version": "12.4", - "python_version": "3.13.12", - "package_versions": { - "rapids-cli": "0.1.0", - ... - }, - ... - } - -Saving Debug Output -^^^^^^^^^^^^^^^^^^^ - -Save debug information to a file: +The ``--json`` flag produces machine-readable output: .. code-block:: bash rapids debug --json > debug_info.json -This file can be: - -- Shared with support teams -- Attached to bug reports -- Used for comparison across environments - -Common Workflows ----------------- - -Pre-Installation Check -^^^^^^^^^^^^^^^^^^^^^^ - -Before installing RAPIDS, verify your system meets requirements: - -.. code-block:: bash - - # Install just the CLI first - pip install rapids-cli - - # Check system compatibility - rapids doctor --verbose - -The checks will tell you if your GPU, drivers, and CUDA are suitable for RAPIDS. - -Post-Installation Verification -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -After installing RAPIDS packages, verify everything works: - -.. code-block:: bash - - # Install RAPIDS - pip install cudf-cu12 cuml-cu12 - - # Verify installation - rapids doctor - - # If issues occur, gather debug info - rapids debug --json > debug_info.json +This is useful for attaching to bug reports or comparing environments. CI/CD Integration -^^^^^^^^^^^^^^^^^ +----------------- -Use RAPIDS CLI in your CI/CD pipelines: +Example GitHub Actions usage: .. code-block:: yaml - # GitHub Actions example - name: Verify RAPIDS Environment run: | pip install rapids-cli @@ -220,74 +118,4 @@ Use RAPIDS CLI in your CI/CD pipelines: - name: Save Debug Info on Failure if: failure() - run: rapids debug --json > ${{ github.workspace }}/debug.json - -Troubleshooting Workflow -^^^^^^^^^^^^^^^^^^^^^^^^^ - -When encountering issues: - -1. Run verbose health check: - - .. code-block:: bash - - rapids doctor --verbose - -2. Review warning messages and failures - -3. 
Gather full debug information: - - .. code-block:: bash - - rapids debug > debug_output.txt - -4. Check troubleshooting guide (see :doc:`troubleshooting`) - -5. Report issues with debug output - -Best Practices --------------- - -Regular Health Checks -^^^^^^^^^^^^^^^^^^^^^ - -Run ``rapids doctor`` regularly to catch configuration drift: - -.. code-block:: bash - - # Add to your shell profile - alias rapids-check='rapids doctor && echo "✓ RAPIDS environment healthy"' - -Environment Documentation -^^^^^^^^^^^^^^^^^^^^^^^^^ - -Document your environment with debug output: - -.. code-block:: bash - - # Save baseline configuration - rapids debug --json > baseline_env.json - - # Later, compare environments - rapids debug --json > current_env.json - diff baseline_env.json current_env.json - -Automated Monitoring -^^^^^^^^^^^^^^^^^^^^ - -Monitor RAPIDS environments automatically: - -.. code-block:: bash - - #!/bin/bash - # daily_rapids_check.sh - - if ! rapids doctor; then - rapids debug --json | mail -s "RAPIDS Health Check Failed" admin@example.com - fi - -Add to cron: - -.. code-block:: bash - - 0 9 * * * /path/to/daily_rapids_check.sh + run: rapids debug --json > debug.json diff --git a/rapids_cli/doctor/doctor.py b/rapids_cli/doctor/doctor.py index 0fdff86..0e8ce5d 100644 --- a/rapids_cli/doctor/doctor.py +++ b/rapids_cli/doctor/doctor.py @@ -34,26 +34,22 @@ def doctor_check( If specific subcommands are given, it validates them against valid subcommands and executes corresponding checks. - Parameters: - ---------- - filters : list (optional) - A list of filters to run specific checks. + Args: + verbose: Whether to print verbose output. + dry_run: Whether to skip running checks. + filters: A list of filters to run specific checks. Raises: - ------- - ValueError: - If an invalid subcommand is provided. + ValueError: If an invalid subcommand is provided. 
- Notes: - ----- - The function discovers and loads check functions defined in entry points - under the 'rapids_doctor_check' group. It also checks specific - configurations related to a corresponding subcommand if given. + Note: + The function discovers and loads check functions defined in entry points + under the ``rapids_doctor_check`` group. It also checks specific + configurations related to a corresponding subcommand if given. Example: - -------- - > doctor_check([]) # Run all health checks - > doctor_check(['cudf']) # Run 'cudf' specific checks + >>> doctor_check(verbose=False, dry_run=False) + >>> doctor_check(verbose=False, dry_run=False, filters=['cudf']) """ filters = [] if not filters else filters console.print( From 126bca09a1ae803a3feeabd904c69f5d8af5b4de Mon Sep 17 00:00:00 2001 From: Mike McCarty Date: Wed, 11 Feb 2026 17:06:36 -0500 Subject: [PATCH 3/3] refactored to use dependency injection for interfacing with hardware --- dependency-injection-refactoring.md | 152 +++++++++++++++ rapids_cli/debug/debug.py | 46 +++-- rapids_cli/doctor/checks/cuda_driver.py | 23 ++- rapids_cli/doctor/checks/gpu.py | 42 +++-- rapids_cli/doctor/checks/memory.py | 65 +++++-- rapids_cli/doctor/checks/nvlink.py | 34 ++-- rapids_cli/doctor/doctor.py | 5 +- rapids_cli/hardware.py | 229 +++++++++++++++++++++++ rapids_cli/tests/test_cuda.py | 27 +-- rapids_cli/tests/test_debug.py | 63 +++---- rapids_cli/tests/test_gpu.py | 83 ++++----- rapids_cli/tests/test_hardware.py | 235 ++++++++++++++++++++++++ rapids_cli/tests/test_memory.py | 96 +++++----- rapids_cli/tests/test_nvlink.py | 74 ++++---- 14 files changed, 915 insertions(+), 259 deletions(-) create mode 100644 dependency-injection-refactoring.md create mode 100644 rapids_cli/hardware.py create mode 100644 rapids_cli/tests/test_hardware.py diff --git a/dependency-injection-refactoring.md b/dependency-injection-refactoring.md new file mode 100644 index 0000000..697642c --- /dev/null +++ 
b/dependency-injection-refactoring.md @@ -0,0 +1,152 @@ +# Dependency Injection Refactoring + +## Context + +The check modules (`gpu.py`, `cuda_driver.py`, `memory.py`, `nvlink.py`) +and `debug.py` previously called `pynvml`, `psutil`, and `cuda.pathfinder` +directly. This forced tests to use 50+ `mock.patch` calls with deeply +nested context managers and `MagicMock` objects to simulate hardware +configurations. A thin abstraction layer was introduced so tests can +construct plain dataclasses instead of mocking low-level library internals. + +## Approach: Default Parameter Injection with Provider Dataclasses + +A single new file `rapids_cli/hardware.py` was created containing: + +- **`DeviceInfo`** dataclass -- holds per-GPU data + (index, compute capability, memory, nvlink states) +- **`GpuInfoProvider`** protocol -- read-only interface for GPU info + (`device_count`, `devices`, `cuda_driver_version`, `driver_version`) +- **`SystemInfoProvider`** protocol -- read-only interface for system info + (`total_memory_bytes`, `cuda_runtime_path`) +- **`NvmlGpuInfo`** -- real implementation backed by pynvml + (lazy-loads on first property access, caches results) +- **`DefaultSystemInfo`** -- real implementation backed by + psutil + cuda.pathfinder (lazy-loads per property) +- **`FakeGpuInfo`** / **`FakeSystemInfo`** -- test fakes + (plain dataclasses, no hardware dependency) +- **`FailingGpuInfo`** / **`FailingSystemInfo`** -- test fakes that + raise `ValueError` on access (simulates missing hardware) + +Check functions gained an optional keyword parameter with `None` default: + +```python +def gpu_check(verbose=False, *, gpu_info: GpuInfoProvider | None = None, **kwargs): + if gpu_info is None: # pragma: no cover + gpu_info = NvmlGpuInfo() +``` + +The orchestrator (`doctor.py`) creates a shared `NvmlGpuInfo()` instance +and passes it to all checks via `check_fn(verbose=verbose, gpu_info=gpu_info)`. 
+Third-party plugins safely ignore the extra keyword argument via their +own `**kwargs`. + +## Files Changed + +### New file: `rapids_cli/hardware.py` + +Contains all provider abstractions: + +- `DeviceInfo` dataclass with fields: `index`, `compute_capability`, + `memory_total_bytes`, `nvlink_states` +- `GpuInfoProvider` and `SystemInfoProvider` protocols + (runtime-checkable) +- `NvmlGpuInfo` -- calls `nvmlInit()` once on first property access, + queries all device info (count, compute capability, memory, + NVLink states), and caches everything +- `DefaultSystemInfo` -- lazily loads system memory via psutil and + CUDA path via cuda.pathfinder (each cached independently) +- `FakeGpuInfo`, `FakeSystemInfo` -- `@dataclass` test fakes with + pre-set data +- `FailingGpuInfo`, `FailingSystemInfo` -- test fakes that raise + `ValueError` on any property access + +### Modified: `rapids_cli/doctor/checks/gpu.py` + +- Removed `import pynvml` +- Added `gpu_info: GpuInfoProvider | None = None` parameter and + `**kwargs` to both `gpu_check()` and `check_gpu_compute_capability()` +- Replaced direct `pynvml` calls with `gpu_info.device_count` and + iteration over `gpu_info.devices` + +### Modified: `rapids_cli/doctor/checks/cuda_driver.py` + +- Removed `import pynvml` +- Added `gpu_info` parameter and `**kwargs` to `cuda_check()` +- Replaced nested try/except with `gpu_info.cuda_driver_version` + +### Modified: `rapids_cli/doctor/checks/memory.py` + +- Removed `import pynvml` and `import psutil` +- Added `system_info` parameter to `get_system_memory()` +- Added `gpu_info` parameter to `get_gpu_memory()` +- Added both `gpu_info` and `system_info` parameters to + `check_memory_to_gpu_ratio()` +- `get_system_memory()` reads `system_info.total_memory_bytes` +- `get_gpu_memory()` sums `dev.memory_total_bytes` from + `gpu_info.devices` +- `check_memory_to_gpu_ratio()` passes injected providers down + to helpers + +### Modified: `rapids_cli/doctor/checks/nvlink.py` + +- Removed `import 
pynvml` +- Added `gpu_info` parameter and `**kwargs` to `check_nvlink_status()` +- Iterates `dev.nvlink_states` instead of calling + `nvmlDeviceGetNvLinkState` +- **Side-fix**: the original code always passed `0` instead of + `nvlink_id` to `nvmlDeviceGetNvLinkState`; the refactored + `NvmlGpuInfo` queries each link by its actual index + +### Modified: `rapids_cli/debug/debug.py` + +- Removed `import pynvml` and `import cuda.pathfinder` +- Added `gpu_info` parameter to `gather_cuda_version()` +- Added `gpu_info` and `system_info` parameters to `run_debug()` +- Replaced direct pynvml/cuda.pathfinder calls with provider + property accesses + +### Modified: `rapids_cli/doctor/doctor.py` + +- Imports `NvmlGpuInfo` from `rapids_cli.hardware` +- Creates a shared `NvmlGpuInfo()` instance before the check loop +- Passes it via `check_fn(verbose=verbose, gpu_info=gpu_info)` + +### Rewritten tests + +`test_gpu.py`, `test_cuda.py`, `test_memory.py`, `test_nvlink.py`, +`test_debug.py`: + +- Replaced all `patch("pynvml.*")` / `patch("psutil.*")` / + `patch("cuda.pathfinder.*")` with `FakeGpuInfo` / `FakeSystemInfo` / + `FailingGpuInfo` construction +- Tests for `debug.py` still use patches for non-hardware concerns + (subprocess, pathlib, gather_tools) + +### New file: `rapids_cli/tests/test_hardware.py` + +- Unit tests for `NvmlGpuInfo` + (init failure, loads once, device data, NVLink states, no NVLink) +- Unit tests for `DefaultSystemInfo` + (total memory, CUDA runtime path, caching) +- Tests for `FakeGpuInfo` / `FakeSystemInfo` + (defaults, custom values, protocol satisfaction) +- Tests for `FailingGpuInfo` / `FailingSystemInfo` + (all properties raise) + +## Impact + +| Metric | Before | After | +| --------------------------------------------- | ------- | --------------------------------- | +| Hardware library patches in check/debug tests | ~51 | 0 (moved to test_hardware.py) | +| import pynvml in check/debug modules | 5 files | 1 file (hardware.py) | +| MagicMock objects 
for hardware | ~11 | 0 | +| pynvml.nvmlInit() calls in production | 7 | 1 (in NvmlGpuInfo._ensure_loaded) | +| Total tests | 53 | 72 (+19 hardware tests) | +| Coverage | 95%+ | 97.72% | + +## Verification + +1. `pytest` -- all 72 tests pass +2. `pytest --cov-fail-under=95` -- coverage at 97.72%, above threshold +3. `pre-commit run --all-files` -- all checks pass diff --git a/rapids_cli/debug/debug.py b/rapids_cli/debug/debug.py index fca4d1d..b4afde5 100644 --- a/rapids_cli/debug/debug.py +++ b/rapids_cli/debug/debug.py @@ -2,6 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 """This module contains the debug subcommand for the Rapids CLI.""" +from __future__ import annotations + import json import platform import subprocess @@ -9,22 +11,29 @@ from datetime import datetime from importlib.metadata import distributions, version from pathlib import Path +from typing import TYPE_CHECKING -import cuda.pathfinder -import pynvml from rich.console import Console from rich.table import Table +if TYPE_CHECKING: + from rapids_cli.hardware import GpuInfoProvider, SystemInfoProvider + console = Console() -def gather_cuda_version(): +def gather_cuda_version(*, gpu_info: GpuInfoProvider | None = None): """Return CUDA driver version as a string, similar to nvidia-smi output.""" - version = pynvml.nvmlSystemGetCudaDriverVersion() + if gpu_info is None: # pragma: no cover + from rapids_cli.hardware import NvmlGpuInfo + + gpu_info = NvmlGpuInfo() + + ver = gpu_info.cuda_driver_version # pynvml returns an int like 12040 for 12.4, so format as string - major = version // 1000 - minor = (version % 1000) // 10 - patch = version % 10 + major = ver // 1000 + minor = (ver % 1000) // 10 + patch = ver % 10 if patch == 0: return f"{major}.{minor}" else: @@ -67,18 +76,31 @@ def gather_tools(): } -def run_debug(output_format="console"): +def run_debug( + output_format="console", + *, + gpu_info: GpuInfoProvider | None = None, + system_info: SystemInfoProvider | None = None, +): """Run debug.""" - 
pynvml.nvmlInit() + if gpu_info is None: # pragma: no cover + from rapids_cli.hardware import NvmlGpuInfo + + gpu_info = NvmlGpuInfo() + if system_info is None: # pragma: no cover + from rapids_cli.hardware import DefaultSystemInfo + + system_info = DefaultSystemInfo() + debug_info = { "date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "platform": platform.platform(), "nvidia_smi_output": gather_command_output( ["nvidia-smi"], "Nvidia-smi not installed" ), - "driver_version": pynvml.nvmlSystemGetDriverVersion(), - "cuda_version": gather_cuda_version(), - "cuda_runtime_path": cuda.pathfinder.find_nvidia_header_directory("cudart"), + "driver_version": gpu_info.driver_version, + "cuda_version": gather_cuda_version(gpu_info=gpu_info), + "cuda_runtime_path": system_info.cuda_runtime_path, "system_ctk": sorted( [str(p) for p in Path("/usr/local").glob("cuda*") if p.is_dir()] ), diff --git a/rapids_cli/doctor/checks/cuda_driver.py b/rapids_cli/doctor/checks/cuda_driver.py index 252dd47..6275c1a 100644 --- a/rapids_cli/doctor/checks/cuda_driver.py +++ b/rapids_cli/doctor/checks/cuda_driver.py @@ -2,17 +2,22 @@ # SPDX-License-Identifier: Apache-2.0 """Check for CUDA and driver compatibility.""" -import pynvml +from __future__ import annotations +from typing import TYPE_CHECKING -def cuda_check(verbose=False): +if TYPE_CHECKING: + from rapids_cli.hardware import GpuInfoProvider + + +def cuda_check(verbose=False, *, gpu_info: GpuInfoProvider | None = None, **kwargs): """Check CUDA availability.""" + if gpu_info is None: # pragma: no cover + from rapids_cli.hardware import NvmlGpuInfo + + gpu_info = NvmlGpuInfo() + try: - pynvml.nvmlInit() - try: - cuda_version = pynvml.nvmlSystemGetCudaDriverVersion() - return cuda_version - except pynvml.NVMLError as e: - raise ValueError("Unable to look up CUDA version") from e - except pynvml.NVMLError as e: + return gpu_info.cuda_driver_version + except ValueError as e: raise ValueError("Unable to look up CUDA version") from e diff 
--git a/rapids_cli/doctor/checks/gpu.py b/rapids_cli/doctor/checks/gpu.py index 77e6ca6..d8e1a45 100644 --- a/rapids_cli/doctor/checks/gpu.py +++ b/rapids_cli/doctor/checks/gpu.py @@ -2,38 +2,52 @@ # SPDX-License-Identifier: Apache-2.0 """GPU checks for the doctor command.""" -import pynvml +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from rapids_cli.hardware import GpuInfoProvider REQUIRED_COMPUTE_CAPABILITY = 7 -def gpu_check(verbose=False): +def gpu_check(verbose=False, *, gpu_info: GpuInfoProvider | None = None, **kwargs): """Check GPU availability.""" + if gpu_info is None: # pragma: no cover + from rapids_cli.hardware import NvmlGpuInfo + + gpu_info = NvmlGpuInfo() + try: - pynvml.nvmlInit() - num_gpus = pynvml.nvmlDeviceGetCount() - except pynvml.NVMLError as e: + num_gpus = gpu_info.device_count + except ValueError as e: raise ValueError("No available GPUs detected") from e assert num_gpus > 0, "No GPUs detected" return f"GPU(s) detected: {num_gpus}" -def check_gpu_compute_capability(verbose): +def check_gpu_compute_capability( + verbose=False, *, gpu_info: GpuInfoProvider | None = None, **kwargs +): """Check the system for GPU Compute Capability.""" + if gpu_info is None: # pragma: no cover + from rapids_cli.hardware import NvmlGpuInfo + + gpu_info = NvmlGpuInfo() + try: - pynvml.nvmlInit() - except pynvml.NVMLError as e: + devices = gpu_info.devices + except ValueError as e: raise ValueError("No GPU - cannot determine GPU Compute Capability") from e - for i in range(pynvml.nvmlDeviceGetCount()): - handle = pynvml.nvmlDeviceGetHandleByIndex(i) - major, minor = pynvml.nvmlDeviceGetCudaComputeCapability(handle) - if major >= REQUIRED_COMPUTE_CAPABILITY: + for dev in devices: + if dev.compute_capability[0] >= REQUIRED_COMPUTE_CAPABILITY: continue else: raise ValueError( - f"GPU {i} requires compute capability {REQUIRED_COMPUTE_CAPABILITY} " - f"or higher but only has {major}.{minor}." 
+ f"GPU {dev.index} requires compute capability {REQUIRED_COMPUTE_CAPABILITY} " + f"or higher but only has {dev.compute_capability[0]}.{dev.compute_capability[1]}." "See https://developer.nvidia.com/cuda-gpus for more information." ) return True diff --git a/rapids_cli/doctor/checks/memory.py b/rapids_cli/doctor/checks/memory.py index cb1fcb5..f1d8231 100644 --- a/rapids_cli/doctor/checks/memory.py +++ b/rapids_cli/doctor/checks/memory.py @@ -2,46 +2,71 @@ # SPDX-License-Identifier: Apache-2.0 """Memory checks.""" +from __future__ import annotations + import warnings +from typing import TYPE_CHECKING -import psutil -import pynvml +if TYPE_CHECKING: + from rapids_cli.hardware import GpuInfoProvider, SystemInfoProvider -def get_system_memory(verbose=False): +def get_system_memory( + verbose=False, *, system_info: SystemInfoProvider | None = None, **kwargs +): """Get the total system memory.""" - virtual_memory = psutil.virtual_memory() - total_memory = virtual_memory.total / (1024**3) # converts bytes to gigabytes + if system_info is None: # pragma: no cover + from rapids_cli.hardware import DefaultSystemInfo + + system_info = DefaultSystemInfo() + + total_memory = system_info.total_memory_bytes / ( + 1024**3 + ) # converts bytes to gigabytes return total_memory -def get_gpu_memory(verbose=False): +def get_gpu_memory(verbose=False, *, gpu_info: GpuInfoProvider | None = None, **kwargs): """Get the total GPU memory.""" - pynvml.nvmlInit() - gpus = pynvml.nvmlDeviceGetCount() - gpu_memory_total = 0 - for i in range(gpus): - handle = pynvml.nvmlDeviceGetHandleByIndex(i) - memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle) - gpu_memory_total += memory_info.total / (1024**3) # converts to gigabytes - - pynvml.nvmlShutdown() + if gpu_info is None: # pragma: no cover + from rapids_cli.hardware import NvmlGpuInfo + + gpu_info = NvmlGpuInfo() + + gpu_memory_total = sum(dev.memory_total_bytes for dev in gpu_info.devices) / ( + 1024**3 + ) # converts to gigabytes return 
gpu_memory_total -def check_memory_to_gpu_ratio(verbose=True): +def check_memory_to_gpu_ratio( + verbose=True, + *, + gpu_info: GpuInfoProvider | None = None, + system_info: SystemInfoProvider | None = None, + **kwargs, +): """Check the system for a 2:1 ratio of system Memory to total GPU Memory. This is especially useful for Dask. """ + if gpu_info is None: # pragma: no cover + from rapids_cli.hardware import NvmlGpuInfo + + gpu_info = NvmlGpuInfo() + if system_info is None: # pragma: no cover + from rapids_cli.hardware import DefaultSystemInfo + + system_info = DefaultSystemInfo() + try: - pynvml.nvmlInit() - except pynvml.NVMLError as e: + _ = gpu_info.device_count + except ValueError as e: raise ValueError("GPU not found. Please ensure GPUs are installed.") from e - system_memory = get_system_memory(verbose) - gpu_memory = get_gpu_memory(verbose) + system_memory = get_system_memory(verbose, system_info=system_info) + gpu_memory = get_gpu_memory(verbose, gpu_info=gpu_info) ratio = system_memory / gpu_memory if ratio < 1.8: warnings.warn( diff --git a/rapids_cli/doctor/checks/nvlink.py b/rapids_cli/doctor/checks/nvlink.py index 22bbdd1..715a8fd 100644 --- a/rapids_cli/doctor/checks/nvlink.py +++ b/rapids_cli/doctor/checks/nvlink.py @@ -2,25 +2,33 @@ # SPDX-License-Identifier: Apache-2.0 """Check for NVLink status.""" -import pynvml +from __future__ import annotations +from typing import TYPE_CHECKING -def check_nvlink_status(verbose=True): +if TYPE_CHECKING: + from rapids_cli.hardware import GpuInfoProvider + + +def check_nvlink_status( + verbose=True, *, gpu_info: GpuInfoProvider | None = None, **kwargs +): """Check the system for NVLink with 2 or more GPUs.""" + if gpu_info is None: # pragma: no cover + from rapids_cli.hardware import NvmlGpuInfo + + gpu_info = NvmlGpuInfo() + try: - pynvml.nvmlInit() - except pynvml.NVMLError as e: + device_count = gpu_info.device_count + except ValueError as e: raise ValueError("GPU not found. 
Please ensure GPUs are installed.") from e - device_count = pynvml.nvmlDeviceGetCount() if device_count < 2: return False - for i in range(device_count): - handle = pynvml.nvmlDeviceGetHandleByIndex(i) - for nvlink_id in range(pynvml.NVML_NVLINK_MAX_LINKS): - try: - pynvml.nvmlDeviceGetNvLinkState(handle, 0) - return True - except pynvml.NVMLError as e: - raise ValueError(f"NVLink {nvlink_id} Status Check Failed") from e + for dev in gpu_info.devices: + if any(dev.nvlink_states): + return True + + return False diff --git a/rapids_cli/doctor/doctor.py b/rapids_cli/doctor/doctor.py index 0e8ce5d..c497300 100644 --- a/rapids_cli/doctor/doctor.py +++ b/rapids_cli/doctor/doctor.py @@ -10,6 +10,7 @@ from rapids_cli._compatibility import entry_points from rapids_cli.constants import DOCTOR_SYMBOL +from rapids_cli.hardware import NvmlGpuInfo console = Console() @@ -74,6 +75,8 @@ def doctor_check( console.print("Dry run, skipping checks") return True + gpu_info = NvmlGpuInfo() + results: list[CheckResult] = [] with console.status("[bold green]Running checks...") as ui_status: for i, check_fn in enumerate(checks): @@ -85,7 +88,7 @@ def doctor_check( with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") status = True - value = check_fn(verbose=verbose) + value = check_fn(verbose=verbose, gpu_info=gpu_info) caught_warnings = w except Exception as e: diff --git a/rapids_cli/hardware.py b/rapids_cli/hardware.py new file mode 100644 index 0000000..94aab52 --- /dev/null +++ b/rapids_cli/hardware.py @@ -0,0 +1,229 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 +"""Hardware abstraction layer for GPU and system information.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Protocol, runtime_checkable + + +@dataclass +class DeviceInfo: + """Per-GPU device information.""" + + index: int + compute_capability: tuple[int, int] + memory_total_bytes: int + nvlink_states: list[bool] = field(default_factory=list) + + +@runtime_checkable +class GpuInfoProvider(Protocol): + """Read-only interface for GPU information.""" + + @property + def device_count(self) -> int: + """Return number of GPU devices.""" + ... + + @property + def devices(self) -> list[DeviceInfo]: + """Return list of device information.""" + ... + + @property + def cuda_driver_version(self) -> int: + """Return CUDA driver version as integer.""" + ... + + @property + def driver_version(self) -> str: + """Return driver version string.""" + ... + + +@runtime_checkable +class SystemInfoProvider(Protocol): + """Read-only interface for system information.""" + + @property + def total_memory_bytes(self) -> int: + """Return total system memory in bytes.""" + ... + + @property + def cuda_runtime_path(self) -> str | None: + """Return path to CUDA runtime headers.""" + ... + + +class NvmlGpuInfo: + """Real GPU info provider backed by pynvml. + + Lazily loads all device information on first property access and caches results. 
+ """ + + def __init__(self) -> None: + """Initialize with empty cached state.""" + self._loaded = False + self._device_count = 0 + self._devices: list[DeviceInfo] = [] + self._cuda_driver_version = 0 + self._driver_version = "" + + def _ensure_loaded(self) -> None: + if self._loaded: + return + + import pynvml + + try: + pynvml.nvmlInit() + except pynvml.NVMLError as e: + raise ValueError("Unable to initialize GPU driver (NVML)") from e + + self._device_count = pynvml.nvmlDeviceGetCount() + self._cuda_driver_version = pynvml.nvmlSystemGetCudaDriverVersion() + self._driver_version = pynvml.nvmlSystemGetDriverVersion() + + self._devices = [] + for i in range(self._device_count): + handle = pynvml.nvmlDeviceGetHandleByIndex(i) + major, minor = pynvml.nvmlDeviceGetCudaComputeCapability(handle) + memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle) + + nvlink_states: list[bool] = [] + for link_id in range(pynvml.NVML_NVLINK_MAX_LINKS): + try: + state = pynvml.nvmlDeviceGetNvLinkState(handle, link_id) + nvlink_states.append(bool(state)) + except pynvml.NVMLError: + break + + self._devices.append( + DeviceInfo( + index=i, + compute_capability=(major, minor), + memory_total_bytes=memory_info.total, + nvlink_states=nvlink_states, + ) + ) + + self._loaded = True + + @property + def device_count(self) -> int: + """Return number of GPU devices.""" + self._ensure_loaded() + return self._device_count + + @property + def devices(self) -> list[DeviceInfo]: + """Return list of device information.""" + self._ensure_loaded() + return self._devices + + @property + def cuda_driver_version(self) -> int: + """Return CUDA driver version as integer (e.g. 12040).""" + self._ensure_loaded() + return self._cuda_driver_version + + @property + def driver_version(self) -> str: + """Return driver version string.""" + self._ensure_loaded() + return self._driver_version + + +class DefaultSystemInfo: + """Real system info provider backed by psutil and cuda.pathfinder. 
+ + Lazily loads each piece of information on first access. + """ + + def __init__(self) -> None: + """Initialize with empty cached state.""" + self._memory_loaded = False + self._total_memory_bytes = 0 + self._cuda_path_loaded = False + self._cuda_runtime_path: str | None = None + + @property + def total_memory_bytes(self) -> int: + """Return total system memory in bytes.""" + if not self._memory_loaded: + import psutil + + self._total_memory_bytes = psutil.virtual_memory().total + self._memory_loaded = True + return self._total_memory_bytes + + @property + def cuda_runtime_path(self) -> str | None: + """Return path to CUDA runtime headers.""" + if not self._cuda_path_loaded: + import cuda.pathfinder + + self._cuda_runtime_path = cuda.pathfinder.find_nvidia_header_directory( + "cudart" + ) + self._cuda_path_loaded = True + return self._cuda_runtime_path + + +@dataclass +class FakeGpuInfo: + """Test fake for GPU information with pre-set data.""" + + device_count: int = 0 + devices: list[DeviceInfo] = field(default_factory=list) + cuda_driver_version: int = 0 + driver_version: str = "" + + +@dataclass +class FakeSystemInfo: + """Test fake for system information with pre-set data.""" + + total_memory_bytes: int = 0 + cuda_runtime_path: str | None = None + + +class FailingGpuInfo: + """Test fake that raises ValueError on any property access.""" + + @property + def device_count(self) -> int: + """Raise ValueError.""" + raise ValueError("No GPU available") + + @property + def devices(self) -> list[DeviceInfo]: + """Raise ValueError.""" + raise ValueError("No GPU available") + + @property + def cuda_driver_version(self) -> int: + """Raise ValueError.""" + raise ValueError("No GPU available") + + @property + def driver_version(self) -> str: + """Raise ValueError.""" + raise ValueError("No GPU available") + + +class FailingSystemInfo: + """Test fake that raises ValueError on any property access.""" + + @property + def total_memory_bytes(self) -> int: + """Raise 
ValueError.""" + raise ValueError("System info unavailable") + + @property + def cuda_runtime_path(self) -> str | None: + """Raise ValueError.""" + raise ValueError("System info unavailable") diff --git a/rapids_cli/tests/test_cuda.py b/rapids_cli/tests/test_cuda.py index 70097b2..de4fd99 100644 --- a/rapids_cli/tests/test_cuda.py +++ b/rapids_cli/tests/test_cuda.py @@ -1,26 +1,17 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -from unittest.mock import patch +import pytest from rapids_cli.doctor.checks.cuda_driver import cuda_check +from rapids_cli.hardware import FailingGpuInfo, FakeGpuInfo -def mock_cuda_version(): - return 12050 +def test_cuda_check_success(): + gpu_info = FakeGpuInfo(cuda_driver_version=12050) + assert cuda_check(verbose=True, gpu_info=gpu_info) == 12050 -def test_get_cuda_version_success(): - with ( - patch("pynvml.nvmlInit"), - patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12050), - ): - version = mock_cuda_version() - assert version - - -def test_cuda_check_success(capfd): - with ( - patch("pynvml.nvmlInit"), - patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12050), - ): - assert cuda_check(verbose=True) +def test_cuda_check_no_gpu(): + gpu_info = FailingGpuInfo() + with pytest.raises(ValueError, match="Unable to look up CUDA version"): + cuda_check(verbose=False, gpu_info=gpu_info) diff --git a/rapids_cli/tests/test_debug.py b/rapids_cli/tests/test_debug.py index 91c330c..79b9db5 100644 --- a/rapids_cli/tests/test_debug.py +++ b/rapids_cli/tests/test_debug.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 import json -from unittest.mock import MagicMock, patch +from unittest.mock import patch from rapids_cli.debug.debug import ( gather_command_output, @@ -10,24 +10,22 @@ gather_tools, run_debug, ) +from rapids_cli.hardware import FakeGpuInfo, FakeSystemInfo def test_gather_cuda_version(): - """Test CUDA version gathering.""" - with patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12040): - result = gather_cuda_version() - assert result == "12.4" + gpu_info = FakeGpuInfo(cuda_driver_version=12040) + result = gather_cuda_version(gpu_info=gpu_info) + assert result == "12.4" def test_gather_cuda_version_with_patch(): - """Test CUDA version with patch number.""" - with patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12345): - result = gather_cuda_version() - assert result == "12.34.5" + gpu_info = FakeGpuInfo(cuda_driver_version=12345) + result = gather_cuda_version(gpu_info=gpu_info) + assert result == "12.34.5" def test_gather_package_versions(): - """Test package version gathering.""" result = gather_package_versions() assert isinstance(result, dict) assert len(result) > 0 @@ -36,25 +34,21 @@ def test_gather_package_versions(): def test_gather_command_output_success(): - """Test successful command output gathering.""" result = gather_command_output(["echo", "test"]) assert result == "test" def test_gather_command_output_with_fallback(): - """Test command output with fallback.""" result = gather_command_output(["nonexistent_command"], fallback_output="fallback") assert result == "fallback" def test_gather_command_output_no_fallback(): - """Test command output without fallback.""" result = gather_command_output(["nonexistent_command"]) assert result is None def test_gather_tools(): - """Test tools gathering.""" with ( patch( "rapids_cli.debug.debug.gather_command_output", @@ -69,40 +63,41 @@ def test_gather_tools(): def test_run_debug_console(capsys): - """Test run_debug with console output.""" - mock_vm = 
MagicMock() - mock_vm.total = 32 * 1024**3 + gpu_info = FakeGpuInfo( + device_count=1, + cuda_driver_version=12040, + driver_version="550.54.15", + ) + system_info = FakeSystemInfo( + total_memory_bytes=32 * 1024**3, + cuda_runtime_path="/usr/local/cuda/include", + ) with ( - patch("pynvml.nvmlInit"), - patch("pynvml.nvmlSystemGetDriverVersion", return_value="550.54.15"), - patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12040), - patch( - "cuda.pathfinder.find_nvidia_header_directory", - return_value="/usr/local/cuda/include", - ), patch("pathlib.Path.glob", return_value=[]), patch("rapids_cli.debug.debug.gather_package_versions", return_value={}), patch("rapids_cli.debug.debug.gather_command_output", return_value=None), patch("rapids_cli.debug.debug.gather_tools", return_value={}), patch("pathlib.Path.read_text", return_value='NAME="Ubuntu"\nVERSION="22.04"'), ): - run_debug(output_format="console") + run_debug(output_format="console", gpu_info=gpu_info, system_info=system_info) captured = capsys.readouterr() assert "RAPIDS Debug Information" in captured.out def test_run_debug_json(capsys): - """Test run_debug with JSON output.""" + gpu_info = FakeGpuInfo( + device_count=1, + cuda_driver_version=12040, + driver_version="550.54.15", + ) + system_info = FakeSystemInfo( + total_memory_bytes=32 * 1024**3, + cuda_runtime_path="/usr/local/cuda/include", + ) + with ( - patch("pynvml.nvmlInit"), - patch("pynvml.nvmlSystemGetDriverVersion", return_value="550.54.15"), - patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12040), - patch( - "cuda.pathfinder.find_nvidia_header_directory", - return_value="/usr/local/cuda/include", - ), patch("pathlib.Path.glob", return_value=[]), patch( "rapids_cli.debug.debug.gather_package_versions", @@ -114,7 +109,7 @@ def test_run_debug_json(capsys): patch("rapids_cli.debug.debug.gather_tools", return_value={"pip": "pip 23.0"}), patch("pathlib.Path.read_text", return_value='NAME="Ubuntu"\nVERSION="22.04"'), ): - 
run_debug(output_format="json") + run_debug(output_format="json", gpu_info=gpu_info, system_info=system_info) captured = capsys.readouterr() output = json.loads(captured.out) diff --git a/rapids_cli/tests/test_gpu.py b/rapids_cli/tests/test_gpu.py index a895bc2..f9fdf28 100644 --- a/rapids_cli/tests/test_gpu.py +++ b/rapids_cli/tests/test_gpu.py @@ -1,7 +1,5 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -from unittest.mock import patch - import pytest from rapids_cli.doctor.checks.gpu import ( @@ -9,67 +7,60 @@ check_gpu_compute_capability, gpu_check, ) +from rapids_cli.hardware import DeviceInfo, FailingGpuInfo, FakeGpuInfo def test_gpu_check_success(): - with ( - patch("pynvml.nvmlInit"), - patch("pynvml.nvmlDeviceGetCount", return_value=2), - ): - result = gpu_check(verbose=True) - assert result == "GPU(s) detected: 2" + gpu_info = FakeGpuInfo(device_count=2) + result = gpu_check(verbose=True, gpu_info=gpu_info) + assert result == "GPU(s) detected: 2" def test_gpu_check_no_gpus(): - with ( - patch("pynvml.nvmlInit"), - patch("pynvml.nvmlDeviceGetCount", return_value=0), - ): - with pytest.raises(AssertionError, match="No GPUs detected"): - gpu_check(verbose=False) + gpu_info = FakeGpuInfo(device_count=0) + with pytest.raises(AssertionError, match="No GPUs detected"): + gpu_check(verbose=False, gpu_info=gpu_info) def test_gpu_check_nvml_error(): - import pynvml - - with patch("pynvml.nvmlInit", side_effect=pynvml.NVMLError(1)): - with pytest.raises(ValueError, match="No available GPUs detected"): - gpu_check(verbose=False) + gpu_info = FailingGpuInfo() + with pytest.raises(ValueError, match="No available GPUs detected"): + gpu_check(verbose=False, gpu_info=gpu_info) def test_check_gpu_compute_capability_success(): - with ( - patch("pynvml.nvmlInit"), - patch("pynvml.nvmlDeviceGetCount", return_value=2), - patch("pynvml.nvmlDeviceGetHandleByIndex"), - patch( - 
"pynvml.nvmlDeviceGetCudaComputeCapability", - return_value=(REQUIRED_COMPUTE_CAPABILITY, 5), + devices = [ + DeviceInfo( + index=0, + compute_capability=(REQUIRED_COMPUTE_CAPABILITY, 5), + memory_total_bytes=0, ), - ): - result = check_gpu_compute_capability(verbose=True) - assert result is True + DeviceInfo( + index=1, + compute_capability=(REQUIRED_COMPUTE_CAPABILITY, 5), + memory_total_bytes=0, + ), + ] + gpu_info = FakeGpuInfo(device_count=2, devices=devices) + result = check_gpu_compute_capability(verbose=True, gpu_info=gpu_info) + assert result is True def test_check_gpu_compute_capability_insufficient(): - with ( - patch("pynvml.nvmlInit"), - patch("pynvml.nvmlDeviceGetCount", return_value=1), - patch("pynvml.nvmlDeviceGetHandleByIndex"), - patch("pynvml.nvmlDeviceGetCudaComputeCapability", return_value=(6, 0)), + devices = [ + DeviceInfo(index=0, compute_capability=(6, 0), memory_total_bytes=0), + ] + gpu_info = FakeGpuInfo(device_count=1, devices=devices) + with pytest.raises( + ValueError, + match=f"GPU 0 requires compute capability {REQUIRED_COMPUTE_CAPABILITY}", ): - with pytest.raises( - ValueError, - match=f"GPU 0 requires compute capability {REQUIRED_COMPUTE_CAPABILITY}", - ): - check_gpu_compute_capability(verbose=False) + check_gpu_compute_capability(verbose=False, gpu_info=gpu_info) def test_check_gpu_compute_capability_no_gpu(): - import pynvml - - with patch("pynvml.nvmlInit", side_effect=pynvml.NVMLError(1)): - with pytest.raises( - ValueError, match="No GPU - cannot determine GPU Compute Capability" - ): - check_gpu_compute_capability(verbose=False) + gpu_info = FailingGpuInfo() + with pytest.raises( + ValueError, match="No GPU - cannot determine GPU Compute Capability" + ): + check_gpu_compute_capability(verbose=False, gpu_info=gpu_info) diff --git a/rapids_cli/tests/test_hardware.py b/rapids_cli/tests/test_hardware.py new file mode 100644 index 0000000..1236e0f --- /dev/null +++ b/rapids_cli/tests/test_hardware.py @@ -0,0 +1,235 @@ +# 
SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +from unittest.mock import MagicMock, patch + +import pynvml +import pytest + +from rapids_cli.hardware import ( + DefaultSystemInfo, + DeviceInfo, + FailingGpuInfo, + FailingSystemInfo, + FakeGpuInfo, + FakeSystemInfo, + GpuInfoProvider, + NvmlGpuInfo, + SystemInfoProvider, +) + +# --- NvmlGpuInfo tests --- + + +def test_nvml_gpu_info_init_failure(): + with patch( + "pynvml.nvmlInit", + side_effect=pynvml.NVMLError(pynvml.NVML_ERROR_DRIVER_NOT_LOADED), + ): + gpu_info = NvmlGpuInfo() + with pytest.raises(ValueError, match="Unable to initialize GPU driver"): + _ = gpu_info.device_count + + +def test_nvml_gpu_info_loads_once(): + mock_handle = MagicMock() + mock_memory = MagicMock() + mock_memory.total = 16 * 1024**3 + + with ( + patch("pynvml.nvmlInit") as mock_init, + patch("pynvml.nvmlDeviceGetCount", return_value=1), + patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12050), + patch("pynvml.nvmlSystemGetDriverVersion", return_value="550.54"), + patch("pynvml.nvmlDeviceGetHandleByIndex", return_value=mock_handle), + patch("pynvml.nvmlDeviceGetCudaComputeCapability", return_value=(7, 5)), + patch("pynvml.nvmlDeviceGetMemoryInfo", return_value=mock_memory), + patch( + "pynvml.nvmlDeviceGetNvLinkState", side_effect=pynvml.NVMLError_NotSupported + ), + ): + gpu_info = NvmlGpuInfo() + # Access multiple properties to verify caching + _ = gpu_info.device_count + _ = gpu_info.devices + _ = gpu_info.cuda_driver_version + _ = gpu_info.driver_version + # nvmlInit should be called exactly once + mock_init.assert_called_once() + + +def test_nvml_gpu_info_device_data(): + mock_handle = MagicMock() + mock_memory = MagicMock() + mock_memory.total = 24 * 1024**3 + + with ( + patch("pynvml.nvmlInit"), + patch("pynvml.nvmlDeviceGetCount", return_value=2), + patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12060), + 
patch("pynvml.nvmlSystemGetDriverVersion", return_value="560.10"), + patch("pynvml.nvmlDeviceGetHandleByIndex", return_value=mock_handle), + patch("pynvml.nvmlDeviceGetCudaComputeCapability", return_value=(9, 0)), + patch("pynvml.nvmlDeviceGetMemoryInfo", return_value=mock_memory), + patch("pynvml.nvmlDeviceGetNvLinkState", return_value=1), + ): + gpu_info = NvmlGpuInfo() + assert gpu_info.device_count == 2 + assert len(gpu_info.devices) == 2 + assert gpu_info.devices[0].compute_capability == (9, 0) + assert gpu_info.devices[0].memory_total_bytes == 24 * 1024**3 + assert gpu_info.cuda_driver_version == 12060 + assert gpu_info.driver_version == "560.10" + + +def test_nvml_gpu_info_nvlink_states(): + mock_handle = MagicMock() + mock_memory = MagicMock() + mock_memory.total = 16 * 1024**3 + + def nvlink_side_effect(handle, link_id): + if link_id < 2: + return 1 + raise pynvml.NVMLError_NotSupported() + + with ( + patch("pynvml.nvmlInit"), + patch("pynvml.nvmlDeviceGetCount", return_value=1), + patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12050), + patch("pynvml.nvmlSystemGetDriverVersion", return_value="550.54"), + patch("pynvml.nvmlDeviceGetHandleByIndex", return_value=mock_handle), + patch("pynvml.nvmlDeviceGetCudaComputeCapability", return_value=(7, 5)), + patch("pynvml.nvmlDeviceGetMemoryInfo", return_value=mock_memory), + patch("pynvml.nvmlDeviceGetNvLinkState", side_effect=nvlink_side_effect), + ): + gpu_info = NvmlGpuInfo() + assert gpu_info.devices[0].nvlink_states == [True, True] + + +def test_nvml_gpu_info_no_nvlink(): + mock_handle = MagicMock() + mock_memory = MagicMock() + mock_memory.total = 16 * 1024**3 + + with ( + patch("pynvml.nvmlInit"), + patch("pynvml.nvmlDeviceGetCount", return_value=1), + patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12050), + patch("pynvml.nvmlSystemGetDriverVersion", return_value="550.54"), + patch("pynvml.nvmlDeviceGetHandleByIndex", return_value=mock_handle), + 
patch("pynvml.nvmlDeviceGetCudaComputeCapability", return_value=(7, 5)), + patch("pynvml.nvmlDeviceGetMemoryInfo", return_value=mock_memory), + patch( + "pynvml.nvmlDeviceGetNvLinkState", side_effect=pynvml.NVMLError_NotSupported + ), + ): + gpu_info = NvmlGpuInfo() + assert gpu_info.devices[0].nvlink_states == [] + + +# --- DefaultSystemInfo tests --- + + +def test_default_system_info_total_memory(): + mock_vm = MagicMock() + mock_vm.total = 64 * 1024**3 + with patch("psutil.virtual_memory", return_value=mock_vm): + sys_info = DefaultSystemInfo() + assert sys_info.total_memory_bytes == 64 * 1024**3 + + +def test_default_system_info_cuda_runtime_path(): + with patch( + "cuda.pathfinder.find_nvidia_header_directory", + return_value="/usr/local/cuda/include", + ): + sys_info = DefaultSystemInfo() + assert sys_info.cuda_runtime_path == "/usr/local/cuda/include" + + +def test_default_system_info_caches(): + mock_vm = MagicMock() + mock_vm.total = 64 * 1024**3 + with patch("psutil.virtual_memory", return_value=mock_vm) as mock_psutil: + sys_info = DefaultSystemInfo() + _ = sys_info.total_memory_bytes + _ = sys_info.total_memory_bytes + mock_psutil.assert_called_once() + + +# --- FakeGpuInfo tests --- + + +def test_fake_gpu_info_defaults(): + fake = FakeGpuInfo() + assert fake.device_count == 0 + assert fake.devices == [] + assert fake.cuda_driver_version == 0 + assert fake.driver_version == "" + + +def test_fake_gpu_info_custom(): + devices = [ + DeviceInfo(index=0, compute_capability=(8, 0), memory_total_bytes=32 * 1024**3) + ] + fake = FakeGpuInfo( + device_count=1, + devices=devices, + cuda_driver_version=12040, + driver_version="550.0", + ) + assert fake.device_count == 1 + assert len(fake.devices) == 1 + assert fake.cuda_driver_version == 12040 + + +def test_fake_gpu_info_satisfies_protocol(): + assert isinstance(FakeGpuInfo(), GpuInfoProvider) + + +# --- FakeSystemInfo tests --- + + +def test_fake_system_info_defaults(): + fake = FakeSystemInfo() + assert 
fake.total_memory_bytes == 0 + assert fake.cuda_runtime_path is None + + +def test_fake_system_info_satisfies_protocol(): + assert isinstance(FakeSystemInfo(), SystemInfoProvider) + + +# --- FailingGpuInfo tests --- + + +def test_failing_gpu_info_device_count(): + with pytest.raises(ValueError, match="No GPU available"): + _ = FailingGpuInfo().device_count + + +def test_failing_gpu_info_devices(): + with pytest.raises(ValueError, match="No GPU available"): + _ = FailingGpuInfo().devices + + +def test_failing_gpu_info_cuda_driver_version(): + with pytest.raises(ValueError, match="No GPU available"): + _ = FailingGpuInfo().cuda_driver_version + + +def test_failing_gpu_info_driver_version(): + with pytest.raises(ValueError, match="No GPU available"): + _ = FailingGpuInfo().driver_version + + +# --- FailingSystemInfo tests --- + + +def test_failing_system_info_total_memory(): + with pytest.raises(ValueError, match="System info unavailable"): + _ = FailingSystemInfo().total_memory_bytes + + +def test_failing_system_info_cuda_runtime_path(): + with pytest.raises(ValueError, match="System info unavailable"): + _ = FailingSystemInfo().cuda_runtime_path diff --git a/rapids_cli/tests/test_memory.py b/rapids_cli/tests/test_memory.py index 572df33..183d7ff 100644 --- a/rapids_cli/tests/test_memory.py +++ b/rapids_cli/tests/test_memory.py @@ -1,7 +1,5 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 -from unittest.mock import MagicMock, patch - import pytest from rapids_cli.doctor.checks.memory import ( @@ -9,74 +7,62 @@ get_gpu_memory, get_system_memory, ) +from rapids_cli.hardware import DeviceInfo, FailingGpuInfo, FakeGpuInfo, FakeSystemInfo def test_get_system_memory(): - mock_vm = MagicMock() - mock_vm.total = 32 * 1024**3 # 32 GB in bytes - with patch("psutil.virtual_memory", return_value=mock_vm): - result = get_system_memory(verbose=False) - assert result == 32.0 + system_info = FakeSystemInfo(total_memory_bytes=32 * 1024**3) + result = get_system_memory(verbose=False, system_info=system_info) + assert result == 32.0 def test_get_gpu_memory_single_gpu(): - mock_handle = MagicMock() - mock_memory_info = MagicMock() - mock_memory_info.total = 16 * 1024**3 # 16 GB in bytes - - with ( - patch("pynvml.nvmlInit"), - patch("pynvml.nvmlDeviceGetCount", return_value=1), - patch("pynvml.nvmlDeviceGetHandleByIndex", return_value=mock_handle), - patch("pynvml.nvmlDeviceGetMemoryInfo", return_value=mock_memory_info), - patch("pynvml.nvmlShutdown"), - ): - result = get_gpu_memory(verbose=False) - assert result == 16.0 + devices = [ + DeviceInfo(index=0, compute_capability=(7, 0), memory_total_bytes=16 * 1024**3) + ] + gpu_info = FakeGpuInfo(device_count=1, devices=devices) + result = get_gpu_memory(verbose=False, gpu_info=gpu_info) + assert result == 16.0 def test_get_gpu_memory_multiple_gpus(): - mock_handle = MagicMock() - mock_memory_info = MagicMock() - mock_memory_info.total = 16 * 1024**3 # 16 GB per GPU - - with ( - patch("pynvml.nvmlInit"), - patch("pynvml.nvmlDeviceGetCount", return_value=4), - patch("pynvml.nvmlDeviceGetHandleByIndex", return_value=mock_handle), - patch("pynvml.nvmlDeviceGetMemoryInfo", return_value=mock_memory_info), - patch("pynvml.nvmlShutdown"), - ): - result = get_gpu_memory(verbose=False) - assert result == 64.0 # 16 GB * 4 GPUs + devices = [ + DeviceInfo(index=i, compute_capability=(7, 0), 
memory_total_bytes=16 * 1024**3) + for i in range(4) + ] + gpu_info = FakeGpuInfo(device_count=4, devices=devices) + result = get_gpu_memory(verbose=False, gpu_info=gpu_info) + assert result == 64.0 # 16 GB * 4 GPUs def test_check_memory_to_gpu_ratio_good_ratio(): - with ( - patch("pynvml.nvmlInit"), - patch("rapids_cli.doctor.checks.memory.get_system_memory", return_value=64.0), - patch("rapids_cli.doctor.checks.memory.get_gpu_memory", return_value=32.0), - ): - result = check_memory_to_gpu_ratio(verbose=True) - assert result is True + devices = [ + DeviceInfo(index=0, compute_capability=(7, 0), memory_total_bytes=32 * 1024**3) + ] + gpu_info = FakeGpuInfo(device_count=1, devices=devices) + system_info = FakeSystemInfo(total_memory_bytes=64 * 1024**3) + result = check_memory_to_gpu_ratio( + verbose=True, gpu_info=gpu_info, system_info=system_info + ) + assert result is True def test_check_memory_to_gpu_ratio_warning(): - with ( - patch("pynvml.nvmlInit"), - patch("rapids_cli.doctor.checks.memory.get_system_memory", return_value=32.0), - patch("rapids_cli.doctor.checks.memory.get_gpu_memory", return_value=32.0), - ): - with pytest.warns(UserWarning, match="System Memory to total GPU Memory ratio"): - result = check_memory_to_gpu_ratio(verbose=True) - assert result is True + devices = [ + DeviceInfo(index=0, compute_capability=(7, 0), memory_total_bytes=32 * 1024**3) + ] + gpu_info = FakeGpuInfo(device_count=1, devices=devices) + system_info = FakeSystemInfo(total_memory_bytes=32 * 1024**3) + with pytest.warns(UserWarning, match="System Memory to total GPU Memory ratio"): + result = check_memory_to_gpu_ratio( + verbose=True, gpu_info=gpu_info, system_info=system_info + ) + assert result is True def test_check_memory_to_gpu_ratio_no_gpu(): - import pynvml - - with patch("pynvml.nvmlInit", side_effect=pynvml.NVMLError(1)): - with pytest.raises( - ValueError, match="GPU not found. Please ensure GPUs are installed." 
- ): - check_memory_to_gpu_ratio(verbose=False) + gpu_info = FailingGpuInfo() + with pytest.raises( + ValueError, match="GPU not found. Please ensure GPUs are installed." + ): + check_memory_to_gpu_ratio(verbose=False, gpu_info=gpu_info) diff --git a/rapids_cli/tests/test_nvlink.py b/rapids_cli/tests/test_nvlink.py index e2d82c7..a849ed0 100644 --- a/rapids_cli/tests/test_nvlink.py +++ b/rapids_cli/tests/test_nvlink.py @@ -1,54 +1,54 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -from unittest.mock import MagicMock, patch - import pytest from rapids_cli.doctor.checks.nvlink import check_nvlink_status +from rapids_cli.hardware import DeviceInfo, FailingGpuInfo, FakeGpuInfo def test_check_nvlink_status_success(): - mock_handle = MagicMock() - with ( - patch("pynvml.nvmlInit"), - patch("pynvml.nvmlDeviceGetCount", return_value=2), - patch("pynvml.nvmlDeviceGetHandleByIndex", return_value=mock_handle), - patch("pynvml.nvmlDeviceGetNvLinkState", return_value=1), - ): - result = check_nvlink_status(verbose=True) - assert result is True + devices = [ + DeviceInfo( + index=0, + compute_capability=(7, 0), + memory_total_bytes=0, + nvlink_states=[True], + ), + DeviceInfo( + index=1, + compute_capability=(7, 0), + memory_total_bytes=0, + nvlink_states=[True], + ), + ] + gpu_info = FakeGpuInfo(device_count=2, devices=devices) + result = check_nvlink_status(verbose=True, gpu_info=gpu_info) + assert result is True def test_check_nvlink_status_single_gpu(): - with ( - patch("pynvml.nvmlInit"), - patch("pynvml.nvmlDeviceGetCount", return_value=1), - ): - result = check_nvlink_status(verbose=False) - assert result is False + gpu_info = FakeGpuInfo(device_count=1) + result = check_nvlink_status(verbose=False, gpu_info=gpu_info) + assert result is False def test_check_nvlink_status_no_gpu(): - import pynvml - - with patch("pynvml.nvmlInit", side_effect=pynvml.NVMLError(1)): - with 
pytest.raises( - ValueError, match="GPU not found. Please ensure GPUs are installed." - ): - check_nvlink_status(verbose=False) - + gpu_info = FailingGpuInfo() + with pytest.raises( + ValueError, match="GPU not found. Please ensure GPUs are installed." + ): + check_nvlink_status(verbose=False, gpu_info=gpu_info) -def test_check_nvlink_status_nvml_error(): - import pynvml - mock_handle = MagicMock() - with ( - patch("pynvml.nvmlInit"), - patch("pynvml.nvmlDeviceGetCount", return_value=2), - patch("pynvml.nvmlDeviceGetHandleByIndex", return_value=mock_handle), - patch( - "pynvml.nvmlDeviceGetNvLinkState", side_effect=pynvml.NVMLError_NotSupported +def test_check_nvlink_status_no_nvlink(): + devices = [ + DeviceInfo( + index=0, compute_capability=(7, 0), memory_total_bytes=0, nvlink_states=[] ), - ): - with pytest.raises(ValueError, match="NVLink 0 Status Check Failed"): - check_nvlink_status(verbose=False) + DeviceInfo( + index=1, compute_capability=(7, 0), memory_total_bytes=0, nvlink_states=[] + ), + ] + gpu_info = FakeGpuInfo(device_count=2, devices=devices) + result = check_nvlink_status(verbose=True, gpu_info=gpu_info) + assert result is False