diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index c76119af..0b5a04fd 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -17,7 +17,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.10", "3.11", "3.12", "3.13"]
+        # Reduced Python version matrix for faster CI
+        python-version: ["3.11", "3.12"]
     env:
       UV_PYTHON: ${{ matrix.python-version }}
 
@@ -30,8 +31,21 @@ jobs:
         with:
           enable-cache: true
 
+      # Cache model weights to speed up any foundation model tests
+      - name: Cache model weights
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cache/huggingface
+            ~/.cache/torch
+          key: model-weights-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('pyproject.toml') }}
+          restore-keys: |
+            model-weights-${{ runner.os }}-${{ matrix.python-version }}-
+            model-weights-${{ runner.os }}-
+
       - name: Run tests
         run: uv run pytest
+        timeout-minutes: 10
 
       - name: Test import class
         run: uv run -- python -c "from timecopilot import TimeCopilot, TimeCopilotForecaster"
@@ -53,6 +67,7 @@ jobs:
 
       - name: Run tests
         run: uv run pytest -m live
+        timeout-minutes: 15
         env:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
 
diff --git a/.github/workflows/comprehensive.yaml b/.github/workflows/comprehensive.yaml
new file mode 100644
index 00000000..700b41e3
--- /dev/null
+++ b/.github/workflows/comprehensive.yaml
@@ -0,0 +1,58 @@
+name: Comprehensive Tests
+
+on:
+  schedule:
+    # Run comprehensive tests nightly at 2 AM UTC
+    - cron: '0 2 * * *'
+  workflow_dispatch:
+    # Allow manual triggering
+  push:
+    branches: [main]
+    paths:
+      # Only run when model-related files change
+      - 'timecopilot/models/**'
+      - 'tests/models/test_models_comprehensive.py'
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  test-comprehensive:
+    name: comprehensive test on ${{ matrix.python-version }}
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        # Test on fewer Python versions for comprehensive tests
+        python-version: ["3.11", "3.12"]
+    env:
+      UV_PYTHON: ${{ matrix.python-version }}
+
+    steps:
+      - name: Clone repo
+        uses: actions/checkout@v4
+
+      - name: Set up uv
+        uses: astral-sh/setup-uv@v6
+        with:
+          enable-cache: true
+
+      # Cache model weights to speed up foundation model tests
+      - name: Cache model weights
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cache/huggingface
+            ~/.cache/torch
+          key: model-weights-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('pyproject.toml') }}
+          restore-keys: |
+            model-weights-${{ runner.os }}-${{ matrix.python-version }}-
+            model-weights-${{ runner.os }}-
+
+      - name: Run comprehensive tests
+        run: uv run pytest -m slow -v
+        timeout-minutes: 45
+
+      - name: Test import class
+        run: uv run -- python -c "from timecopilot import TimeCopilot, TimeCopilotForecaster"
\ No newline at end of file
diff --git a/TESTING.md b/TESTING.md
new file mode 100644
index 00000000..97c81e18
--- /dev/null
+++ b/TESTING.md
@@ -0,0 +1,107 @@
+# Testing Guide
+
+This project uses a two-tier testing strategy to balance speed and comprehensive coverage.
+
+## Test Categories
+
+### Fast Tests (Regular CI)
+- **Purpose**: Quick validation for every PR/push
+- **Runtime**: ~2-3 minutes
+- **Models**: Lightweight benchmark models (SeasonalNaive, ZeroModel, AutoARIMA, Prophet, etc.)
+- **Trigger**: Runs on every push/PR
+- **Command**: `uv run pytest` (excludes slow tests by default)
+
+### Slow Tests (Comprehensive)
+- **Purpose**: Full model validation including heavy foundation models
+- **Runtime**: ~30-45 minutes
+- **Models**: All models including Chronos, Moirai, TimesFM, TiRex, etc.
+- **Trigger**: Nightly schedule + manual dispatch
+- **Command**: `uv run pytest -m slow`
+
+### Live Tests
+- **Purpose**: Test integration with external APIs (OpenAI, etc.)
+- **Models**: Uses real LLM endpoints
+- **Trigger**: Regular CI (with API keys)
+- **Command**: `uv run pytest -m live`
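+
+All three categories are driven by the pytest markers registered in `pyproject.toml`
+(full diff further below); `addopts` excludes both `slow` and `live` tests by default:
+
+```toml
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+addopts = "-m 'not live and not slow' -n auto"
+markers = [
+    "live: marks tests that require calls to llm providers",
+    "slow: marks tests with heavy foundation models that take longer to run"
+]
+```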
+
+## Running Tests Locally
+
+```bash
+# Fast tests only (recommended for development)
+uv run pytest
+
+# Include slow foundation model tests
+uv run pytest -m "not live"
+
+# All tests including live API calls
+uv run pytest -m ""
+
+# Specific test categories
+uv run pytest -m slow              # Foundation models only
+uv run pytest -m live              # API integration only
+uv run pytest tests/test_smoke.py # Basic smoke tests
+```
+
+## Test Optimization Details
+
+### Model Selection Strategy
+- **Fast models** (`tests/conftest.py::fast_models`): Used in regular CI
+  - Benchmark models: SeasonalNaive, ZeroModel, AutoARIMA, Prophet, ADIDA
+  - Lightweight foundation: TabPFN (MOCK mode)
+
+- **Comprehensive models** (`tests/conftest.py::all_models`): Used in nightly tests
+  - All fast models plus heavy foundation models
+  - Foundation models: Chronos, Moirai, TimesFM, TiRex, Toto
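+
+The relationship between the lists is easiest to see in `tests/conftest.py` itself
+(abridged from the full diff below):
+
+```python
+# Benchmark models only; heavy foundation models are excluded
+fast_models = [MODELS[str_model] for str_model in benchmark_models]
+
+# Comprehensive list: everything in fast_models plus heavy foundation models
+all_models = fast_models.copy()
+all_models.append(MODELS["TimesFM"])
+
+# Default to fast models for regular CI
+models = fast_models
+```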
+
+### Parameterization Reductions
+To speed up CI, several test parameters were reduced (example below):
+- **Frequencies**: 4 → 2 (H, D, W-MON, MS → D, W-MON)
+- **Series counts**: 5 → 3 per test
+- **Quantiles**: 9 → 3 (0.1-0.9 → 0.1, 0.5, 0.9)
+- **Cross-validation horizon**: 12 → 6
+- **Python versions**: 4 → 2 (3.11 and 3.12 for regular CI)
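+
+For example, the frequency grid in `tests/models/test_models.py` went from
+
+```python
+@pytest.mark.parametrize("freq", ["H", "D", "W-MON", "MS"])
+```
+
+to
+
+```python
+@pytest.mark.parametrize("freq", ["D", "W-MON"])  # Reduced freq combinations for speed
+```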
+
+### Caching Strategy
+- **Model weights**: HuggingFace and PyTorch caches preserved between runs
+- **Dependencies**: UV package cache enabled
+- **Cache keys**: Include Python version and dependency hash
+
+## Adding New Tests
+
+### For Fast Models
+Add tests to existing files in `tests/`; they will automatically use fast models and run in regular CI.
+
+### For Foundation Models
+- Add tests with the `@pytest.mark.slow` decorator
+- Consider using reduced parameterization for speed
+- Tests will run in the nightly comprehensive workflow
+
+### Example
+```python
+@pytest.mark.slow
+@pytest.mark.parametrize("model", all_models)  # Use all_models for comprehensive
+def test_new_foundation_feature(model):
+    # Test with foundation models
+    pass
+
+
+@pytest.mark.parametrize("model", models)  # Use models for fast tests
+def test_new_basic_feature(model):
+    # Test with fast models only
+    pass
+```
+
+## CI Workflows
+
+### `.github/workflows/ci.yaml`
+- **Triggers**: Push to main, PRs
+- **Python versions**: 3.11, 3.12
+- **Timeout**: 10 minutes for regular tests, 15 minutes for live tests
+- **Excludes**: Slow tests (foundation models)
+
+### `.github/workflows/comprehensive.yaml`
+- **Triggers**: Nightly at 2 AM UTC, manual dispatch, model file changes
+- **Python versions**: 3.11, 3.12
+- **Timeout**: 45 minutes
+- **Includes**: Only slow tests (foundation models)
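+
+The comprehensive workflow's `workflow_dispatch` trigger also allows starting it by
+hand; assuming the GitHub CLI is installed and authenticated, that looks like:
+
+```bash
+gh workflow run comprehensive.yaml
+```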
+
+This structure ensures rapid feedback for developers while maintaining comprehensive test coverage through automated nightly runs.
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 8f31e695..b5521f20 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -88,9 +88,10 @@ disable_error_code = ["no-redef"] # for fasthtml
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
-addopts = "-m 'not live' -n auto"
+addopts = "-m 'not live and not slow' -n auto"
 markers = [
-    "live: marks tests that require calls to llm providers"
+    "live: marks tests that require calls to llm providers",
+    "slow: marks tests with heavy foundation models that take longer to run"
 ]
 
 [tool.ruff]
diff --git a/tests/conftest.py b/tests/conftest.py
index 6b2434b9..778e1c29 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -5,28 +5,37 @@
 from timecopilot.models.foundational.moirai import Moirai
 from timecopilot.models.foundational.toto import Toto
 
+# Fast benchmark models for regular CI
 benchmark_models = [
     "AutoARIMA",
     "SeasonalNaive",
     "ZeroModel",
     "ADIDA",
-    "TimesFM",
     "Prophet",
 ]
 
-models = [MODELS[str_model] for str_model in benchmark_models]
-if sys.version_info >= (3, 11):
-    from timecopilot.models.foundational.tirex import TiRex
-
-    models.append(TiRex())
+# Fast models for regular CI (exclude TimesFM as it's heavy)
+fast_models = [MODELS[str_model] for str_model in benchmark_models]
 
+# Add lightweight foundation models for fast tests
 if sys.version_info < (3, 13):
     from tabpfn_time_series import TabPFNMode
 
     from timecopilot.models.foundational.tabpfn import TabPFN
 
-    models.append(TabPFN(mode=TabPFNMode.MOCK))
+    fast_models.append(TabPFN(mode=TabPFNMode.MOCK))
+
+# Comprehensive model list including heavy foundation models (for slow tests)
+all_models = fast_models.copy()
+
+# Add TimesFM to comprehensive list
+all_models.append(MODELS["TimesFM"])
 
-models.extend(
+# Add TiRex for Python >= 3.11
+if sys.version_info >= (3, 11):
+    from timecopilot.models.foundational.tirex import TiRex
+
+    all_models.append(TiRex())
+
+# Add heavy foundation models to comprehensive list
+all_models.extend(
     [
         Chronos(repo_id="amazon/chronos-t5-tiny", alias="Chronos-T5"),
         Chronos(repo_id="amazon/chronos-bolt-tiny", alias="Chronos-Bolt"),
@@ -44,3 +53,6 @@
         ),
     ]
 )
+
+# Default to fast models for regular CI
+models = fast_models
diff --git a/tests/models/test_models.py b/tests/models/test_models.py
index 57b04eb2..3fdd0701 100644
--- a/tests/models/test_models.py
+++ b/tests/models/test_models.py
@@ -40,7 +40,7 @@ def test_tabpfn_import_fails():
 
 
 @pytest.mark.parametrize("model", models)
-@pytest.mark.parametrize("freq", ["H", "D", "W-MON", "MS"])
+@pytest.mark.parametrize("freq", ["D", "W-MON"])  # Reduced freq combinations for speed
 def test_freq_inferred_correctly(model, freq):
     n_series = 2
     df = generate_series(
@@ -66,10 +66,10 @@ def test_freq_inferred_correctly(model, freq):
 
 
 @pytest.mark.parametrize("model", models)
-@pytest.mark.parametrize("freq", ["H", "D", "W-MON", "MS"])
+@pytest.mark.parametrize("freq", ["D", "W-MON"])  # Reduced freq combinations for speed
 @pytest.mark.parametrize("h", [1, 12])
 def test_correct_forecast_dates(model, freq, h):
-    n_series = 5
+    n_series = 3  # Reduced from 5 for speed
     df = generate_series(
         n_series,
         freq=freq,
@@ -91,11 +91,11 @@ def test_correct_forecast_dates(model, freq, h):
 
 
 @pytest.mark.parametrize("model", models)
-@pytest.mark.parametrize("freq", ["H", "D", "W-MON", "MS"])
+@pytest.mark.parametrize("freq", ["D", "W-MON"])  # Reduced freq combinations for speed
 @pytest.mark.parametrize("n_windows", [1, 4])
 def test_cross_validation(model, freq, n_windows):
-    h = 12
-    n_series = 5
+    h = 6  # Reduced from 12 for speed
+    n_series = 3  # Reduced from 5 for speed
     df = generate_series(n_series, freq=freq, equal_ends=True)
     cv_df = model.cross_validation(
         df,
@@ -152,8 +152,8 @@ def test_passing_both_level_and_quantiles(model):
 
 @pytest.mark.parametrize("model", models)
 def test_using_quantiles(model):
-    qs = [round(i * 0.1, 1) for i in range(1, 10)]
-    df = generate_series(n_series=3, freq="D")
+    qs = [0.1, 0.5, 0.9]  # Reduced quantiles for speed
+    df = generate_series(n_series=2, freq="D")  # Reduced series for speed
     fcst_df = model.forecast(
         df=df,
         h=2,
@@ -186,7 +186,7 @@ def test_using_quantiles(model):
 
 @pytest.mark.parametrize("model", models)
 def test_using_level(model):
-    level = [0, 20, 40, 60, 80]  # corresponds to qs [0.1, 0.2, ..., 0.9]
+    level = [20, 80]  # Reduced levels for speed
     df = generate_series(n_series=2, freq="D")
     fcst_df = model.forecast(
         df=df,
diff --git a/tests/models/test_models_comprehensive.py b/tests/models/test_models_comprehensive.py
new file mode 100644
index 00000000..b55e0c30
--- /dev/null
+++ b/tests/models/test_models_comprehensive.py
@@ -0,0 +1,132 @@
+"""
+Comprehensive tests for all models including heavy foundation models.
+These tests are marked as slow and excluded from regular CI runs.
+"""
+import pandas as pd
+import pytest
+from utilsforecast.data import generate_series as _generate_series
+
+from ..conftest import all_models
+
+
+def generate_series(n_series, freq, **kwargs):
+    df = _generate_series(n_series, freq, **kwargs)
+    df["unique_id"] = df["unique_id"].astype(str)
+    return df
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize("model", all_models)
+@pytest.mark.parametrize("freq", ["D", "W-MON"])  # Reduced freq combinations for speed
+def test_freq_inferred_correctly_comprehensive(model, freq):
+    n_series = 2
+    df = generate_series(
+        n_series,
+        freq=freq,
+    )
+    fcsts_no_freq = model.forecast(df, h=3)
+    fcsts_with_freq = model.forecast(df, h=3, freq=freq)
+    cv_no_freq = model.cross_validation(df, h=3)
+    cv_with_freq = model.cross_validation(df, h=3, freq=freq)
+    # some foundation models produce different results
+    # each time they are called
+    cols_to_check = ["unique_id", "ds"]
+    cols_to_check_cv = ["unique_id", "ds", "y", "cutoff"]
+    pd.testing.assert_frame_equal(
+        fcsts_no_freq[cols_to_check],
+        fcsts_with_freq[cols_to_check],
+    )
+    pd.testing.assert_frame_equal(
+        cv_no_freq[cols_to_check_cv],
+        cv_with_freq[cols_to_check_cv],
+    )
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize("model", all_models)
+@pytest.mark.parametrize("freq", ["D", "W-MON"])  # Reduced freq combinations
+@pytest.mark.parametrize("h", [1, 12])
+def test_correct_forecast_dates_comprehensive(model, freq, h):
+    n_series = 3  # Reduced from 5 for speed
+    df = generate_series(
+        n_series,
+        freq=freq,
+    )
+    df_test = df.groupby("unique_id").tail(h)
+    df_train = df.drop(df_test.index)
+    fcst_df = model.forecast(
+        df_train,
+        h=h,
+        freq=freq,
+    )
+    exp_n_cols = 3
+    assert fcst_df.shape == (n_series * h, exp_n_cols)
+    exp_cols = ["unique_id", "ds"]
+    pd.testing.assert_frame_equal(
+        fcst_df[exp_cols].sort_values(["unique_id", "ds"]).reset_index(drop=True),
+        df_test[exp_cols].sort_values(["unique_id", "ds"]).reset_index(drop=True),
+    )
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize("model", all_models)
+@pytest.mark.parametrize("freq", ["D"])  # Only test daily for foundation models
+@pytest.mark.parametrize("n_windows", [1])  # Reduced windows for speed
+def test_cross_validation_comprehensive(model, freq, n_windows):
+    h = 6  # Reduced from 12 for speed
+    n_series = 3  # Reduced from 5 for speed
+    df = generate_series(n_series, freq=freq, equal_ends=True)
+    cv_df = model.cross_validation(
+        df,
+        h=h,
+        freq=freq,
+        n_windows=n_windows,
+    )
+    exp_n_cols = 5  # unique_id, cutoff, ds, y, model
+    assert cv_df.shape == (n_series * h * n_windows, exp_n_cols)
+    cutoffs = cv_df["cutoff"].unique()
+    assert len(cutoffs) == n_windows
+    df_test = df.groupby("unique_id").tail(h * n_windows)
+    exp_cols = ["unique_id", "ds", "y"]
+    pd.testing.assert_frame_equal(
+        cv_df.sort_values(["unique_id", "ds"]).reset_index(drop=True)[exp_cols],
+        df_test.sort_values(["unique_id", "ds"]).reset_index(drop=True)[exp_cols],
+    )
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize("model", all_models)
+def test_using_quantiles_comprehensive(model):
+    qs = [0.1, 0.5, 0.9]  # Reduced quantiles for speed
+    df = generate_series(n_series=2, freq="D")  # Reduced series
+    fcst_df = model.forecast(
+        df=df,
+        h=2,
+        freq="D",
+        quantiles=qs,
+    )
+    exp_qs_cols = [f"{model.alias}-q-{int(100 * q)}" for q in qs]
+    assert all(col in fcst_df.columns for col in exp_qs_cols)
+    assert not any(("-lo-" in col or "-hi-" in col) for col in fcst_df.columns)
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize("model", all_models)
+def test_using_level_comprehensive(model):
+    level = [20, 80]  # Reduced levels for speed
+    df = generate_series(n_series=2, freq="D")
+    fcst_df = model.forecast(
+        df=df,
+        h=2,
+        freq="D",
+        level=level,
+    )
+    exp_lv_cols = []
+    for lv in level:
+        if lv == 0:
+            continue
+        exp_lv_cols.extend([f"{model.alias}-lo-{lv}", f"{model.alias}-hi-{lv}"])
+    assert all(col in fcst_df.columns for col in exp_lv_cols)
+    assert not any(("-q-" in col) for col in fcst_df.columns)
\ No newline at end of file
diff --git a/tests/test_smoke.py b/tests/test_smoke.py
new file mode 100644
index 00000000..a4e93e2b
--- /dev/null
+++ b/tests/test_smoke.py
@@ -0,0 +1,44 @@
+"""
+Fast smoke tests for basic functionality.
+These tests run very quickly to catch import and basic setup issues.
+"""
+
+
+def test_timecopilot_imports():
+    """Test that the main classes can be imported without errors."""
+    from timecopilot import TimeCopilot, TimeCopilotForecaster
+
+    assert TimeCopilot is not None
+    assert TimeCopilotForecaster is not None
+
+
+def test_models_import():
+    """Test that model classes can be imported."""
+    from timecopilot.models import SeasonalNaive, ZeroModel
+
+    assert SeasonalNaive is not None
+    assert ZeroModel is not None
+
+
+def test_basic_model_creation():
+    """Test that basic models can be instantiated."""
+    from timecopilot.models import SeasonalNaive, ZeroModel
+
+    model1 = SeasonalNaive()
+    model2 = ZeroModel()
+
+    assert model1 is not None
+    assert model2 is not None
+    assert hasattr(model1, "forecast")
+    assert hasattr(model2, "forecast")
+
+
+def test_forecaster_creation():
+    """Test that TimeCopilotForecaster can be created with basic models."""
+    from timecopilot.forecaster import TimeCopilotForecaster
+    from timecopilot.models import SeasonalNaive, ZeroModel
+
+    models = [SeasonalNaive(), ZeroModel()]
+    forecaster = TimeCopilotForecaster(models=models)
+
+    assert forecaster is not None
+    assert len(forecaster.models) == 2
\ No newline at end of file