diff --git a/econml/_lazy.py b/econml/_lazy.py new file mode 100644 index 000000000..ce151b0c8 --- /dev/null +++ b/econml/_lazy.py @@ -0,0 +1,40 @@ +# Copyright (c) PyWhy contributors. All rights reserved. +# Licensed under the MIT License. + +"""Lazy module loading to avoid expensive imports at package load time.""" + +import importlib + + +class _LazyModule: + """Proxy that delays importing a module until an attribute is accessed. + + Use at module level as a drop-in replacement for ``import heavy_lib``:: + + heavy_lib = _LazyModule("heavy_lib") + + The real module is imported on first attribute access, so the cost is + deferred until the functionality is actually needed. + """ + + def __init__(self, module_name): + object.__setattr__(self, "_module_name", module_name) + object.__setattr__(self, "_module", None) + + def _load(self): + module = object.__getattribute__(self, "_module") + if module is None: + name = object.__getattribute__(self, "_module_name") + module = importlib.import_module(name) + object.__setattr__(self, "_module", module) + return module + + def __getattr__(self, name): + return getattr(self._load(), name) + + def __repr__(self): + module = object.__getattribute__(self, "_module") + if module is not None: + return repr(module) + name = object.__getattribute__(self, "_module_name") + return f"<_LazyModule '{name}' (not yet loaded)>" diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index cf966b631..e27b47a67 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -41,6 +41,9 @@ class in this module implements the general logic in a very versatile way filter_none_kwargs, one_hot_encoder, strata_from_discrete_arrays, jacify_featurizer, reshape, shape) from .sklearn_extensions.model_selection import ModelSelector +from ._lazy import _LazyModule + +_rlearner = _LazyModule("econml.dml._rlearner") # lazy: avoid circular import try: import ray @@ -1149,9 +1152,7 @@ def score(self, Y, T, X=None, W=None, Z=None, 
sample_weight=None, groups=None, s } # If using an _rlearner, the scoring parameter can be passed along, if provided if scoring is not None: - # Cannot import in header, or circular imports - from .dml._rlearner import _ModelFinal - if isinstance(self._ortho_learner_model_final, _ModelFinal): + if isinstance(self._ortho_learner_model_final, _rlearner._ModelFinal): score_kwargs['scoring'] = scoring else: raise NotImplementedError("scoring parameter only implemented for " diff --git a/econml/_shap.py b/econml/_shap.py index a0d75642c..2c84e6f02 100644 --- a/econml/_shap.py +++ b/econml/_shap.py @@ -13,11 +13,13 @@ """ import inspect -import shap from collections import defaultdict import numpy as np +from ._lazy import _LazyModule from .utilities import broadcast_unit_treatments, cross_product, get_feature_names_or_default +shap = _LazyModule("shap") # lazy: heavy dependency only needed when shap_values() is called + def _shap_explain_cme(cme_model, X, d_t, d_y, feature_names=None, treatment_names=None, output_names=None, diff --git a/econml/data/dynamic_panel_dgp.py b/econml/data/dynamic_panel_dgp.py index 0eaeb88f3..e14429d98 100644 --- a/econml/data/dynamic_panel_dgp.py +++ b/econml/data/dynamic_panel_dgp.py @@ -1,6 +1,5 @@ import numpy as np -from econml.utilities import cross_product -from statsmodels.tools.tools import add_constant +from econml.utilities import cross_product, add_constant import pandas as pd import scipy as sp from scipy.stats import expon diff --git a/econml/dml/causal_forest.py b/econml/dml/causal_forest.py index e83ca4d4e..d6ff14eeb 100644 --- a/econml/dml/causal_forest.py +++ b/econml/dml/causal_forest.py @@ -17,9 +17,12 @@ from .._cate_estimator import LinearCateEstimator from .._shap import _shap_explain_multitask_model_cate from .._ortho_learner import _OrthoLearner +from .._lazy import _LazyModule from ..validate.sensitivity_analysis import (sensitivity_interval, RV, dml_sensitivity_values, sensitivity_summary) +_score = 
_LazyModule("econml.score") # lazy: avoid circular import + class _CausalForestFinalWrapper: @@ -757,7 +760,7 @@ def tune(self, Y, T, *, X=None, W=None, The tuned causal forest object. This is the same object (not a copy) as the original one, but where all parameters of the object have been set to the best performing parameters from the tuning grid. """ - from ..score import RScorer # import here to avoid circular import issue + RScorer = _score.RScorer Y, T, X, sample_weight, groups = check_input_arrays(Y, T, X, sample_weight, groups) W, = check_input_arrays(W, force_all_finite='allow-nan' if 'W' in self._gen_allowed_missing_vars() else True, ensure_2d=True) diff --git a/econml/inference/_bootstrap.py b/econml/inference/_bootstrap.py index 7993a9c74..36089bd63 100644 --- a/econml/inference/_bootstrap.py +++ b/econml/inference/_bootstrap.py @@ -6,6 +6,9 @@ from joblib import Parallel, delayed from sklearn.base import clone from scipy.stats import norm +from .._lazy import _LazyModule + +_cate_estimator = _LazyModule("econml._cate_estimator") # lazy: avoid circular import class BootstrapEstimator: @@ -83,10 +86,8 @@ def fit(self, *args, **named_args): The full signature of this method is the same as that of the wrapped object's `fit` method. 
""" - from .._cate_estimator import BaseCateEstimator # need to nest this here to avoid circular import - index_chunks = None - if isinstance(self._instances[0], BaseCateEstimator): + if isinstance(self._instances[0], _cate_estimator.BaseCateEstimator): index_chunks = self._instances[0]._strata(*args, **named_args) if index_chunks is not None: index_chunks = self.__stratified_indices(index_chunks) diff --git a/econml/sklearn_extensions/linear_model.py b/econml/sklearn_extensions/linear_model.py index 65efb5fad..8c44ee31f 100644 --- a/econml/sklearn_extensions/linear_model.py +++ b/econml/sklearn_extensions/linear_model.py @@ -20,7 +20,7 @@ import warnings from collections.abc import Iterable from scipy.stats import norm -from ..utilities import ndim, shape, reshape, _safe_norm_ppf, check_input_arrays +from ..utilities import ndim, shape, reshape, _safe_norm_ppf, check_input_arrays, add_constant import sklearn from sklearn import clone from sklearn.linear_model import LinearRegression, LassoCV, MultiTaskLassoCV, Lasso, MultiTaskLasso @@ -33,12 +33,14 @@ from sklearn.utils.multiclass import type_of_target from sklearn.utils.validation import check_is_fitted from sklearn.base import BaseEstimator -from statsmodels.tools.tools import add_constant -from statsmodels.api import RLM -import statsmodels +from .._lazy import _LazyModule from joblib import Parallel, delayed from typing import List +_statsmodels_api = _LazyModule("statsmodels.api") # lazy: only needed for RLM +_statsmodels = _LazyModule("statsmodels") # lazy: only needed for RLM robust norms +_model_selection = _LazyModule("econml.sklearn_extensions.model_selection") # lazy: avoid circular import + class _WeightedCVIterableWrapper(_CVIterableWrapper): def __init__(self, cv): @@ -56,15 +58,13 @@ def split(self, X=None, y=None, groups=None, sample_weight=None): def _weighted_check_cv(cv=5, y=None, classifier=False, random_state=None): - # local import to avoid circular imports - from .model_selection import 
WeightedKFold, WeightedStratifiedKFold cv = 5 if cv is None else cv if isinstance(cv, numbers.Integral): if (classifier and (y is not None) and (type_of_target(y) in ('binary', 'multiclass'))): - return WeightedStratifiedKFold(cv, random_state=random_state) + return _model_selection.WeightedStratifiedKFold(cv, random_state=random_state) else: - return WeightedKFold(cv, random_state=random_state) + return _model_selection.WeightedKFold(cv, random_state=random_state) if not hasattr(cv, 'split') or isinstance(cv, str): if not isinstance(cv, Iterable) or isinstance(cv, str): @@ -2041,9 +2041,9 @@ def fit(self, X, y): self._n_out = 0 if len(y.shape) == 1 else (y.shape[1],) def model_gen(y): - return RLM(endog=y, + return _statsmodels_api.RLM(endog=y, exog=X, - M=statsmodels.robust.norms.HuberT(t=self.t)).fit(cov=self.cov_type, + M=_statsmodels.robust.norms.HuberT(t=self.t)).fit(cov=self.cov_type, maxiter=self.maxiter, tol=self.tol) if y.ndim < 2: diff --git a/econml/utilities.py b/econml/utilities.py index 5673771c8..f378455b8 100644 --- a/econml/utilities.py +++ b/econml/utilities.py @@ -19,15 +19,60 @@ from sklearn.preprocessing import OneHotEncoder, PolynomialFeatures, LabelEncoder import warnings from warnings import warn -from statsmodels.iolib.table import SimpleTable -from statsmodels.iolib.summary import summary_return +from ._lazy import _LazyModule from inspect import signature from packaging.version import parse +_statsmodels_table = _LazyModule("statsmodels.iolib.table") # lazy: only needed for Summary output +_statsmodels_summary = _LazyModule("statsmodels.iolib.summary") # lazy: only needed for Summary output + MAX_RAND_SEED = np.iinfo(np.int32).max +def add_constant(data, prepend=True, has_constant='skip'): + """Add a column of ones to a numpy array. + + Parameters + ---------- + data : array_like + A column-ordered design matrix. + prepend : bool, default True + If True the constant is in the first column, else appended. 
+ has_constant : {'skip', 'add', 'raise'}, default 'skip' + Behavior when *data* already contains a constant column. + ``'skip'`` returns *data* unchanged, ``'raise'`` raises + ``ValueError``, ``'add'`` adds another column of ones anyway. + + Returns + ------- + ndarray + The array with a ones column prepended (or appended). + """ + x = np.asarray(data) + if isinstance(data, pd.DataFrame): + raise TypeError( + "add_constant does not support pandas DataFrames; " + "pass a numpy array instead" + ) + if x.ndim == 1: + x = x[:, None] + elif x.ndim > 2: + raise ValueError('Only implemented for 2-dimensional arrays') + + if has_constant != 'add': + is_const = (np.ptp(x, axis=0) == 0) & np.all(x != 0.0, axis=0) + if is_const.any(): + if has_constant == 'skip': + return x + cols = ",".join(str(c) for c in np.where(is_const)[0]) + raise ValueError(f"Column(s) {cols} are constant.") + + ones = np.ones(x.shape[0]) + parts = [ones, x] if prepend else [x, ones] + return np.column_stack(parts) + + class IdentityFeatures(TransformerMixin): """Featurizer that just returns the input data.""" @@ -1147,7 +1192,7 @@ def _repr_html_(self): return self.as_html() def add_table(self, res, header, index, title): - table = SimpleTable(res, header, index, title) + table = _statsmodels_table.SimpleTable(res, header, index, title) self.tables.append(table) def add_extra_txt(self, etext): @@ -1170,7 +1215,7 @@ def as_text(self): summary tables and extra text as one string """ - txt = summary_return(self.tables, return_fmt='text') + txt = _statsmodels_summary.summary_return(self.tables, return_fmt='text') if self.extra_txt is not None: txt = txt + '\n\n' + self.extra_txt return txt @@ -1190,7 +1235,7 @@ def as_latex(self): tables. 
""" - latex = summary_return(self.tables, return_fmt='latex') + latex = _statsmodels_summary.summary_return(self.tables, return_fmt='latex') if self.extra_txt is not None: latex = latex + '\n\n' + self.extra_txt.replace('\n', ' \\newline\n ') return latex @@ -1204,7 +1249,7 @@ def as_csv(self): concatenated summary tables in comma delimited format """ - csv = summary_return(self.tables, return_fmt='csv') + csv = _statsmodels_summary.summary_return(self.tables, return_fmt='csv') if self.extra_txt is not None: csv = csv + '\n\n' + self.extra_txt return csv @@ -1218,7 +1263,7 @@ def as_html(self): concatenated summary tables in HTML format """ - html = summary_return(self.tables, return_fmt='html') + html = _statsmodels_summary.summary_return(self.tables, return_fmt='html') if self.extra_txt is not None: html = html + '
<br/><br/>' + self.extra_txt.replace('\n', '<br/>
') return html diff --git a/econml/validate/drtester.py b/econml/validate/drtester.py index c79330218..e90025786 100644 --- a/econml/validate/drtester.py +++ b/econml/validate/drtester.py @@ -5,14 +5,13 @@ import scipy.stats as st from sklearn.model_selection import check_cv from sklearn.model_selection import cross_val_predict, StratifiedKFold, KFold -from statsmodels.api import OLS -from statsmodels.tools import add_constant - -from econml.utilities import deprecated - +from econml._lazy import _LazyModule +from econml.utilities import deprecated, add_constant from .results import CalibrationEvaluationResults, BLPEvaluationResults, UpliftEvaluationResults, EvaluationResults from .utils import calculate_dr_outcomes, calc_uplift +_statsmodels_api = _LazyModule("statsmodels.api") # lazy: only needed for evaluate_blp() + class DRTester: """ @@ -479,7 +478,7 @@ def evaluate_blp( self.get_cate_preds(Xval, Xtrain) if self.n_treat == 1: # binary treatment - reg = OLS(self.dr_val_, add_constant(self.cate_preds_val_)).fit() + reg = _statsmodels_api.OLS(self.dr_val_, add_constant(self.cate_preds_val_)).fit() params = [reg.params[1]] errs = [reg.bse[1]] pvals = [reg.pvalues[1]] @@ -488,7 +487,10 @@ def evaluate_blp( errs = [] pvals = [] for k in range(self.n_treat): # run a separate regression for each - reg = OLS(self.dr_val_[:, k], add_constant(self.cate_preds_val_[:, k])).fit(cov_type='HC1') + reg = _statsmodels_api.OLS( + self.dr_val_[:, k], + add_constant(self.cate_preds_val_[:, k]) + ).fit(cov_type='HC1') params.append(reg.params[1]) errs.append(reg.bse[1]) pvals.append(reg.pvalues[1])