DeclareDesign · graemeblair · Jan 6, 2025 · Jan 7, 2025 · Jan 21, 2025 · Jan 22, 2025
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -1,12 +1,17 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+#
+# NOTE: This workflow is overkill for most R packages and
+# check-standard.yaml is likely a better choice.
+# usethis::use_github_action("check-standard") will install it.
 on:
   push:
-    branches:
-      - master
+    branches: [main, master]
   pull_request:
-    branches:
-      - master
 
-name: R-CMD-check
+name: R-CMD-check.yaml
+
+permissions: read-all
 
 jobs:
   R-CMD-check:
@@ -18,72 +23,40 @@ jobs:
       fail-fast: false
       matrix:
         config:
-          - {os: macOS-latest,   r: 'devel'}
-          - {os: macOS-latest,   r: 'release'}
-          - {os: macOS-latest,   r: 'oldrel'}
-          - {os: windows-latest, r: 'devel'}
+          - {os: macos-latest,   r: 'release'}
+
           - {os: windows-latest, r: 'release'}
-          - {os: windows-latest, r: 'oldrel'}
-          - {os: windows-latest, r: '3.5'}
-          - {os: ubuntu-16.04,   r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
-          - {os: ubuntu-16.04,   r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
-          - {os: ubuntu-16.04,   r: 'oldrel',  rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
-          - {os: ubuntu-16.04,   r: '3.5',     rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
-          - {os: ubuntu-16.04,   r: '3.4',     rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
-          - {os: ubuntu-16.04,   r: '3.3',     rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
+          # use 4.0 or 4.1 to check with rtools40's older compiler
+          - {os: windows-latest, r: 'oldrel-4'}
 
+          - {os: ubuntu-latest,  r: 'devel', http-user-agent: 'release'}
+          - {os: ubuntu-latest,  r: 'release'}
+          - {os: ubuntu-latest,  r: 'oldrel-1'}
+          - {os: ubuntu-latest,  r: 'oldrel-2'}
+          - {os: ubuntu-latest,  r: 'oldrel-3'}
+          - {os: ubuntu-latest,  r: 'oldrel-4'}
 
     env:
-      R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
-      RSPM: ${{ matrix.config.rspm }}
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+      R_KEEP_PKG_SOURCE: yes
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
 
-      - uses: r-lib/actions/setup-r@master
+      - uses: r-lib/actions/setup-pandoc@v2
+
+      - uses: r-lib/actions/setup-r@v2
         with:
           r-version: ${{ matrix.config.r }}
+          http-user-agent: ${{ matrix.config.http-user-agent }}
+          use-public-rspm: true
 
-      - uses: r-lib/actions/setup-pandoc@master
-
-      - name: Query dependencies
-        run: |
-          install.packages('remotes')
-          saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
-        shell: Rscript {0}
-
-      - name: Cache R packages
-        if: runner.os != 'Windows'
-        uses: actions/cache@v1
+      - uses: r-lib/actions/setup-r-dependencies@v2
         with:
-          path: ${{ env.R_LIBS_USER }}
-          key: ${{ runner.os }}-r-new-${{ matrix.config.r }}-1-${{ hashFiles('.github/depends.Rds') }}
-          restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-1-
-
-      - name: Install system dependencies
-        if: runner.os == 'Linux'
-        env:
-          RHUB_PLATFORM: linux-x86_64-ubuntu-gcc
-        run: |
-          Rscript -e "remotes::install_github('r-hub/sysreqs')"
-          sysreqs=$(Rscript -e "cat(sysreqs::sysreq_commands('DESCRIPTION'))")
-          sudo -s eval "$sysreqs"
-
-      - name: Install dependencies
-        run: |
-          remotes::install_deps(dependencies = TRUE)
-          remotes::install_cran("rcmdcheck")
-        shell: Rscript {0}
-
-      - name: Check
-        env:
-          _R_CHECK_CRAN_INCOMING_: false
-        run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")
-        shell: Rscript {0}
+          extra-packages: any::rcmdcheck
+          needs: check
 
-      - name: Upload check results
-        if: failure()
-        uses: actions/upload-artifact@master
+      - uses: r-lib/actions/check-r-package@v2
         with:
-          name: ${{ runner.os }}-r${{ matrix.config.r }}-results
-          path: check
+          upload-snapshots: true
+          build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")'
diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION
@@ -0,0 +1,3 @@
+Version: 1.0.6
+Date: 2025-02-28 18:37:11 UTC
+SHA: 1a43e440fb663c0d5a8d565b96f7e1d964a7d928
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,15 +1,16 @@
 Package: estimatr
 Type: Package
 Title: Fast Estimators for Design-Based Inference
-Version: 1.0.4
+Version: 1.0.6
 Authors@R: c(person("Graeme", "Blair", email = "graeme.blair@gmail.com", role = c("aut", "cre")),
              person("Jasper", "Cooper", email = "jjc2247@columbia.edu", role = c("aut")),
              person("Alexander", "Coppock", email = "alex.coppock@yale.edu", role = c("aut")),
              person("Macartan", "Humphreys", email = "macartan@gmail.com", role = c("aut")),
              person("Luke", "Sonnet", email = "luke.sonnet@gmail.com", role = c("aut")),
              person("Neal", "Fultz", email = "nfultz@gmail.com", role = c("ctb")),
              person("Lily", "Medina", email = "lilymiru@gmail.com", role = c("ctb")),
-             person("Russell", "Lenth", email = "russell-lenth@uiowa.edu", role = c("ctb")))
+             person("Russell", "Lenth", email = "russell-lenth@uiowa.edu", role = c("ctb")),
+             person("Molly", "Offer-Westort", email = "mollyow@uchicago.edu", role = c("ctb")))
 Description: Fast procedures for small set of commonly-used, design-appropriate estimators with robust standard errors and confidence intervals. Includes estimators for linear regression, instrumental variables regression, difference-in-means, Horvitz-Thompson estimation, and regression improving precision of experimental estimates by interacting treatment with centered pre-treatment covariates introduced by Lin (2013) <doi:10.1214/12-AOAS583>.
 URL: https://declaredesign.org/r/estimatr/, https://github.com/DeclareDesign/estimatr
 BugReports: https://github.com/DeclareDesign/estimatr/issues
@@ -23,7 +24,7 @@ Imports:
     rlang (>= 0.2.0)
 LinkingTo: Rcpp, RcppEigen
 Encoding: UTF-8
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
 LazyData: true
 Suggests:
     fabricatr (>= 0.10.0),

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,15 @@
+# estimatr 1.0.6 
+
+* Allows for prediction with lm_lin() when treatment is a factor and/or multi-valued.
+* Adds saved treatment_levels to the returned lm_lin model object.
+* Stops prediction for lm_lin if the treatment values in new data are not a subset of treatment_levels.
+* Standardizes model fit for lm_lin() models with no intercept.
+* Adds tests to ensure identical predictions from lm_lin() models where treatment is entered as either a numeric or a factor variable, and fit with/without an intercept.
+* Adds relevant examples to predict and lm_robust and lm_lin documentation.
+* Adds Molly Offer-Westort as a contributor.
+* Removes functionality for using `lh_robust` with multiple hypotheses.
+* Restricts functionality for using `lh_robust` with clustered standard errors to CR0 standard errors.
+
 # estimatr 1.0.4
 
 * Test suite changes for M1 mac stay current on CRAN.

diff --git a/R/S3_predict.R b/R/S3_predict.R
@@ -61,6 +61,27 @@
 #' new_dat$w <- runif(n)
 #' predict(lm_out, newdata = new_dat, weights = w, interval = "prediction")
 #'
+#' # Works for 'lm_lin' models as well
+#' dat$z <- sample(1:3, size = nrow(dat), replace = TRUE)
+#' lmlin_out1 <- lm_lin(y ~ z, covariates = ~ x, data = dat)
+#' predict(lmlin_out1, newdata = dat, interval = "prediction")
+#'
+#' # Predictions from Lin models are equivalent with and without an intercept
+#' # and for multi-level treatments entered as numeric or factor variables
+#' lmlin_out2 <- lm_lin(y ~ z - 1, covariates = ~ x, data = dat)
+#' lmlin_out3 <- lm_lin(y ~ factor(z), covariates = ~ x, data = dat)
+#' lmlin_out4 <- lm_lin(y ~ factor(z) - 1, covariates = ~ x, data = dat)
+#'
+#' predict(lmlin_out2, newdata = dat, interval = "prediction")
+#' predict(lmlin_out3, newdata = dat, interval = "prediction")
+#' predict(lmlin_out4, newdata = dat, interval = "prediction")
+#'
+#' # In Lin models, predict will stop with an error message if new
+#' # treatment levels are supplied in the new data
+#' new_dat$z <- sample(0:3, size = nrow(new_dat), replace = TRUE)
+#' # predict(lmlin_out1, newdata = new_dat)
+#'
+#'
 #' @export
 predict.lm_robust <- function(object,
                               newdata,
@@ -74,30 +95,6 @@ predict.lm_robust <- function(object,
 
   X <- get_X(object, newdata, na.action)
 
-  # lm_lin scaling
-  if (!is.null(object$scaled_center)) {
-    demeaned_covars <-
-      scale(
-        X[
-          ,
-          names(object$scaled_center),
-          drop = FALSE
-        ],
-        center = object$scaled_center,
-        scale = FALSE
-      )
-
-    # Interacted with treatment
-    treat_name <- attr(object$terms, "term.labels")[1]
-    interacted_covars <- X[, treat_name] * demeaned_covars
-
-    X <- cbind(
-      X[, attr(X, "assign") <= 1, drop = FALSE],
-      demeaned_covars,
-      interacted_covars
-    )
-  }
-
   # Get coefs
   coefs <- as.matrix(coef(object))
 
@@ -224,9 +221,64 @@ get_X <- function(object, newdata, na.action) {
 
   X <- model.matrix(rhs_terms, mf, contrasts.arg = object$contrasts)
 
+  # lm_lin scaling (moved down from predict.lm_robust)
+  if (!is.null(object$scaled_center)) {
+    # Covariates
+    demeaned_covars <-
+      scale(
+        X[
+          ,
+          names(object$scaled_center),
+          drop = FALSE
+        ],
+        center = object$scaled_center,
+        scale = FALSE
+      )
+
+    # Handle treatment variable reconstruction
+    treat_name <- attr(object$terms, "term.labels")[1]
+    treatment <- mf[, treat_name]
+    vals <- sort(unique(treatment))
+    old_vals <- object$treatment_levels
+
+    # Ensure treatment levels in newdata are subset of those for model fit
+    if (!all(as.character(vals) %in% as.character(old_vals))) {
+      stop(
+        "Levels of treatment variable in `newdata` must be a subset of those ",
+        "in the model fit."
+      )
+    }
+    treatment <- model.matrix(~ factor(treatment, levels = old_vals) - 1)
+
+    colnames(treatment) <- paste0(treat_name, "_", old_vals)
+    # Drop out first group if there is an intercept
+    if (attr(rhs_terms, "intercept") == 1) treatment <- treatment[, -1, drop = FALSE]
+
+    # Interactions matching original fitting logic
+    n_treat_cols <- ncol(treatment)
+    n_covars <- ncol(demeaned_covars)
+
+    interaction_matrix <- matrix(0, nrow = nrow(X), ncol = n_covars * n_treat_cols)
+
+    for (i in 1:n_covars) {
+      cols <- (i - 1) * n_treat_cols + (1:n_treat_cols)
+      interaction_matrix[, cols] <- treatment * demeaned_covars[, i]
+    }
+
+    X <- cbind(
+      if (attr(rhs_terms, "intercept") == 1) {
+        matrix(1, nrow = nrow(X), ncol = 1, dimnames = list(NULL, "(Intercept)"))
+      },
+      treatment,
+      if (attr(rhs_terms, "intercept") == 1 || ncol(treatment) == 1) demeaned_covars,
+      interaction_matrix
+    )
+  }
+
   return(X)
 }
 
+
 add_fes <- function(preds, object, newdata) {
 
   # Add factors!

diff --git a/R/estimatr_lh_robust.R b/R/estimatr_lh_robust.R
@@ -3,21 +3,23 @@
 #' @description This function fits a linear model with robust standard errors and performs linear hypothesis test.
 #' @param ... Other arguments to be passed to  \code{\link{lm_robust}}
 #' @param data A \code{data.frame}
-#' @param linear_hypothesis A character string or a matrix specifying combination, to be passed to the hypothesis.matrix argument of car::linearHypothesis
+#' @param linear_hypothesis A length 1 character string or a matrix specifying combination, to be passed to the hypothesis.matrix argument of car::linearHypothesis. Joint hypotheses are currently not handled by lh_robust.
 #' See \code{\link[car]{linearHypothesis}} for more details.
 #' @details
 #'
 #' This function is a wrapper for \code{\link{lm_robust}} and for
 #' \code{\link[car]{linearHypothesis}}. It first runs \code{lm_robust} and
 #' next passes \code{"lm_robust"} object as an argument to \code{linearHypothesis}.
+#' Currently CR2 standard errors are not handled by lh_robust.
 #'
 #' @return An object of class \code{"lh_robust"} containing the two following components:
 #'
 #' \item{lm_robust}{an object as returned by \code{lm_robust}.}
 #' \item{lh}{A data frame with most of its columns pulled from \code{linearHypothesis}' output.}
 #'
-#' The only analyis directly performed by \code{lh_robust} is a \code{t-test} for the null hypothesis of no effects of the linear combination of coefficients as specified by the user.
+#' The only analysis directly performed by \code{lh_robust} is a \code{t-test} for the null hypothesis of no effects of the linear combination of coefficients as specified by the user.
 #' All other output components are either extracted from \code{linearHypothesis} or \code{lm_robust}.
+#' Note that the estimate returned is the value of the LHS of an equation of the form f(X) = 0. Hypotheses "x - z = 1", "x +1= z + 2" and "x-z-1=0" will all return the value for "x-z-1".
 #'
 #' The original output returned by \code{linearHypothesis} is added as an attribute under the \code{"linear_hypothesis"} attribute.
 #'
@@ -64,11 +66,22 @@ lh_robust <- function(..., data, linear_hypothesis) {
   requireNamespace("car")
 
   # fit lm_robust model
-  lm_robust_fit <- lm_robust(..., data = data)
+  lm_robust_fit <- lm_robust(...,  data = data)
 
-  alpha <- eval_tidy(quos(...)$alpha)
-  if (is.null(alpha)) {
-    alpha <- 0.05
+  alpha <- lm_robust_fit$alpha
+
+  # Checks
+  # This stop could also be limited to hypotheses involving more than one coefficient
+  if(!is.null(lm_robust_fit$se_type) && lm_robust_fit$se_type == "CR2") {
+    stop("lh_robust not available for CR2 standard errors")
+  }
+
+  if(lm_robust_fit$clustered  && is.null(lm_robust_fit$se_type)) {
+    stop("lh_robust not available for CR2 standard errors; please specify CR0")
+  }
+
+  if(length(linear_hypothesis) > 1) {
+    stop("lh_robust currently implements tests for hypotheses involving linear combinations of variables but not joint hypotheses (for instance X1 = X2, but not X1 = 0 and X2=0")
   }
 
   # calculate linear hypothesis
@@ -78,8 +91,16 @@ lh_robust <- function(..., data, linear_hypothesis) {
   estimate  <- drop(attr(car_lht, "value"))
   std.error <- sqrt(diag(attr(car_lht, "vcov")))
 
-  # this df is not in general correct, but unclear what to replace it with
-  df <- lm_robust_fit$df.residual
+  if(length(estimate) > 1) {
+    stop("lh_robust currently implements tests for hypotheses involving linear combinations of variables but not joint hypotheses (for instance X1 = X2, but not X1 = 0 and X2=0")
+  }
+
+  df <- lm_robust_fit$df[1]
+
+  # df[1] is only appropriate when every coefficient shares the same df; warn otherwise
+  if(length(unique(lm_robust_fit$df)) > 1) {
+    warning("lh_robust inference may be inaccurate if degrees of freedom vary across coefficients")
+  }
 
   statistic <- estimate / std.error
   p.value <-  2 * pt(abs(statistic), df, lower.tail = FALSE)