diff --git a/R/rlassoEffects.R b/R/rlassoEffects.R index 1081336..84a3593 100644 --- a/R/rlassoEffects.R +++ b/R/rlassoEffects.R @@ -27,6 +27,8 @@ #' @return The function returns an object of class \code{rlassoEffects} with the following entries: \item{coefficients}{vector with estimated #' values of the coefficients for each selected variable} \item{se}{standard error (vector)} #' \item{t}{t-statistic} \item{pval}{p-value} \item{samplesize}{sample size of the data set} \item{index}{index of the variables for which inference is performed} +#' \item{selection.matrix}{A matrix indicating if a variable has been selected (TRUE) during the internal lasso estimation steps. Each column illustrates the variable selection for the inference procedure that corresponds to a specific treatment/target variable.} +#' \item{coefficients.reg}{Coefficient estimates from internal lasso regressions. Note that traditional inference on these coefficients is not valid in general.} #' @references A. Belloni, V. Chernozhukov, C. Hansen (2014). Inference on #' treatment effects after selection among high-dimensional controls. The #' Review of Economic Studies 81(2), 608-650. @@ -138,14 +140,14 @@ rlassoEffects.default <- function(x, y, index = c(1:ncol(x)), method = "partiall reside[, i] <- col$residuals$epsilon residv[, i] <- col$residuals$v coef.mat[[i]] <- col$coefficients.reg - selection.matrix[-index[i],i] <- col$selection.index + selection.matrix[-index[i],i] <- col$selection.matrix } } - #colnames(coef.mat) <- colnames(x)[index] + names(coef.mat) <- colnames(x)[index] residuals <- list(e = reside, v = residv) res <- list(coefficients = coefficients, se = se, t = t, pval = pval, lasso.regs = lasso.regs, index = index, call = match.call(), samplesize = n, - residuals = residuals, coef.mat = coef.mat, selection.matrix = selection.matrix) + residuals = residuals, coefficients.reg = coef.mat, selection.matrix = selection.matrix) class(res) <- "rlassoEffects" return(res) } @@ -247,9 +249,11 @@ rlassoEffect <- function(x, y, d, method = "double selection", I3 = NULL, # samplesize=n) se <- drop(se) names(se) <- colnames(d) + selection.matrix = as.matrix(I) + colnames(selection.matrix) = colnames(d) results <- list(alpha = alpha, se = se, t = tval, pval = pval, no.selected = no.selected, coefficients = alpha, coefficient = alpha, - coefficients.reg = coef(reg1), selection.index = I, residuals = res, call = match.call(), + coefficients.reg = coef(reg1), selection.matrix = selection.matrix, residuals = res, call = match.call(), samplesize = n) } @@ -269,9 +273,11 @@ rlassoEffect <- function(x, y, d, method = "double selection", I3 = NULL, I2 <- reg2$index I <- as.logical(I1 + I2) names(I) <- union(names(I1),names(I2)) + selection.matrix = as.matrix(I) + colnames(selection.matrix) = colnames(d) results <- list(alpha = unname(alpha), se = drop(se), t = unname(tval), pval = unname(pval), coefficients = unname(alpha), coefficient = unname(alpha), - coefficients.reg = coef(reg1), selection.index = I, residuals = res, call = match.call(), + coefficients.reg = coef(reg1), selection.matrix = selection.matrix, residuals = res, call = match.call(), samplesize = n) } class(results) <- "rlassoEffects" @@ -570,21 +576,12 @@ coef.rlassoEffects <- function(object, complete = TRUE, selection.matrix = FALSE mat <- object$selection.matrix - if (is.null(mat)) { - mat <- cbind(object$selection.index) - dmat2 <- dim(mat)[2] - rnames <- rownames(mat) - targetindx <- stats::complete.cases(mat) - } - - else { - dmat2 <- dim(mat)[2] - rnames <- rownames(mat) - targetindx <- stats::complete.cases(mat) - mat <- cbind(mat, as.logical(apply(mat, 1, sum))) - colnames(mat)[dim(mat)[2]] <- "global" - } - + dmat2 <- dim(mat)[2] + rnames <- rownames(mat) + targetindx <- stats::complete.cases(mat) + mat <- cbind(mat, as.logical(apply(mat, 1, sum))) + colnames(mat)[dim(mat)[2]] <- "global" + if (include.targets == FALSE) { mat <- mat[targetindx, , drop = FALSE] rnames <- rownames(mat) diff --git a/R/rlassologit.R b/R/rlassologit.R index 53b7c3e..07883e8 100644 --- a/R/rlassologit.R +++ b/R/rlassologit.R @@ -46,7 +46,7 @@ #' beta <- c(rep(2,px), rep(0,p-px)) #' intercept <- 1 #' P <- exp(intercept + X %*% beta)/(1+exp(intercept + X %*% beta)) -#' y <- rbinom(length(y), size=1, prob=P) +#' y <- rbinom(n, size=1, prob=P) #' ## fit rlassologit object #' rlassologit.reg <- rlassologit(y~X) #' ## methods diff --git a/R/rlassologitEffect.R b/R/rlassologitEffect.R index 3d2780d..c1ed01d 100644 --- a/R/rlassologitEffect.R +++ b/R/rlassologitEffect.R @@ -17,7 +17,9 @@ #' @param \dots additional parameters #' @return The function returns an object of class \code{rlassologitEffects} with the following entries: \item{coefficients}{estimated #' value of the coefficients} \item{se}{standard errors} -#' \item{t}{t-statistics} \item{pval}{p-values} \item{samplesize}{sample size of the data set} \item{I}{index of variables of the union of the lasso regressions} +#' \item{t}{t-statistics} \item{pval}{p-values} \item{samplesize}{sample size of the data set} +#' \item{selection.matrix}{A matrix indicating if a variable has been selected (TRUE) during the internal lasso/logistic lasso estimation steps. Each column illustrates the variable selection for the inference procedure that corresponds to a specific treatment/target variable.} +#' \item{coefficients.reg}{Coefficient estimates from final glm estimation step after internal selection with lasso/logistic lasso regressions. Note that traditional inference on these coefficients is not valid in general.} #' @references A. Belloni, V. Chernozhukov, Y. Wei (2013). Honest confidence regions for a regression parameter in logistic regression with a loarge number of controls. #' cemmap working paper CWP67/13. #' @export @@ -87,7 +89,10 @@ rlassologitEffects.default <- function(x, y, index = c(1:ncol(x)), I3 = NULL, po lasso.regs <- vector("list", k) reside <- matrix(NA, nrow = n, ncol = p1) residv <- matrix(NA, nrow = n, ncol = p1) - names(coefficients) <- names(se) <- names(t) <- names(pval) <- names(lasso.regs) <- colnames(reside) <- colnames(residv) <- colnames(x)[index] + coef.mat <- list() + selection.matrix <- matrix(NA, ncol = k, nrow = dim(x)[2]) + names(coefficients) <- names(se) <- names(t) <- names(pval) <- names(lasso.regs) <- colnames(reside) <- colnames(residv) <- colnames(selection.matrix) <- colnames(x)[index] + rownames(selection.matrix) <- colnames(x) for (i in 1:k) { d <- x[, index[i], drop = FALSE] @@ -103,12 +108,15 @@ rlassologitEffects.default <- function(x, y, index = c(1:ncol(x)), I3 = NULL, po pval[i] <- col$pval reside[,i] <- col$residuals$epsilon residv[,i] <- col$residuals$v + coef.mat[[i]] <- col$coefficients.reg + selection.matrix[-index[i],i] <- col$selection.matrix } } + names(coef.mat) <- colnames(x)[index] residuals <- list(e = reside, v = residv) res <- list(coefficients = coefficients, se = se, t = t, pval = pval, lasso.regs = lasso.regs, index = I, call = match.call(), samplesize = n, - residuals = residuals) + residuals = residuals, coefficients.reg = coef.mat, selection.matrix = selection.matrix) class(res) <- c("rlassologitEffects") return(res) } @@ -187,12 +195,12 @@ rlassologitEffect <- function(x, y, d, I3 = NULL, post = TRUE) { I <- I1 + I2 I <- as.logical(I) } - xselect <- x[, I] - p3 <- dim(xselect)[2] + dxselect <- cbind(d, x[, I]) + p3 <- dim(dxselect)[2]-1 #la3 <- 1.1/2 * sqrt(n) * qnorm(1 - 0.05/(max(n, (p3 + 1) * log(n)))) #l3 <- rlassologit(cbind(d, xselect), y, post = TRUE, normalize = TRUE, # intercept = TRUE, penalty = list(lambda.start = la3)) - l3 <- glm(y ~ cbind(d, xselect),family=binomial(link='logit')) + l3 <- glm(y ~ dxselect, family = binomial(link = 'logit')) alpha <- l3$coef[2] names(alpha) <- colnames(d) t3 <- predict(l3, type = "link") @@ -222,8 +230,11 @@ rlassologitEffect <- function(x, y, d, I3 = NULL, post = TRUE) { res <- list(epsilon= l3$residuals, v= z) se <- drop(se) names(se) <- colnames(d) + selection.matrix = as.matrix(I) + colnames(selection.matrix) = colnames(d) results <- list(alpha = alpha, se = se, t = tval, pval = pval, - no.selected = no.selected, coefficients = alpha, coefficient = alpha, + no.selected = no.selected, coefficients = alpha, coefficient = alpha, + coefficients.reg = coef(l3), selection.matrix = selection.matrix, residuals = res, call = match.call(), samplesize = n, post = post) class(results) <- c("rlassologitEffects") return(results) diff --git a/man/rlassoEffects.Rd b/man/rlassoEffects.Rd index 6446eac..ba2c69d 100644 --- a/man/rlassoEffects.Rd +++ b/man/rlassoEffects.Rd @@ -65,6 +65,8 @@ If not found in data, the variables are taken from environment(formula), typical The function returns an object of class \code{rlassoEffects} with the following entries: \item{coefficients}{vector with estimated values of the coefficients for each selected variable} \item{se}{standard error (vector)} \item{t}{t-statistic} \item{pval}{p-value} \item{samplesize}{sample size of the data set} \item{index}{index of the variables for which inference is performed} +\item{selection.matrix}{A matrix indicating if a variable has been selected (TRUE) during the internal lasso estimation steps. Each column illustrates the variable selection for the inference procedure that corresponds to a specific treatment/target variable.} +\item{coefficients.reg}{Coefficient estimates from internal lasso regressions. Note that traditional inference on these coefficients is not valid in general.} } \description{ Estimation and inference of (low-dimensional) target coefficients in a high-dimensional linear model. diff --git a/man/rlassologit.Rd b/man/rlassologit.Rd index 61cf5ee..00aea78 100644 --- a/man/rlassologit.Rd +++ b/man/rlassologit.Rd @@ -103,7 +103,7 @@ X <- matrix(rnorm(n*p), ncol=p) beta <- c(rep(2,px), rep(0,p-px)) intercept <- 1 P <- exp(intercept + X \%*\% beta)/(1+exp(intercept + X \%*\% beta)) -y <- rbinom(length(y), size=1, prob=P) +y <- rbinom(n, size=1, prob=P) ## fit rlassologit object rlassologit.reg <- rlassologit(y~X) ## methods diff --git a/man/rlassologitEffects.Rd b/man/rlassologitEffects.Rd index 3db703e..1fb7a7f 100644 --- a/man/rlassologitEffects.Rd +++ b/man/rlassologitEffects.Rd @@ -45,7 +45,9 @@ If not found in data, the variables are taken from environment(formula), typical \value{ The function returns an object of class \code{rlassologitEffects} with the following entries: \item{coefficients}{estimated value of the coefficients} \item{se}{standard errors} -\item{t}{t-statistics} \item{pval}{p-values} \item{samplesize}{sample size of the data set} \item{I}{index of variables of the union of the lasso regressions} +\item{t}{t-statistics} \item{pval}{p-values} \item{samplesize}{sample size of the data set} +\item{selection.matrix}{A matrix indicating if a variable has been selected (TRUE) during the internal lasso/logistic lasso estimation steps. Each column illustrates the variable selection for the inference procedure that corresponds to a specific treatment/target variable.} +\item{coefficients.reg}{Coefficient estimates from final glm estimation step after internal selection with lasso/logistic lasso regressions. Note that traditional inference on these coefficients is not valid in general.} } \description{ The function estimates (low-dimensional) target coefficients in a high-dimensional logistic model.