From 217d04a5cf16830b099fbd8be3f500db73a3cd19 Mon Sep 17 00:00:00 2001 From: malte Date: Mon, 3 Jun 2019 17:20:22 +0200 Subject: [PATCH 1/5] compute correlations only once --- R/LassoShooting.fit.R | 4 ++-- R/help_functions.R | 5 +++-- R/rlasso.R | 8 +++++--- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/R/LassoShooting.fit.R b/R/LassoShooting.fit.R index e7bcbbe..9cb6426 100644 --- a/R/LassoShooting.fit.R +++ b/R/LassoShooting.fit.R @@ -28,7 +28,7 @@ LassoShooting.fit <- function(x, y, lambda, control = list(maxIter = 1000, - optTol = 10^(-5), zeroThreshold = 10^(-6)), XX = NULL, Xy = NULL, beta.start = NULL) { + optTol = 10^(-5), zeroThreshold = 10^(-6)), XX = NULL, Xy = NULL, beta.start = NULL, corr = NULL) { n <- dim(x)[1] p <- dim(x)[2] if (is.null(XX)) @@ -41,7 +41,7 @@ LassoShooting.fit <- function(x, y, lambda, control = list(maxIter = 1000, # diag(1, p)) %*% Xy # # solve(XX+diag(as.vector(lambda))%*%diag(1,p))%*%Xy beta[is.nan(beta)] # <- 0 Zero-start beta <- rep(0,p) highest correlation start - beta <- init_values(x, y, intercept = FALSE)$coef + beta <- init_values(x, y, intercept = FALSE, corr=corr)$coef } else { beta <- beta.start } diff --git a/R/help_functions.R b/R/help_functions.R index 800e34c..1549ac1 100644 --- a/R/help_functions.R +++ b/R/help_functions.R @@ -4,8 +4,9 @@ format.perc <- function(probs, digits) paste(format(100 * probs, trim = TRUE, # function for calculation of the errors after choosing the five # variables with the highest correlation -init_values <- function(X, y, number = 5, intercept = TRUE) { - suppressWarnings(corr <- abs(cor(y, X))) +init_values <- function(X, y, number = 5, intercept = TRUE, corr = NULL) { + if (is.null(corr)) + (suppressWarnings(corr <- abs(cor(y, X)))) kx <- dim(X)[2] index <- order(corr, decreasing = T)[1:min(number, kx)] coefficients <- rep(0, kx) diff --git a/R/rlasso.R b/R/rlasso.R index 8e18fcd..149bea5 100644 --- a/R/rlasso.R +++ b/R/rlasso.R @@ -221,7 +221,9 @@ rlasso.default <- function(x, y, post = TRUE, intercept = TRUE, model = TRUE, XX <- crossprod(x) Xy <- crossprod(x, y) - startingval <- init_values(x,y)$residuals + suppressWarnings(corr <- abs(cor(y, x))) + + startingval <- init_values(x,y, corr=corr)$residuals pen <- lambdaCalculation(penalty = penalty, y = startingval, x = x) lambda <- pen$lambda Ups0 <- Ups1 <- pen$Ups0 @@ -235,13 +237,13 @@ rlasso.default <- function(x, y, post = TRUE, intercept = TRUE, model = TRUE, #coefTemp <- LassoShooting.fit(x, y, lambda, XX = XX, Xy = Xy)$coefficients #xn <- t(t(x)/as.vector(Ups1)) if (mm==1 && post) { - coefTemp <- LassoShooting.fit(x, y, lambda/2, XX = XX, Xy = Xy)$coefficients + coefTemp <- LassoShooting.fit(x, y, lambda/2, XX = XX, Xy = Xy, corr=corr)$coefficients #lasso.reg <- glmnet::glmnet(xn, y, family = c("gaussian"), alpha = 1, # lambda = lambda0/(2*n)/2, standardize = FALSE, intercept = FALSE) #lasso.reg <- glmnet::glmnet(x, y, family = c("gaussian"), alpha = 1, # lambda = lambda0/(2*n)/2, standardize = FALSE, intercept = FALSE, penalty.factor = Ups1) } else { - coefTemp <- LassoShooting.fit(x, y, lambda, XX = XX, Xy = Xy)$coefficients + coefTemp <- LassoShooting.fit(x, y, lambda, XX = XX, Xy = Xy, corr=corr)$coefficients #lasso.reg <- glmnet::glmnet(xn, y, family = c("gaussian"), alpha = 1, # lambda = lambda0/(2*n), standardize = FALSE, intercept = FALSE) #lasso.reg <- glmnet::glmnet(x, y, family = c("gaussian"), alpha = 1, From 09f8a7efb854fd9fc05aa675bcf1292e120fcc45 Mon Sep 17 00:00:00 2001 From: "Malte S. Kurz" Date: Mon, 3 Jun 2019 17:35:41 +0200 Subject: [PATCH 2/5] call init_values at central place and pass through --- R/LassoShooting.fit.R | 2 +- R/help_functions.R | 5 ++--- R/rlasso.R | 8 ++++---- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/R/LassoShooting.fit.R b/R/LassoShooting.fit.R index 9cb6426..508a6de 100644 --- a/R/LassoShooting.fit.R +++ b/R/LassoShooting.fit.R @@ -28,7 +28,7 @@ LassoShooting.fit <- function(x, y, lambda, control = list(maxIter = 1000, - optTol = 10^(-5), zeroThreshold = 10^(-6)), XX = NULL, Xy = NULL, beta.start = NULL, corr = NULL) { + optTol = 10^(-5), zeroThreshold = 10^(-6)), XX = NULL, Xy = NULL, beta.start = NULL) { n <- dim(x)[1] p <- dim(x)[2] if (is.null(XX)) diff --git a/R/help_functions.R b/R/help_functions.R index 1549ac1..800e34c 100644 --- a/R/help_functions.R +++ b/R/help_functions.R @@ -4,9 +4,8 @@ format.perc <- function(probs, digits) paste(format(100 * probs, trim = TRUE, # function for calculation of the errors after choosing the five # variables with the highest correlation -init_values <- function(X, y, number = 5, intercept = TRUE, corr = NULL) { - if (is.null(corr)) - (suppressWarnings(corr <- abs(cor(y, X)))) +init_values <- function(X, y, number = 5, intercept = TRUE) { + suppressWarnings(corr <- abs(cor(y, X))) kx <- dim(X)[2] index <- order(corr, decreasing = T)[1:min(number, kx)] coefficients <- rep(0, kx) diff --git a/R/rlasso.R b/R/rlasso.R index 149bea5..b01c9ae 100644 --- a/R/rlasso.R +++ b/R/rlasso.R @@ -221,9 +221,9 @@ rlasso.default <- function(x, y, post = TRUE, intercept = TRUE, model = TRUE, XX <- crossprod(x) Xy <- crossprod(x, y) - suppressWarnings(corr <- abs(cor(y, x))) + init_vals <- init_values(x,y) - startingval <- init_values(x,y, corr=corr)$residuals + startingval <- init_vals$residuals pen <- lambdaCalculation(penalty = penalty, y = startingval, x = x) lambda <- pen$lambda Ups0 <- Ups1 <- pen$Ups0 @@ -237,13 +237,13 @@ rlasso.default <- function(x, y, post = TRUE, intercept = TRUE, model = TRUE, #coefTemp <- LassoShooting.fit(x, y, lambda, XX = XX, Xy = Xy)$coefficients #xn <- t(t(x)/as.vector(Ups1)) if (mm==1 && post) { - coefTemp <- LassoShooting.fit(x, y, lambda/2, XX = XX, Xy = Xy, corr=corr)$coefficients + coefTemp <- LassoShooting.fit(x, y, lambda/2, XX = XX, Xy = Xy, beta.start = init_vals$coefficients)$coefficients #lasso.reg <- glmnet::glmnet(xn, y, family = c("gaussian"), alpha = 1, # lambda = lambda0/(2*n)/2, standardize = FALSE, intercept = FALSE) #lasso.reg <- glmnet::glmnet(x, y, family = c("gaussian"), alpha = 1, # lambda = lambda0/(2*n)/2, standardize = FALSE, intercept = FALSE, penalty.factor = Ups1) } else { - coefTemp <- LassoShooting.fit(x, y, lambda, XX = XX, Xy = Xy, corr=corr)$coefficients + coefTemp <- LassoShooting.fit(x, y, lambda, XX = XX, Xy = Xy, beta.start = init_vals$coefficients)$coefficients #lasso.reg <- glmnet::glmnet(xn, y, family = c("gaussian"), alpha = 1, # lambda = lambda0/(2*n), standardize = FALSE, intercept = FALSE) #lasso.reg <- glmnet::glmnet(x, y, family = c("gaussian"), alpha = 1, From 0e40e0f8ee3479b13ea7b84c7d573928742e83d7 Mon Sep 17 00:00:00 2001 From: "Malte S. Kurz" Date: Thu, 18 Jul 2019 10:39:17 +0200 Subject: [PATCH 3/5] small fix, corr is no longer transferred to init_values() --- R/LassoShooting.fit.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/LassoShooting.fit.R b/R/LassoShooting.fit.R index 508a6de..e7bcbbe 100644 --- a/R/LassoShooting.fit.R +++ b/R/LassoShooting.fit.R @@ -41,7 +41,7 @@ LassoShooting.fit <- function(x, y, lambda, control = list(maxIter = 1000, # diag(1, p)) %*% Xy # # solve(XX+diag(as.vector(lambda))%*%diag(1,p))%*%Xy beta[is.nan(beta)] # <- 0 Zero-start beta <- rep(0,p) highest correlation start - beta <- init_values(x, y, intercept = FALSE, corr=corr)$coef + beta <- init_values(x, y, intercept = FALSE)$coef } else { beta <- beta.start } From a07ae961de43f9d5de6f8ea0191c503224b75352 Mon Sep 17 00:00:00 2001 From: "Malte S. Kurz" Date: Fri, 22 Jan 2021 13:06:48 +0100 Subject: [PATCH 4/5] proposals for speed up of rlasso and lambda calculation --- R/rlasso.R | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/R/rlasso.R b/R/rlasso.R index b01c9ae..32ee478 100644 --- a/R/rlasso.R +++ b/R/rlasso.R @@ -282,8 +282,10 @@ rlasso.default <- function(x, y, post = TRUE, intercept = TRUE, model = TRUE, # refinement variance estimation if (post) { - reg <- lm(y ~ -1 + x1) - coefT <- coef(reg) + #reg <- lm(y ~ -1 + x1) + #coefT <- coef(reg) + reg <- .lm.fit(x1, y) + coefT <- reg$coefficients coefT[is.na(coefT)] <- 0 e1 <- y - x1 %*% coefT coefTemp[ind1] <- coefT @@ -494,14 +496,22 @@ lambdaCalculation <- function(penalty = list(homoscedastic = FALSE, X.dependent. # } xehat <- x*ehat psi <- apply(xehat, 2, function(x) mean(x^2)) - tXehattpsi <- t(t(xehat)/sqrt(psi)) + #tXehattpsi <- t(t(xehat)/sqrt(psi)) + # + # for (l in 1:R) { + # g <- matrix(rep(rnorm(n), each = p), ncol = p, byrow = TRUE) + # #sim[l] <- n * max(2 * colMeans(x * ehat* g)) + # sim[l] <- n * max(2 * abs(colMeans(tXehattpsi * g))) + # } - for (l in 1:R) { - g <- matrix(rep(rnorm(n), each = p), ncol = p, byrow = TRUE) - #sim[l] <- n * max(2 * colMeans(x * ehat* g)) - sim[l] <- n * max(2 * abs(colMeans(tXehattpsi * g))) - } + Xehattpsi <- t(xehat)/sqrt(psi) + for (l in 1:R) { + g <- rnorm(n) + sim[l] <- max(2* abs(Xehattpsi %*% g)) + } + #g <- matrix(rnorm(n*R), ncol = R, byrow=FALSE) + #sim <- apply(2 * abs(Xehattpsi %*% g), 2, max) lambda0 <- penalty$c * quantile(sim, probs = 1 - penalty$gamma) Ups0 <- 1/sqrt(n) * sqrt(t(t(y^2) %*% (x^2))) From bdc9d0178718a68353dc1288a34628e574ad1cd6 Mon Sep 17 00:00:00 2001 From: "Malte S. Kurz" Date: Fri, 22 Jan 2021 16:02:40 +0100 Subject: [PATCH 5/5] added import statement for .lm.fit --- NAMESPACE | 1 + R/pkg-package.R | 1 + 2 files changed, 2 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 7bcc23c..4916296 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -84,6 +84,7 @@ export(rlassologitEffects) export(tsls) importFrom(Formula,model.part) importFrom(methods,is) +importFrom(stats,.lm.fit) importFrom(stats,as.formula) importFrom(stats,binomial) importFrom(stats,coef) diff --git a/R/pkg-package.R b/R/pkg-package.R index 2d998dd..48804ec 100644 --- a/R/pkg-package.R +++ b/R/pkg-package.R @@ -33,6 +33,7 @@ #' @importFrom stats cor #' @importFrom stats glm #' @importFrom stats lm +#' @importFrom stats .lm.fit #' @importFrom stats model.frame #' @importFrom stats model.matrix #' @importFrom stats model.response