diff --git a/R/f_generics_clvfittedtransactions.R b/R/f_generics_clvfittedtransactions.R index c4eaf868..8a455e12 100644 --- a/R/f_generics_clvfittedtransactions.R +++ b/R/f_generics_clvfittedtransactions.R @@ -335,7 +335,8 @@ setMethod("clv.controlflow.predict.new.customer", signature = signature(clv.fitt check_err_msg("Parameter newdata has to be output from calling `newcustomer()`!") } - return(drop(clv.model.predict.new.customer( + # 1+: Include initial order + return(1 + drop(clv.model.predict.new.customer( clv.model = clv.fitted@clv.model, clv.fitted = clv.fitted, clv.newcustomer=clv.newcustomer))) diff --git a/R/f_generics_clvfittedtransactionsdyncov.R b/R/f_generics_clvfittedtransactionsdyncov.R index 9318c968..6f70e5b9 100644 --- a/R/f_generics_clvfittedtransactionsdyncov.R +++ b/R/f_generics_clvfittedtransactionsdyncov.R @@ -107,7 +107,8 @@ setMethod(f = "clv.controlflow.predict.new.customer", signature = signature(clv. tp.prediction.end=tp.prediction.end)) - return(clv.model.predict.new.customer( + # 1+: Include initial order + return(1 + clv.model.predict.new.customer( clv.model = clv.fitted@clv.model, clv.fitted = clv.fitted, clv.newcustomer=clv.newcustomer)) diff --git a/R/f_generics_clvfittedtransactionsstaticcov.R b/R/f_generics_clvfittedtransactionsstaticcov.R index b21c84d2..1122274c 100644 --- a/R/f_generics_clvfittedtransactionsstaticcov.R +++ b/R/f_generics_clvfittedtransactionsstaticcov.R @@ -91,7 +91,8 @@ setMethod(f = "clv.controlflow.predict.new.customer", signature = signature(clv. 
check_err_msg(check_user_data_predict_newcustomer_staticcov(clv.fitted=clv.fitted, clv.newcustomer=clv.newcustomer)) - return(drop(clv.model.predict.new.customer( + # 1+: Include initial order + return(1 + drop(clv.model.predict.new.customer( clv.model = clv.fitted@clv.model, clv.fitted = clv.fitted, clv.newcustomer=clv.newcustomer))) diff --git a/R/f_interface_newcustomer.R b/R/f_interface_newcustomer.R index f9d05f75..91c6c34d 100644 --- a/R/f_interface_newcustomer.R +++ b/R/f_interface_newcustomer.R @@ -4,11 +4,10 @@ #' @description #' The methods documented here are to be used together with #' \link[CLVTools:predict.clv.fitted.transactions]{predict (transactions)} to obtain -#' the expected number of transactions of an average newly alive customer and +#' the expected number of transactions of an average, yet-to-be acquired customer and #' with \link[CLVTools:predict.clv.fitted.spending]{predict (spending)} to obtain -#' the expected spending of an average newly alive customer. -#' This prediction is only sensible for (fictional) customers without order history: -#' Customers which just came alive and have not had the chance to reveal any more of their behavior. +#' the expected spending of an average, yet-to-be acquired customer. +#' See the \code{Method} subsection in Details for a more detailed explanation. #' #' The methods described here produce the data required as input to #' \code{predict(newdata=)} to make this new customer prediction. @@ -16,16 +15,20 @@ #' See details for the required format. #' #' \code{newcustomer()}, \code{newcustomer.static()}, \code{newcustomer.dynamic()}: -#' To predict the number of transactions a single, fictional, average new customer is expected to make in -#' the \code{num.periods} periods since making the first transaction ("coming alive"). +#' To predict the number of transactions a single, fictional, average, yet-to-be acquired +#' customer is expected to make in the first \code{num.periods} periods.
#' -#' \code{newcustomer.spending()}: To estimate how much a single, fictional, average -#' new customer is expected to spend on average per transaction. +#' \code{newcustomer.spending()}: To estimate how much a single, fictional, average, +#' yet-to-be acquired customer is expected to spend on average per transaction. +#' Note that the spending model should be fit with \code{remove.first.transaction=FALSE} +#' because the spending predictions are also used for the first orders. #' -#' @param num.periods A positive, numeric scalar indicating the number of periods to predict. +#' +#' +#' @param num.periods A positive, numeric scalar indicating the number of periods to predict from the initial transaction. #' @param data.cov.life Numeric-only covariate data for the lifetime process for a single customer, \code{data.table} or \code{data.frame}. See details. #' @param data.cov.trans Numeric-only covariate data for the transaction process for a single customer, \code{data.table} or \code{data.frame}. See details. -#' @param first.transaction For dynamic covariate models only: The time point of the first transaction of the customer ("coming alive") for which a prediction is made. +#' @param first.transaction For dynamic covariate models only: The time point of the first transaction of the customer ("coming alive"). #' Has to be within the time range of the covariate data. #' #' @seealso \link[CLVTools:predict.clv.fitted.transactions]{predict (transactions)} to use the output of the methods described here. @@ -52,14 +55,23 @@ #' additionally required because the exact covariates that are active during the prediction period have #' to be known. #' +#' +#' \subsection{Method}{ +#' These predictions are for average, prospective customers: Yet-to-be acquired +#' customers who still have to place their first order. +#' Therefore, the predicted number of expected orders also includes the initial purchase (1+).
+#' The subsequent orders in the first \code{t} periods are then predicted using the unconditional expectation. +#' In case of the Pareto/NBD this is +#' +#' \deqn{1 + E[X(t)]= 1 + \frac{r \beta}{\alpha (s-1)} \left[ 1- \left (\frac{\beta}{\beta+t} \right)^{s-1} \right].} +#' } +#' #' @returns #' \item{newcustomer()}{An object of class \code{clv.newcustomer.no.cov}} #' \item{newcustomer.static()}{An object of class \code{clv.newcustomer.static.cov}} #' \item{newcustomer.dynamic()}{An object of class \code{clv.newcustomer.dynamic.cov}} #' \item{newcustomer.spending()}{An object of class \code{clv.newcustomer.spending}} #' -#' -#' #' @examples #' \donttest{ #' data("apparelTrans") @@ -96,7 +108,9 @@ #' #' #' # Spending model -#' gg.apparel <- gg(clv.data.apparel) +#' # Note: remove.first.transaction=FALSE as the predicted spending will be multiplied +#' # with the total number of orders that also includes the initial purchase +#' gg.apparel <- gg(clv.data.apparel, remove.first.transaction=FALSE) #' predict(gg.apparel, newdata = newcustomer.spending()) #' #' diff --git a/man/newcustomer.Rd b/man/newcustomer.Rd index a156ba82..8b4ab4ee 100644 --- a/man/newcustomer.Rd +++ b/man/newcustomer.Rd @@ -21,13 +21,13 @@ newcustomer.dynamic( newcustomer.spending() } \arguments{ -\item{num.periods}{A positive, numeric scalar indicating the number of periods to predict.} +\item{num.periods}{A positive, numeric scalar indicating the number of periods to predict from the initial transaction.} \item{data.cov.life}{Numeric-only covariate data for the lifetime process for a single customer, \code{data.table} or \code{data.frame}. See details.} \item{data.cov.trans}{Numeric-only covariate data for the transaction process for a single customer, \code{data.table} or \code{data.frame}. See details.} -\item{first.transaction}{For dynamic covariate models only: The time point of the first transaction of the customer ("coming alive") for which a prediction is made. 
+\item{first.transaction}{For dynamic covariate models only: The time point of the first transaction of the customer ("coming alive"). Has to be within the time range of the covariate data.} } \value{ @@ -39,11 +39,10 @@ Has to be within the time range of the covariate data.} \description{ The methods documented here are to be used together with \link[CLVTools:predict.clv.fitted.transactions]{predict (transactions)} to obtain -the expected number of transactions of an average newly alive customer and +the expected number of transactions of an average, yet-to-be acquired customer and with \link[CLVTools:predict.clv.fitted.spending]{predict (spending)} to obtain -the expected spending of an average newly alive customer. -This prediction is only sensible for (fictional) customers without order history: -Customers which just came alive and have not had the chance to reveal any more of their behavior. +the expected spending of an average, yet-to-be acquired customer. +See the \code{Method} subsection in Details for a more detailed explanation. The methods described here produce the data required as input to \code{predict(newdata=)} to make this new customer prediction. @@ -51,11 +50,13 @@ This is mostly covariate data for static and dynamic covariate models. See details for the required format. \code{newcustomer()}, \code{newcustomer.static()}, \code{newcustomer.dynamic()}: -To predict the number of transactions a single, fictional, average new customer is expected to make in -the \code{num.periods} periods since making the first transaction ("coming alive"). +To predict the number of transactions a single, fictional, average, yet-to-be acquired +customer is expected to make in the first \code{num.periods} periods. -\code{newcustomer.spending()}: To estimate how much a single, fictional, average -new customer is expected to spend on average per transaction.
+\code{newcustomer.spending()}: To estimate how much a single, fictional, average, +yet-to-be acquired customer is expected to spend on average per transaction. +Note that the spending model should be fit with \code{remove.first.transaction=FALSE} +because the spending predictions are also used for the first orders. } \details{ The covariate data has to contain one column for every covariate parameter in the fitted model. Only numeric values are allowed, no factors or characters. @@ -77,6 +78,17 @@ See examples. For models with dynamic covariates, the time point of the first purchase (\code{first.transaction}) is additionally required because the exact covariates that are active during the prediction period have to be known. + + +\subsection{Method}{ +These predictions are for average, prospective customers: Yet-to-be acquired +customers who still have to place their first order. +Therefore, the predicted number of expected orders also includes the initial purchase (1+). +The subsequent orders in the first \code{t} periods are then predicted using the unconditional expectation.
+In case of the Pareto/NBD this is + +\deqn{1 + E[X(t)]= 1 + \frac{r \beta}{\alpha (s-1)} \left[ 1- \left (\frac{\beta}{\beta+t} \right)^{s-1} \right].} +} } \examples{ \donttest{ @@ -114,7 +126,9 @@ predict( # Spending model -gg.apparel <- gg(clv.data.apparel) +# Note: remove.first.transaction=FALSE as the predicted spending will be multiplied +# with the total number of orders that also includes the initial purchase +gg.apparel <- gg(clv.data.apparel, remove.first.transaction=FALSE) predict(gg.apparel, newdata = newcustomer.spending()) diff --git a/tests/testthat/helper_s3_fitted_plot.R b/tests/testthat/helper_s3_fitted_plot.R index 936a004b..34a5ff7d 100644 --- a/tests/testthat/helper_s3_fitted_plot.R +++ b/tests/testthat/helper_s3_fitted_plot.R @@ -313,6 +313,6 @@ fct.testthat.runability.clvfittedspending.plot <- function(fitted.spending){ skip_on_cran() expect_silent(res.plot.10 <- plot(fitted.spending, n = 10, verbose=FALSE)) expect_silent(res.plot.20 <- plot(fitted.spending, n = 20, verbose=FALSE)) - expect_false(isTRUE(all.equal(res.plot.10, res.plot.20))) + expect_false(isTRUE(all.equal(res.plot.10@layers, res.plot.20@layers))) }) } diff --git a/tests/testthat/helper_testthat_correctness_transactions.R b/tests/testthat/helper_testthat_correctness_transactions.R index 5138b5d6..83ed2673 100644 --- a/tests/testthat/helper_testthat_correctness_transactions.R +++ b/tests/testthat/helper_testthat_correctness_transactions.R @@ -218,9 +218,9 @@ fct.testthat.correctness.clvfittedtransactions.staticcov.regularization.lambda.0 } fct.testthat.correctness.clvfittedtransactions.nocov.predict.newcustomer.0.for.num.periods.eq.0 <- function(clv.fitted){ - test_that("nocov: predict newcustomer 0 for t=0", { + test_that("nocov: predict newcustomer==1 for t=0", { expect_silent(pred <- predict(clv.fitted, newdata=newcustomer(num.periods = 0))) - expect_true(pred == 0) + expect_true(pred == 1) }) } @@ -284,7 +284,7 @@ 
fct.testthat.correctness.clvfittedtransactions.staticcov.predict.newcustomer.dif } fct.testthat.correctness.clvfittedtransactions.staticcov.predict.newcustomer.0.for.num.periods.eq.0 <- function(m.fitted.static){ - test_that("staticcov: predict(newcustomer) 0 for t=0", { + test_that("staticcov: predict(newcustomer)==1 for t=0", { df.cov <- fct.helper.default.newcustomer.covdata.static() expect_silent(pred <- predict( m.fitted.static, @@ -292,7 +292,7 @@ fct.testthat.correctness.clvfittedtransactions.staticcov.predict.newcustomer.0.f num.periods = 0, data.cov.life = df.cov, data.cov.trans = df.cov))) - expect_true(pred == 0) + expect_true(pred == 1) }) } diff --git a/tests/testthat/test_correctness_pnbd_dyncov.R b/tests/testthat/test_correctness_pnbd_dyncov.R index e6c494c6..0e50997f 100644 --- a/tests/testthat/test_correctness_pnbd_dyncov.R +++ b/tests/testthat/test_correctness_pnbd_dyncov.R @@ -273,14 +273,14 @@ fct.testthat.correctness.dyncov.predict.newcustomer <- function(){ p.dyn <- fct.helper.dyncov.quickfit.apparel.data() df.cov <- fct.helper.default.newcustomer.covdata.dyncov() - test_that("dyncov: predict newcustomer 0 for t=0", { + test_that("dyncov: predict newcustomer==1 for t=0", { expect_silent(pred <- predict(p.dyn, newdata=newcustomer.dynamic( num.periods = 0, data.cov.life = df.cov, data.cov.trans = df.cov, first.transaction = "2000-01-04" ))) - expect_equal(pred, 0) + expect_equal(pred, 1) }) test_that("dyncov predict newcustomer different results for different covs", {