From cfe5d131cf811968118a88ac9d54f6dd7ac3c93f Mon Sep 17 00:00:00 2001 From: keelansmithers Date: Fri, 11 Nov 2022 12:57:44 -0500 Subject: [PATCH 01/21] update service_account.json for access to bigquery --- service_account.json | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/service_account.json b/service_account.json index 8b13789..b471968 100644 --- a/service_account.json +++ b/service_account.json @@ -1 +1,12 @@ - +{ + "type": "service_account", + "project_id": "wise-weaver-282922", + "private_key_id": "9d46c694ba5d9a9b0e3568dbeef37e41236190e1", + "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQDPp92OgQkcgcdo\nrRoMywbPF9omFAyGCS6IqzEeN+ENrKNMXLCb7LzPo/mxTqYUO8rqzVAUDUkmDaRX\nR6iD2bqS2VQdeXidxLjEq+xSvUcm3edVfz628a/0jrJNODUWQ0BTV9oDk3i9gI0/\npvFzPsMGxZA51SzrM+7k1jSvkfAVSEA7lsbYi6yiuMRDfgxmMRCii1/abYBlAL5q\nYYpmrCzevPgZjmG+TytHDkriQaZyMZ8DAkbE73Ywh6JCupwiGYO3SdGFn3/BCKmG\npj35+PEhFIKsQcGVgN+jd+1qz/znktkm6t4saZm1PQSagyRyLGHZgxFnPyRGRfOW\nUyQvPhONAgMBAAECgf98GtdPU88eOnHzHYF0I+Lt4lN+bn0Nnm8xPx8b8EmQ4py2\nBq+GgPZXIqkYTujzLskXi51QXqBZHc4aW7lbE8Vt/nVHKFHt+KR05ktXJ8heckDP\n2DyjGc8PWjk17WHM7GSFavvhJ0IB/9JrOFsVhW0JnwmThtRRvbgJC/xw8ceK74+M\nB5IKp/0lMJwUgPUm6sRnxHlfoYW4kLt/Z8454lItJi6Dqniy0Ou7qBei6ZRMCHSq\nsCJKhQvbkyCJYUaKsum2rOF4PO+cz4Gs7r7m03Ya/cY4aTt6twl0hgQl+alfza7Z\nE1DSyTgJXl1X8nv/9Q2byOgxv2OdVGYOWO19AGECgYEA8cNXwCSfvpxXtZ7ZrePc\nlj6ZB/ZlNnIuCE4U7pKtzkhq9PDn5WSWsYWyudRp/J2pJOS3gsu67T3kxb1uf3OT\nJOhbxBLiM3S/RsJDLv3aAi6O3jhyzA9YPEkAeyPJIBdJWJCABo1yMKKZ28PnRyZz\nqi1C8ueYvZ9ObuE70WNWd7kCgYEA2+JX7XxTngsZhP0hy9fffTKHdcW9Kz34b7sF\n06kCeUHfoq0WnTRt0YQn/VJvkGG7Fjp+9+CvyZI5eZ2MAgqYA8PoCdvz1cApZUza\n0wbgauEIWnow66sOrRue0IafcSTXoPYxrfXNgne4PWX7hmC7+aeAguz8bmmHZEVp\nVmr6PHUCgYEA60CGdvYsa7J+tsCoWUlNMPYgu1rMCjr5CCPSwoQAlKZrTYXOYa2L\n5ZqP27wSljpkiP32hFPyxUBhCPnYODUijdt80EL/0cpdBmGqNmPWbCEZ7KtJKcAU\nwDTZPvoHkC6YtJGHwtiIsULILM1olBeanZIFf5BVVM9b5xFMv+fGECECgYBQeqi6\nGH42Y1Ok1E1xYYLAPSVNWuGhGR58LpS0YrCwFwc2cGnNg
ioLvHYyhuCklNC4ExLz\nwP42Jdn5hEuBV8VJ44NjUGkPvDSqihWxlVvAYrr++qg7KbHGdEyNgCiNbfGqTVHj\nnQrqDM2XU2NbTL6gYslrojfGZcOTVMB4S9FpHQKBgQC2vBJcrAmAy10nB91BVgOd\nQRE6Evc8QPrjFsNoUoD/8yVEMVCcdf8BkycpPGtvVkBrf6KRIEgMAOUrEq0vmteA\nN+4IYDqBI+blIwnpmRrekt/Z2X5Pb3nK9XOvWiBqr32UFtL++V5xJfE35zUgroBe\nUk/bnJoEKKQFg6VP7IQPIA==\n-----END PRIVATE KEY-----\n", + "client_email": "test-project@wise-weaver-282922.iam.gserviceaccount.com", + "client_id": "102252416935570843819", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", + "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/test-project%40wise-weaver-282922.iam.gserviceaccount.com" +} From fc01331fbdfab9a10f328704916e70be459d9f1b Mon Sep 17 00:00:00 2001 From: keelansmithers Date: Mon, 14 Nov 2022 21:53:51 -0500 Subject: [PATCH 02/21] create first pass base table --- models/base/_source.yml | 9 ++++++++ .../base__acme1_recharge_subscriptions.sql | 21 +++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 models/base/_source.yml create mode 100644 models/base/base__acme1_recharge_subscriptions.sql diff --git a/models/base/_source.yml b/models/base/_source.yml new file mode 100644 index 0000000..853156c --- /dev/null +++ b/models/base/_source.yml @@ -0,0 +1,9 @@ +version: 2 + +sources: + - name: raw_data_sandbox + database: wise-weaver-282922 + schema: raw_data_sandbox + + tables: + - name: acme1_recharge_subscriptions diff --git a/models/base/base__acme1_recharge_subscriptions.sql b/models/base/base__acme1_recharge_subscriptions.sql new file mode 100644 index 0000000..d24fe88 --- /dev/null +++ b/models/base/base__acme1_recharge_subscriptions.sql @@ -0,0 +1,21 @@ +-- the intent of this base model is to logically organize the columns in the source +-- table for readability and do some light cleaning + re-aliasing where necessary. 
+ +select + id as recharge_subscription_id, + customer_id, + + -- subscription product details + shopify_product_id, + recharge_subscription_id, + shopify_variant_id, + sku, + product_title, + price, + + -- timestamps + created_at, + updated_at, + cancelled_at + +from {{ source('raw_data_sandbox', 'acme1_recharge_subscriptions') }} From 71135bfe6b1583a08f07d86f5f3e05c6ca49b3e3 Mon Sep 17 00:00:00 2001 From: keelansmithers Date: Mon, 14 Nov 2022 22:53:29 -0500 Subject: [PATCH 03/21] clean up base model, remove test.sql --- models/base/base__acme1_recharge_subscriptions.sql | 8 ++++++-- models/test.sql | 2 -- 2 files changed, 6 insertions(+), 4 deletions(-) delete mode 100644 models/test.sql diff --git a/models/base/base__acme1_recharge_subscriptions.sql b/models/base/base__acme1_recharge_subscriptions.sql index d24fe88..7be4e7e 100644 --- a/models/base/base__acme1_recharge_subscriptions.sql +++ b/models/base/base__acme1_recharge_subscriptions.sql @@ -2,17 +2,21 @@ -- table for readability and do some light cleaning + re-aliasing where necessary. 
select - id as recharge_subscription_id, + id, customer_id, -- subscription product details shopify_product_id, - recharge_subscription_id, + recharge_product_id, shopify_variant_id, sku, product_title, price, + -- status and other info + status, + lower(cancellation_reason) as cancellation_reason, + -- timestamps created_at, updated_at, diff --git a/models/test.sql b/models/test.sql deleted file mode 100644 index aee96f8..0000000 --- a/models/test.sql +++ /dev/null @@ -1,2 +0,0 @@ -select * -from `wise-weaver-282922.raw_data_sandbox.acme1_recharge_subscriptions` From 550bf3aeac1e68027d0ad2fe6096a44f320239aa Mon Sep 17 00:00:00 2001 From: keelansmithers Date: Tue, 15 Nov 2022 11:35:38 -0500 Subject: [PATCH 04/21] rename base model --- .../base__acme1_recharge_subscriptions.sql | 25 ----------- models/base/base__recharge_subscriptions.sql | 42 +++++++++++++++++++ 2 files changed, 42 insertions(+), 25 deletions(-) delete mode 100644 models/base/base__acme1_recharge_subscriptions.sql create mode 100644 models/base/base__recharge_subscriptions.sql diff --git a/models/base/base__acme1_recharge_subscriptions.sql b/models/base/base__acme1_recharge_subscriptions.sql deleted file mode 100644 index 7be4e7e..0000000 --- a/models/base/base__acme1_recharge_subscriptions.sql +++ /dev/null @@ -1,25 +0,0 @@ --- the intent of this base model is to logically organize the columns in the source --- table for readability and do some light cleaning + re-aliasing where necessary. 
- -select - id, - customer_id, - - -- subscription product details - shopify_product_id, - recharge_product_id, - shopify_variant_id, - sku, - product_title, - price, - - -- status and other info - status, - lower(cancellation_reason) as cancellation_reason, - - -- timestamps - created_at, - updated_at, - cancelled_at - -from {{ source('raw_data_sandbox', 'acme1_recharge_subscriptions') }} diff --git a/models/base/base__recharge_subscriptions.sql b/models/base/base__recharge_subscriptions.sql new file mode 100644 index 0000000..488d04b --- /dev/null +++ b/models/base/base__recharge_subscriptions.sql @@ -0,0 +1,42 @@ +-- intent of this base model: +---- logically organize the columns in the source table for readability +---- light cleaning + re-aliasing where necessary +---- derive fields that will be helpful downstream + +select + id as subscription_id, + customer_id, + + -- subscription product details + shopify_product_id, + recharge_product_id, + shopify_variant_id, + sku, + product_title, + price, + + -- status and other info + status, + lower(cancellation_reason) as cancellation_reason, + + -- derived + + -- business definition for cancelled subscriptions does not count instances + -- where cancellation reason is due to max number of charge attempts + case + when status = 'CANCELLED' + and lower(cancellation_reason) not like '%max number of charge attempts%' + then true + else false + end as was_cancelled, + + -- timestamps + created_at, + cancelled_at, + updated_at, + + -- dates + date(created_at) as created_date, + date(cancelled_at) as cancelled_date + +from {{ source('raw_data_sandbox', 'acme1_recharge_subscriptions') }} From 389b2335231c1b4bf29308204447080557ec1255 Mon Sep 17 00:00:00 2001 From: keelansmithers Date: Tue, 15 Nov 2022 12:00:33 -0500 Subject: [PATCH 05/21] establish final model and include easy metrics (subscriptions new and cancelled) --- models/final/subscriptions_daily_metrics.sql | 17 +++++++++++++++++ 
models/intermediate/_intermediate.yml | 18 ++++++++++++++++++ .../intermediate/subscriptions_cancelled.sql | 7 +++++++ models/intermediate/subscriptions_new.sql | 6 ++++++ 4 files changed, 48 insertions(+) create mode 100644 models/final/subscriptions_daily_metrics.sql create mode 100644 models/intermediate/_intermediate.yml create mode 100644 models/intermediate/subscriptions_cancelled.sql create mode 100644 models/intermediate/subscriptions_new.sql diff --git a/models/final/subscriptions_daily_metrics.sql b/models/final/subscriptions_daily_metrics.sql new file mode 100644 index 0000000..b576947 --- /dev/null +++ b/models/final/subscriptions_daily_metrics.sql @@ -0,0 +1,17 @@ +with joined as ( + + select + coalesce( + subscriptions_new.date, + subscriptions_cancelled.date + ) as date, + + subscriptions_new.subscriptions_new, + subscriptions_cancelled.subscriptions_cancelled + + from {{ ref('subscriptions_new') }} + full outer join {{ ref('subscriptions_cancelled') }} + on subscriptions_new.date = subscriptions_cancelled.date +) + +select * from joined diff --git a/models/intermediate/_intermediate.yml b/models/intermediate/_intermediate.yml new file mode 100644 index 0000000..88215e1 --- /dev/null +++ b/models/intermediate/_intermediate.yml @@ -0,0 +1,18 @@ +version: 2 + +models: + - name: subscriptions_new + + columns: + - name: date + tests: + - unique + - not_null + + - name: subscriptions_cancelled + + columns: + - name: date + tests: + - unique + - not_null diff --git a/models/intermediate/subscriptions_cancelled.sql b/models/intermediate/subscriptions_cancelled.sql new file mode 100644 index 0000000..4b3382b --- /dev/null +++ b/models/intermediate/subscriptions_cancelled.sql @@ -0,0 +1,7 @@ +select + cancelled_date as date, + count(*) as subscriptions_cancelled + +from {{ ref('base__recharge_subscriptions') }} +where was_cancelled = true +group by 1 diff --git a/models/intermediate/subscriptions_new.sql b/models/intermediate/subscriptions_new.sql new file 
mode 100644 index 0000000..928e780 --- /dev/null +++ b/models/intermediate/subscriptions_new.sql @@ -0,0 +1,6 @@ +select + created_date as date, + count(*) as subscriptions_new + +from {{ ref('base__recharge_subscriptions') }} +group by 1 From 68257a5038993f92c51bcc289aa9d98f21d3bded Mon Sep 17 00:00:00 2001 From: keelansmithers Date: Tue, 15 Nov 2022 12:13:12 -0500 Subject: [PATCH 06/21] add row_number to base model to derive subscribers_new --- models/base/base__recharge_subscriptions.sql | 3 +++ models/final/subscriptions_daily_metrics.sql | 27 ++++++++++---------- models/intermediate/subscribers_new.sql | 7 +++++ 3 files changed, 23 insertions(+), 14 deletions(-) create mode 100644 models/intermediate/subscribers_new.sql diff --git a/models/base/base__recharge_subscriptions.sql b/models/base/base__recharge_subscriptions.sql index 488d04b..7e41547 100644 --- a/models/base/base__recharge_subscriptions.sql +++ b/models/base/base__recharge_subscriptions.sql @@ -30,6 +30,9 @@ select else false end as was_cancelled, + -- window functions + row_number() over (partition by customer_id order by created_at asc) as customer_subscription_number, + -- timestamps created_at, cancelled_at, diff --git a/models/final/subscriptions_daily_metrics.sql b/models/final/subscriptions_daily_metrics.sql index b576947..20ac6c9 100644 --- a/models/final/subscriptions_daily_metrics.sql +++ b/models/final/subscriptions_daily_metrics.sql @@ -1,17 +1,16 @@ -with joined as ( +select + coalesce( + subscriptions_new.date, + subscriptions_cancelled.date + ) as date, - select - coalesce( - subscriptions_new.date, - subscriptions_cancelled.date - ) as date, + subscriptions_new.subscriptions_new, + subscriptions_cancelled.subscriptions_cancelled, - subscriptions_new.subscriptions_new, - subscriptions_cancelled.subscriptions_cancelled + subscribers_new.subscribers_new - from {{ ref('subscriptions_new') }} - full outer join {{ ref('subscriptions_cancelled') }} - on subscriptions_new.date = 
subscriptions_cancelled.date -) - -select * from joined +from {{ ref('subscriptions_new') }} +full outer join {{ ref('subscriptions_cancelled') }} + on subscriptions_new.date = subscriptions_cancelled.date +left join {{ ref('subscribers_new') }} + on subscriptions_new.date = subscribers_new.date diff --git a/models/intermediate/subscribers_new.sql b/models/intermediate/subscribers_new.sql new file mode 100644 index 0000000..2e99827 --- /dev/null +++ b/models/intermediate/subscribers_new.sql @@ -0,0 +1,7 @@ +select + created_date as date, + count(*) as subscribers_new + +from {{ ref('base__recharge_subscriptions') }} +where customer_subscription_number = 1 +group by 1 From a8187a6a133ea1501efefb4d95b3866ad907b1ca Mon Sep 17 00:00:00 2001 From: keelansmithers Date: Tue, 15 Nov 2022 13:07:38 -0500 Subject: [PATCH 07/21] first pass subscription date spine model --- models/base/date_spine.sql | 8 +++++++ models/intermediate/subscriptions_days.sql | 25 ++++++++++++++++++++++ packages.yml | 3 +++ 3 files changed, 36 insertions(+) create mode 100644 models/base/date_spine.sql create mode 100644 models/intermediate/subscriptions_days.sql create mode 100644 packages.yml diff --git a/models/base/date_spine.sql b/models/base/date_spine.sql new file mode 100644 index 0000000..0107504 --- /dev/null +++ b/models/base/date_spine.sql @@ -0,0 +1,8 @@ +-- To Do: is there a way to make these variables that accept the min and max of the +-- created dates from the subscriptions table? 
+{{ dbt_utils.date_spine( + datepart="day", + start_date="'2019-01-01'", + end_date="current_date" + ) +}} diff --git a/models/intermediate/subscriptions_days.sql b/models/intermediate/subscriptions_days.sql new file mode 100644 index 0000000..f047ebb --- /dev/null +++ b/models/intermediate/subscriptions_days.sql @@ -0,0 +1,25 @@ +-- intent of this model: +---- fan out subscriptions by days between created date and cancelled date to identify +---- all the days where this subscription was active and contributing to the overall +---- active subscriptions count. + +select + date_spine.date_day, + subscriptions.subscription_id, + subscriptions.customer_id, + + subscriptions.created_date, + subscriptions.cancelled_date, + + -- identify the days that the subscription was active + case + when subscriptions.cancelled_date is null then true + when date_day < subscriptions.cancelled_date then true + when date_day = subscriptions.cancelled_date then false + end as is_active + +from {{ ref('date_spine') }} +left join {{ ref('base__recharge_subscriptions') }} as subscriptions + on date_spine.date_day >= subscriptions.created_date + -- not ideal to hard code a date in the future, but placeholding to make this work before refactoring + and date_spine.date_day <= coalesce(subscriptions.cancelled_date, '2099-01-01') diff --git a/packages.yml b/packages.yml new file mode 100644 index 0000000..c681086 --- /dev/null +++ b/packages.yml @@ -0,0 +1,3 @@ +packages: + - package: dbt-labs/dbt_utils + version: 0.9.2 From a087b0de8271fa447c9e5ed3498f444ebd72e691 Mon Sep 17 00:00:00 2001 From: keelansmithers Date: Tue, 15 Nov 2022 16:48:42 -0500 Subject: [PATCH 08/21] add tests to base models --- models/base/_base.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 models/base/_base.yml diff --git a/models/base/_base.yml b/models/base/_base.yml new file mode 100644 index 0000000..8d648cd --- /dev/null +++ b/models/base/_base.yml @@ -0,0 +1,21 @@ +version: 2 + 
+models: + - name: base__recharge_subscriptions + columns: + - name: subscription_id + description: > + the primary key id of the subscription source table. Re-aliased for clarity downstream. + tests: + - unique + - not_null + + - name: date_spine + description: > + a utility table built using dbt_utils. this can be used downstream to fan out other tables + to build a picture of daily activity from timestamp/date fields. + columns: + - name: date_day + tests: + - unique + - not_null From dfa398df690f57fa26ed177df3029dabb182af1d Mon Sep 17 00:00:00 2001 From: keelansmithers Date: Tue, 15 Nov 2022 17:22:51 -0500 Subject: [PATCH 09/21] edit subscriptions date spine model and create active subs models --- models/final/subscriptions_daily_metrics.sql | 8 +++++- models/intermediate/subscribers_active.sql | 9 +++++++ models/intermediate/subscriptions_active.sql | 6 +++++ models/intermediate/subscriptions_days.sql | 26 +++++++++++--------- 4 files changed, 36 insertions(+), 13 deletions(-) create mode 100644 models/intermediate/subscribers_active.sql create mode 100644 models/intermediate/subscriptions_active.sql diff --git a/models/final/subscriptions_daily_metrics.sql b/models/final/subscriptions_daily_metrics.sql index 20ac6c9..6dd00a7 100644 --- a/models/final/subscriptions_daily_metrics.sql +++ b/models/final/subscriptions_daily_metrics.sql @@ -6,11 +6,17 @@ select subscriptions_new.subscriptions_new, subscriptions_cancelled.subscriptions_cancelled, + subscriptions_active.subscriptions_active, - subscribers_new.subscribers_new + subscribers_new.subscribers_new, + subscribers_active.subscribers_active from {{ ref('subscriptions_new') }} full outer join {{ ref('subscriptions_cancelled') }} on subscriptions_new.date = subscriptions_cancelled.date +left join {{ ref('subscriptions_active') }} + on subscriptions_new.date = subscriptions_active.date left join {{ ref('subscribers_new') }} on subscriptions_new.date = subscribers_new.date +full outer join {{ 
ref('subscribers_active') }} + on subscriptions_new.date = subscribers_active.date diff --git a/models/intermediate/subscribers_active.sql b/models/intermediate/subscribers_active.sql new file mode 100644 index 0000000..481ec43 --- /dev/null +++ b/models/intermediate/subscribers_active.sql @@ -0,0 +1,9 @@ +-- this model takes the date-spined subscriptions model and aggregates to the date grain. +-- from there we calculate the number of subscriptions and subscribers that were considered that +-- day. +select + date, + count(distinct customer_id) as subscribers_active + +from {{ ref('subscriptions_days') }} +group by 1 diff --git a/models/intermediate/subscriptions_active.sql b/models/intermediate/subscriptions_active.sql new file mode 100644 index 0000000..30b5ca7 --- /dev/null +++ b/models/intermediate/subscriptions_active.sql @@ -0,0 +1,6 @@ +select + date, + count(*) as subscriptions_active + +from {{ ref('subscriptions_days') }} +group by 1 diff --git a/models/intermediate/subscriptions_days.sql b/models/intermediate/subscriptions_days.sql index f047ebb..96c1897 100644 --- a/models/intermediate/subscriptions_days.sql +++ b/models/intermediate/subscriptions_days.sql @@ -1,25 +1,27 @@ --- intent of this model: ----- fan out subscriptions by days between created date and cancelled date to identify ----- all the days where this subscription was active and contributing to the overall ----- active subscriptions count. +-- this model fans out subscriptions by days between created date and cancelled date to identify +-- all the days where this subscription was active and contributing to the overall active +-- subscriptions count. downstream we can aggregate and filter this in multiple ways to calculate +-- different daily metrics. 
select - date_spine.date_day, + date_spine.date_day as date, subscriptions.subscription_id, subscriptions.customer_id, subscriptions.created_date, subscriptions.cancelled_date, - -- identify the days that the subscription was active + -- identify the days that the subscription was active. case when subscriptions.cancelled_date is null then true when date_day < subscriptions.cancelled_date then true when date_day = subscriptions.cancelled_date then false - end as is_active + end as is_subscription_active -from {{ ref('date_spine') }} -left join {{ ref('base__recharge_subscriptions') }} as subscriptions - on date_spine.date_day >= subscriptions.created_date - -- not ideal to hard code a date in the future, but placeholding to make this work before refactoring - and date_spine.date_day <= coalesce(subscriptions.cancelled_date, '2099-01-01') +from {{ ref('base__recharge_subscriptions') }} as subscriptions +left join {{ ref('date_spine') }} + on subscriptions.created_date <= date_spine.date_day + and ( + subscriptions.cancelled_date >= date_spine.date_day + or subscriptions.cancelled_date is null + ) From be1f6ee50aeee849492379ffe270b294594f8106 Mon Sep 17 00:00:00 2001 From: keelansmithers Date: Tue, 15 Nov 2022 17:25:28 -0500 Subject: [PATCH 10/21] quick logic fix --- models/intermediate/subscriptions_active.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/models/intermediate/subscriptions_active.sql b/models/intermediate/subscriptions_active.sql index 30b5ca7..d04eb12 100644 --- a/models/intermediate/subscriptions_active.sql +++ b/models/intermediate/subscriptions_active.sql @@ -3,4 +3,5 @@ select count(*) as subscriptions_active from {{ ref('subscriptions_days') }} +where is_active = true group by 1 From 375b235b7f6ead9da979314d579aa7a93b12260f Mon Sep 17 00:00:00 2001 From: keelansmithers Date: Tue, 15 Nov 2022 18:34:24 -0500 Subject: [PATCH 11/21] rearrange final model to all join to date spine, add coalesces --- models/base/date_spine.sql | 6 ++-- 
models/final/subscriptions_daily_metrics.sql | 34 ++++++++++--------- .../intermediate/subscriptions_returning.sql | 20 +++++++++++ 3 files changed, 41 insertions(+), 19 deletions(-) create mode 100644 models/intermediate/subscriptions_returning.sql diff --git a/models/base/date_spine.sql b/models/base/date_spine.sql index 0107504..74f16f0 100644 --- a/models/base/date_spine.sql +++ b/models/base/date_spine.sql @@ -1,8 +1,8 @@ --- To Do: is there a way to make these variables that accept the min and max of the --- created dates from the subscriptions table? +-- To Do: look into using Jinja to set these parameters using a SQL statement in a macro +-- https://stackoverflow.com/questions/64007239/hi-how-do-we-define-select-statement-as-a-variable-in-dbt {{ dbt_utils.date_spine( datepart="day", start_date="'2019-01-01'", - end_date="current_date" + end_date="'2022-04-08'" ) }} diff --git a/models/final/subscriptions_daily_metrics.sql b/models/final/subscriptions_daily_metrics.sql index 6dd00a7..f962698 100644 --- a/models/final/subscriptions_daily_metrics.sql +++ b/models/final/subscriptions_daily_metrics.sql @@ -1,22 +1,24 @@ select - coalesce( - subscriptions_new.date, - subscriptions_cancelled.date - ) as date, + date_spine.date_day as date, - subscriptions_new.subscriptions_new, - subscriptions_cancelled.subscriptions_cancelled, - subscriptions_active.subscriptions_active, + coalesce(subscriptions_new.subscriptions_new) as subscriptions_new, + coalesce(subscriptions_returning.subscriptions_returning) as subscriptions_returning, + coalesce(subscriptions_cancelled.subscriptions_cancelled) as subscriptions_cancelled, + coalesce(subscriptions_active.subscriptions_active) as subscriptions_active, - subscribers_new.subscribers_new, - subscribers_active.subscribers_active + coalesce(subscribers_new.subscribers_new) as subscribers_new, + coalesce(subscribers_active.subscribers_active) as subscribers_active -from {{ ref('subscriptions_new') }} -full outer join {{ 
ref('subscriptions_cancelled') }} - on subscriptions_new.date = subscriptions_cancelled.date +from {{ ref('date_spine') }} +left join {{ ref('subscriptions_new') }} + on date_spine.date_day = subscriptions_new.date +left join {{ ref('subscriptions_returning') }} + on date_spine.date_day = subscriptions_returning.date +left join {{ ref('subscriptions_cancelled') }} + on date_spine.date_day = subscriptions_cancelled.date left join {{ ref('subscriptions_active') }} - on subscriptions_new.date = subscriptions_active.date + on date_spine.date_day = subscriptions_active.date left join {{ ref('subscribers_new') }} - on subscriptions_new.date = subscribers_new.date -full outer join {{ ref('subscribers_active') }} - on subscriptions_new.date = subscribers_active.date + on date_spine.date_day = subscribers_new.date +left join {{ ref('subscribers_active') }} + on date_spine.date_day = subscribers_active.date diff --git a/models/intermediate/subscriptions_returning.sql b/models/intermediate/subscriptions_returning.sql new file mode 100644 index 0000000..0033289 --- /dev/null +++ b/models/intermediate/subscriptions_returning.sql @@ -0,0 +1,20 @@ +with base_lagged as ( + + select + *, + lag(was_cancelled) over ( + + partition by customer_id, recharge_product_id + order by created_at + ) as was_previous_subscription_cancelled + + from {{ ref('base__recharge_subscriptions') }} +) + +select + date(created_at) as date, + count(*) as subscriptions_returning + +from base_lagged +where was_previous_subscription_cancelled = true +group by 1 From c5356f08fa435ff6f088ad1f8ec1dd1b891905e7 Mon Sep 17 00:00:00 2001 From: keelansmithers Date: Tue, 15 Nov 2022 18:34:38 -0500 Subject: [PATCH 12/21] fix typo --- models/intermediate/subscriptions_active.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/intermediate/subscriptions_active.sql b/models/intermediate/subscriptions_active.sql index d04eb12..d8b924d 100644 --- a/models/intermediate/subscriptions_active.sql +++ 
b/models/intermediate/subscriptions_active.sql @@ -3,5 +3,5 @@ select count(*) as subscriptions_active from {{ ref('subscriptions_days') }} -where is_active = true +where is_subscription_active = true group by 1 From f02313dd4695bca45683aceabf75af1bdcf11b52 Mon Sep 17 00:00:00 2001 From: keelansmithers Date: Tue, 15 Nov 2022 18:34:51 -0500 Subject: [PATCH 13/21] add final model PK test --- models/final/_final.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 models/final/_final.yml diff --git a/models/final/_final.yml b/models/final/_final.yml new file mode 100644 index 0000000..65b9547 --- /dev/null +++ b/models/final/_final.yml @@ -0,0 +1,10 @@ +version: 2 + +models: + - name: subscriptions_daily_metrics + + columns: + - name: date + tests: + - unique + - not_null From ecadcca5de13dbfee030650b95c012d183d7ef6a Mon Sep 17 00:00:00 2001 From: keelansmithers Date: Tue, 15 Nov 2022 19:59:46 -0500 Subject: [PATCH 14/21] filter out bad records where status is cancelled but not cancelled at --- models/base/base__recharge_subscriptions.sql | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/models/base/base__recharge_subscriptions.sql b/models/base/base__recharge_subscriptions.sql index 7e41547..bc4dca7 100644 --- a/models/base/base__recharge_subscriptions.sql +++ b/models/base/base__recharge_subscriptions.sql @@ -2,6 +2,8 @@ ---- logically organize the columns in the source table for readability ---- light cleaning + re-aliasing where necessary ---- derive fields that will be helpful downstream +---- filter out records that should not be included, e.g. subscriptions that are +------ status = 'CANCELLED' but don't have a cancelled_at timestamp. select id as subscription_id, @@ -43,3 +45,6 @@ select date(cancelled_at) as cancelled_date from {{ source('raw_data_sandbox', 'acme1_recharge_subscriptions') }} + +-- filtering out these records allows for final daily metrics to match more closely. 
+where not (status = 'CANCELLED' and cancelled_at is null) From 402c4a94f904912f62f477d8856adba934da7032 Mon Sep 17 00:00:00 2001 From: keelansmithers Date: Tue, 15 Nov 2022 22:05:02 -0500 Subject: [PATCH 15/21] losing the plot, need to capture work now before changing more --- models/final/subscriptions_daily_metrics.sql | 9 ++- models/intermediate/subscribers_churned.sql | 76 +++++++++++++++++++ .../intermediate/subscriptions_cancelled.sql | 35 +++++++-- models/intermediate/subscriptions_churned.sql | 0 4 files changed, 111 insertions(+), 9 deletions(-) create mode 100644 models/intermediate/subscribers_churned.sql create mode 100644 models/intermediate/subscriptions_churned.sql diff --git a/models/final/subscriptions_daily_metrics.sql b/models/final/subscriptions_daily_metrics.sql index f962698..ade8574 100644 --- a/models/final/subscriptions_daily_metrics.sql +++ b/models/final/subscriptions_daily_metrics.sql @@ -1,10 +1,11 @@ select date_spine.date_day as date, - coalesce(subscriptions_new.subscriptions_new) as subscriptions_new, - coalesce(subscriptions_returning.subscriptions_returning) as subscriptions_returning, - coalesce(subscriptions_cancelled.subscriptions_cancelled) as subscriptions_cancelled, - coalesce(subscriptions_active.subscriptions_active) as subscriptions_active, + coalesce(subscriptions_new.subscriptions_new,0) as subscriptions_new, + coalesce(subscriptions_returning.subscriptions_returning,0) as subscriptions_returning, + coalesce(subscriptions_cancelled.subscriptions_cancelled,0) as subscriptions_cancelled, + coalesce(subscriptions_active.subscriptions_active,0) as subscriptions_active, + coalesce(subscriptions_cancelled.subscriptions_churned,0) as subscriptions_churned, coalesce(subscribers_new.subscribers_new) as subscribers_new, coalesce(subscribers_active.subscribers_active) as subscribers_active diff --git a/models/intermediate/subscribers_churned.sql b/models/intermediate/subscribers_churned.sql new file mode 100644 index 
0000000..35710f0 --- /dev/null +++ b/models/intermediate/subscribers_churned.sql @@ -0,0 +1,76 @@ +-- first step, get a model that is as the customer*day grain for every day they had +-- at least one active subscription. + +with customers_days as ( + + select + date, + customer_id, + max(cancelled_date) as cancelled_date + + from {{ ref('subscriptions_days') }} + where is_subscription_active = false + group by 1,2 +), + +customers_days_with_lead as ( + -- use lead() to find the next date that this model shows the customer has an + -- active subscription. + select + *, + date_diff( + lead(date) over ( + partition by customer_id + order by date asc + ), + date, + day + ) as days_to_next_customer_subscription_day + + from customers_days +), + +customers_days_next_day_churn as ( + -- if the customer's next subscription date is more than 1 day beyond the given date + -- or if the customer doesn't have a next active subscription date, then the subscriber + -- will churn the following day. + select + *, + + case + when days_to_next_customer_subscription_day > 1 then true + when days_to_next_customer_subscription_day is null then true + else false + end as is_churn_next_day + + from customers_days_with_lead +), + +customers_days_churn_date_added as ( + -- based on the churn logic in the prompt, the churn date is the first day + -- that a subscriber does not have an active subscription, so we derive that + -- here based on the date in the given row where is_churn_next_day = true. 
+ select + *, + + case + when is_churn_next_day + then date_add(date, interval 1 day) + else null + end as churned_date + + from customers_days_next_day_churn +), + +subscriber_cancellations as ( + + select + date, + count(*) as subscribers_cancelled + + from customers_days_churn_date_added + where is_churn_next_day + group by 1 +) + +select * from subscriber_cancellations diff --git a/models/intermediate/subscriptions_cancelled.sql b/models/intermediate/subscriptions_cancelled.sql index 4b3382b..5a8535d 100644 --- a/models/intermediate/subscriptions_cancelled.sql +++ b/models/intermediate/subscriptions_cancelled.sql @@ -1,7 +1,32 @@ +-- this model gets the daily subscriptions cancelled count, then date spines this +-- data to fill in any cases where there were no cancellations in a given day, +-- then uses a sum window function to get the running total of how many subscriptions +-- have been cancelled up to a given date. + +with subscriptions_cancelled as ( + select + cancelled_date as date, + count(*) as subscriptions_cancelled + + from {{ ref('base__recharge_subscriptions') }} + where was_cancelled = true + group by 1 +), + +subscriptions_cancelled_date_spine as ( + -- spine allows for days with no cancellations to be coalesced to 0 and therefore + -- incorporated into the sum() over () window function following this cte + select + subscriptions_cancelled.date, + coalesce(subscriptions_cancelled.subscriptions_cancelled,0) as subscriptions_cancelled + + from subscriptions_cancelled + left join {{ ref('date_spine') }} + on subscriptions_cancelled.date = date_spine.date_day +) + select - cancelled_date as date, - count(*) as subscriptions_cancelled + *, + sum(subscriptions_cancelled) over (order by date asc) as subscriptions_churned -from {{ ref('base__recharge_subscriptions') }} -where was_cancelled = true -group by 1 +from subscriptions_cancelled_date_spine diff --git a/models/intermediate/subscriptions_churned.sql b/models/intermediate/subscriptions_churned.sql 
new file mode 100644 index 0000000..e69de29 From 9c6df213ee50631645ec1ca5829fb019253c50b8 Mon Sep 17 00:00:00 2001 From: keelansmithers Date: Tue, 15 Nov 2022 22:11:01 -0500 Subject: [PATCH 16/21] rename model to cancelled --- .../{subscribers_churned.sql => subscribers_cancelled.sql} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename models/intermediate/{subscribers_churned.sql => subscribers_cancelled.sql} (95%) diff --git a/models/intermediate/subscribers_churned.sql b/models/intermediate/subscribers_cancelled.sql similarity index 95% rename from models/intermediate/subscribers_churned.sql rename to models/intermediate/subscribers_cancelled.sql index 35710f0..b396e46 100644 --- a/models/intermediate/subscribers_churned.sql +++ b/models/intermediate/subscribers_cancelled.sql @@ -6,10 +6,10 @@ with customers_days as ( select date, customer_id, + count(subscription_id) as customer_subscriptions_count, max(cancelled_date) as cancelled_date from {{ ref('subscriptions_days') }} - where is_subscription_active = false group by 1,2 ), @@ -65,7 +65,7 @@ customers_days_churn_date_added as ( subscriber_cancellations as ( select - date, + churned_date as date, count(*) as subscribers_cancelled from customers_days_churn_date_added From b51c263e33a84b357c678b6628b7e27bd94cd19a Mon Sep 17 00:00:00 2001 From: keelansmithers Date: Tue, 15 Nov 2022 22:30:08 -0500 Subject: [PATCH 17/21] reorganize models, clean up dependencies --- models/final/subscriptions_daily_metrics.sql | 3 +++ .../subscribers_active.sql | 0 .../subscribers_cancelled.sql | 20 ++++--------------- .../subscriptions_active.sql | 0 .../subscribers_days.sql} | 0 .../subscriptions_days.sql | 0 models/intermediate/subscribers_churned.sql | 0 7 files changed, 7 insertions(+), 16 deletions(-) rename models/intermediate/{ => date_spine_derivatives}/subscribers_active.sql (100%) rename models/intermediate/{ => date_spine_derivatives}/subscribers_cancelled.sql (79%) rename models/intermediate/{ => 
date_spine_derivatives}/subscriptions_active.sql (100%) rename models/intermediate/{subscriptions_churned.sql => date_spined_fanouts/subscribers_days.sql} (100%) rename models/intermediate/{ => date_spined_fanouts}/subscriptions_days.sql (100%) create mode 100644 models/intermediate/subscribers_churned.sql diff --git a/models/final/subscriptions_daily_metrics.sql b/models/final/subscriptions_daily_metrics.sql index ade8574..94e58e6 100644 --- a/models/final/subscriptions_daily_metrics.sql +++ b/models/final/subscriptions_daily_metrics.sql @@ -8,6 +8,7 @@ select coalesce(subscriptions_cancelled.subscriptions_churned,0) as subscriptions_churned, coalesce(subscribers_new.subscribers_new) as subscribers_new, + coalesce(subscribers_cancelled.subscribers_cancelled) as subscribers_cancelled, coalesce(subscribers_active.subscribers_active) as subscribers_active from {{ ref('date_spine') }} @@ -21,5 +22,7 @@ left join {{ ref('subscriptions_active') }} on date_spine.date_day = subscriptions_active.date left join {{ ref('subscribers_new') }} on date_spine.date_day = subscribers_new.date +left join {{ ref('subscribers_cancelled') }} + on date_spine.date_day = subscribers_cancelled.date left join {{ ref('subscribers_active') }} on date_spine.date_day = subscribers_active.date diff --git a/models/intermediate/subscribers_active.sql b/models/intermediate/date_spine_derivatives/subscribers_active.sql similarity index 100% rename from models/intermediate/subscribers_active.sql rename to models/intermediate/date_spine_derivatives/subscribers_active.sql diff --git a/models/intermediate/subscribers_cancelled.sql b/models/intermediate/date_spine_derivatives/subscribers_cancelled.sql similarity index 79% rename from models/intermediate/subscribers_cancelled.sql rename to models/intermediate/date_spine_derivatives/subscribers_cancelled.sql index b396e46..933a632 100644 --- a/models/intermediate/subscribers_cancelled.sql +++ 
b/models/intermediate/date_spine_derivatives/subscribers_cancelled.sql @@ -1,19 +1,7 @@ --- first step, get a model that is as the customer*day grain for every day they had --- at least one active subscription. +-- build off the customer * day grain model with cascading cte's to self derive +-- churn events. -with customers_days as ( - - select - date, - customer_id, - count(subscription_id) as customer_subscriptions_count, - max(cancelled_date) as cancelled_date - - from {{ ref('subscriptions_days') }} - group by 1,2 -), - -customers_days_with_lead as ( +with customers_days_with_lead as ( -- use lead() to find the next date that this model shows the customer has an -- active subscription. select @@ -27,7 +15,7 @@ customers_days_with_lead as ( day ) as days_to_next_customer_subscription_day - from customers_days + from {{ ref('subscribers_days') }} ), customers_days_next_day_churn as ( diff --git a/models/intermediate/subscriptions_active.sql b/models/intermediate/date_spine_derivatives/subscriptions_active.sql similarity index 100% rename from models/intermediate/subscriptions_active.sql rename to models/intermediate/date_spine_derivatives/subscriptions_active.sql diff --git a/models/intermediate/subscriptions_churned.sql b/models/intermediate/date_spined_fanouts/subscribers_days.sql similarity index 100% rename from models/intermediate/subscriptions_churned.sql rename to models/intermediate/date_spined_fanouts/subscribers_days.sql diff --git a/models/intermediate/subscriptions_days.sql b/models/intermediate/date_spined_fanouts/subscriptions_days.sql similarity index 100% rename from models/intermediate/subscriptions_days.sql rename to models/intermediate/date_spined_fanouts/subscriptions_days.sql diff --git a/models/intermediate/subscribers_churned.sql b/models/intermediate/subscribers_churned.sql new file mode 100644 index 0000000..e69de29 From ccf1c1405fe9162b6ddbd7ac4a0464166ab41152 Mon Sep 17 00:00:00 2001 From: keelansmithers Date: Tue, 15 Nov 2022 22:30:35 
-0500 Subject: [PATCH 18/21] create subscribers_days model --- .../date_spined_fanouts/subscribers_days.sql | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/models/intermediate/date_spined_fanouts/subscribers_days.sql b/models/intermediate/date_spined_fanouts/subscribers_days.sql index e69de29..4b96cd6 100644 --- a/models/intermediate/date_spined_fanouts/subscribers_days.sql +++ b/models/intermediate/date_spined_fanouts/subscribers_days.sql @@ -0,0 +1,10 @@ +-- first step for many subscriber metrics, get a model that is at the customer*day grain for +-- every day they had at least one active subscription. +select + date, + customer_id, + count(subscription_id) as customer_subscriptions_count, + max(cancelled_date) as cancelled_date + +from {{ ref('subscriptions_days') }} +group by 1,2 From 029ef96fe3afb765a3a083a2af53f8713ae00793 Mon Sep 17 00:00:00 2001 From: keelansmithers Date: Tue, 15 Nov 2022 22:52:19 -0500 Subject: [PATCH 19/21] add subscribers churned metric --- models/final/subscriptions_daily_metrics.sql | 7 ++++--- .../date_spine_derivatives/subscribers_cancelled.sql | 6 +++++- models/intermediate/subscribers_churned.sql | 0 3 files changed, 9 insertions(+), 4 deletions(-) delete mode 100644 models/intermediate/subscribers_churned.sql diff --git a/models/final/subscriptions_daily_metrics.sql b/models/final/subscriptions_daily_metrics.sql index 94e58e6..572ac26 100644 --- a/models/final/subscriptions_daily_metrics.sql +++ b/models/final/subscriptions_daily_metrics.sql @@ -7,9 +7,10 @@ select coalesce(subscriptions_active.subscriptions_active,0) as subscriptions_active, coalesce(subscriptions_cancelled.subscriptions_churned,0) as subscriptions_churned, - coalesce(subscribers_new.subscribers_new) as subscribers_new, - coalesce(subscribers_cancelled.subscribers_cancelled) as subscribers_cancelled, - coalesce(subscribers_active.subscribers_active) as subscribers_active + coalesce(subscribers_new.subscribers_new,0) as subscribers_new, + 
coalesce(subscribers_cancelled.subscribers_cancelled,0) as subscribers_cancelled, + coalesce(subscribers_active.subscribers_active,0) as subscribers_active, + coalesce(subscribers_cancelled.subscribers_churned,0) as subscribers_churned from {{ ref('date_spine') }} left join {{ ref('subscriptions_new') }} diff --git a/models/intermediate/date_spine_derivatives/subscribers_cancelled.sql b/models/intermediate/date_spine_derivatives/subscribers_cancelled.sql index 933a632..92173b3 100644 --- a/models/intermediate/date_spine_derivatives/subscribers_cancelled.sql +++ b/models/intermediate/date_spine_derivatives/subscribers_cancelled.sql @@ -61,4 +61,8 @@ subscriber_cancellations as ( group by 1 ) -select * from subscriber_cancellations +select + *, + sum(subscribers_cancelled) over (order by date asc) as subscribers_churned + +from subscriber_cancellations diff --git a/models/intermediate/subscribers_churned.sql b/models/intermediate/subscribers_churned.sql deleted file mode 100644 index e69de29..0000000 From e4f37d598890c98d65fc307eb5f00cd8a59c0099 Mon Sep 17 00:00:00 2001 From: keelansmithers Date: Tue, 15 Nov 2022 22:52:46 -0500 Subject: [PATCH 20/21] add test for consistency of metric from two differrent calcs --- tests/check_subscriptions_active_totals.sql | 43 +++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 tests/check_subscriptions_active_totals.sql diff --git a/tests/check_subscriptions_active_totals.sql b/tests/check_subscriptions_active_totals.sql new file mode 100644 index 0000000..944fc2d --- /dev/null +++ b/tests/check_subscriptions_active_totals.sql @@ -0,0 +1,43 @@ +with max_created_date as ( + + select max(created_date) as max_created_date from {{ ref('base__recharge_subscriptions') }} +), + +latest_active_subscription_count as ( + + select + count(*) as count + + from {{ ref('base__recharge_subscriptions') }} + where status = 'ACTIVE' +), + +latest_active_subscription_count_from_date_spine as ( + + select + count(*) as 
count_from_date_spine + + from {{ ref('subscriptions_days') }} + inner join max_created_date + on subscriptions_days.date = max_created_date.max_created_date + where is_subscription_active +), + +cross_joined as ( + + select + * + + from latest_active_subscription_count + cross join latest_active_subscription_count_from_date_spine +), + +meet_condition as ( + + select * + + from cross_joined + where not count = count_from_date_spine +) + +select * from meet_condition From af9b0a57c2e01c92aea29db29a84990c4982c528 Mon Sep 17 00:00:00 2001 From: keelansmithers Date: Wed, 16 Nov 2022 09:31:53 -0500 Subject: [PATCH 21/21] add composite key tests for date spined models --- .../date_spined_fanouts/_date_spined_fanouts.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 models/intermediate/date_spined_fanouts/_date_spined_fanouts.yml diff --git a/models/intermediate/date_spined_fanouts/_date_spined_fanouts.yml b/models/intermediate/date_spined_fanouts/_date_spined_fanouts.yml new file mode 100644 index 0000000..103c0a9 --- /dev/null +++ b/models/intermediate/date_spined_fanouts/_date_spined_fanouts.yml @@ -0,0 +1,16 @@ +version: 2 + +models: + - name: subscriptions_days + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - date + - subscription_id + + - name: subscribers_days + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - date + - customer_id