diff --git a/models/base/_base.yml b/models/base/_base.yml new file mode 100644 index 0000000..8d648cd --- /dev/null +++ b/models/base/_base.yml @@ -0,0 +1,21 @@ +version: 2 + +models: + - name: base__recharge_subscriptions + columns: + - name: subscription_id + description: > + the primary key id of the subscription source table. Re-aliased for clarity downstream. + tests: + - unique + - not_null + + - name: date_spine + description: > + a utility table built using dbt_utils. this can be used downstream to fan out other tables + to build a picture of daily activity from timestamp/date fields. + columns: + - name: date_day + tests: + - unique + - not_null diff --git a/models/base/_source.yml b/models/base/_source.yml new file mode 100644 index 0000000..853156c --- /dev/null +++ b/models/base/_source.yml @@ -0,0 +1,9 @@ +version: 2 + +sources: + - name: raw_data_sandbox + database: wise-weaver-282922 + schema: raw_data_sandbox + + tables: + - name: acme1_recharge_subscriptions diff --git a/models/base/base__recharge_subscriptions.sql b/models/base/base__recharge_subscriptions.sql new file mode 100644 index 0000000..bc4dca7 --- /dev/null +++ b/models/base/base__recharge_subscriptions.sql @@ -0,0 +1,50 @@ +-- intent of this base model: +---- logically organize the columns in the source table for readability +---- light cleaning + re-aliasing where necessary +---- derive fields that will be helpful downstream +---- filter out records that should not be included, e.g. subscriptions that are +------ status = 'CANCELLED' but don't have a cancelled_at timestamp. + +select + id as subscription_id, + customer_id, + + -- subscription product details + shopify_product_id, + recharge_product_id, + shopify_variant_id, + sku, + product_title, + price, + + -- status and other info + status, + lower(cancellation_reason) as cancellation_reason, + + -- derived + + -- business definition for cancelled subscriptions does not count instances + -- where cancellation reason is due to max number of charge attempts + case + when status = 'CANCELLED' + and lower(cancellation_reason) not like '%max number of charge attempts%' + then true + else false + end as was_cancelled, + + -- window functions + row_number() over (partition by customer_id order by created_at asc) as customer_subscription_number, + + -- timestamps + created_at, + cancelled_at, + updated_at, + + -- dates + date(created_at) as created_date, + date(cancelled_at) as cancelled_date + +from {{ source('raw_data_sandbox', 'acme1_recharge_subscriptions') }} + +-- filtering out these records allows for final daily metrics to match more closely. +where not (status = 'CANCELLED' and cancelled_at is null) diff --git a/models/base/date_spine.sql b/models/base/date_spine.sql new file mode 100644 index 0000000..74f16f0 --- /dev/null +++ b/models/base/date_spine.sql @@ -0,0 +1,8 @@ +-- To Do: look into using Jinja to set these parameters using a SQL statement in a macro +-- https://stackoverflow.com/questions/64007239/hi-how-do-we-define-select-statement-as-a-variable-in-dbt +{{ dbt_utils.date_spine( + datepart="day", + start_date="'2019-01-01'", + end_date="'2022-04-08'" + ) +}} diff --git a/models/final/_final.yml b/models/final/_final.yml new file mode 100644 index 0000000..65b9547 --- /dev/null +++ b/models/final/_final.yml @@ -0,0 +1,10 @@ +version: 2 + +models: + - name: subscriptions_daily_metrics + + columns: + - name: date + tests: + - unique + - not_null diff --git a/models/final/subscriptions_daily_metrics.sql b/models/final/subscriptions_daily_metrics.sql new file mode 100644 index 0000000..572ac26 --- /dev/null +++ b/models/final/subscriptions_daily_metrics.sql @@ -0,0 +1,29 @@ +select + date_spine.date_day as date, + + coalesce(subscriptions_new.subscriptions_new,0) as subscriptions_new, + coalesce(subscriptions_returning.subscriptions_returning,0) as subscriptions_returning, + coalesce(subscriptions_cancelled.subscriptions_cancelled,0) as subscriptions_cancelled, + coalesce(subscriptions_active.subscriptions_active,0) as subscriptions_active, + coalesce(subscriptions_cancelled.subscriptions_churned,0) as subscriptions_churned, + + coalesce(subscribers_new.subscribers_new,0) as subscribers_new, + coalesce(subscribers_cancelled.subscribers_cancelled,0) as subscribers_cancelled, + coalesce(subscribers_active.subscribers_active,0) as subscribers_active, + coalesce(subscribers_cancelled.subscribers_churned,0) as subscribers_churned + +from {{ ref('date_spine') }} +left join {{ ref('subscriptions_new') }} + on date_spine.date_day = subscriptions_new.date +left join {{ ref('subscriptions_returning') }} + on date_spine.date_day = subscriptions_returning.date +left join {{ ref('subscriptions_cancelled') }} + on date_spine.date_day = subscriptions_cancelled.date +left join {{ ref('subscriptions_active') }} + on date_spine.date_day = subscriptions_active.date +left join {{ ref('subscribers_new') }} + on date_spine.date_day = subscribers_new.date +left join {{ ref('subscribers_cancelled') }} + on date_spine.date_day = subscribers_cancelled.date +left join {{ ref('subscribers_active') }} + on date_spine.date_day = subscribers_active.date diff --git a/models/intermediate/_intermediate.yml b/models/intermediate/_intermediate.yml new file mode 100644 index 0000000..88215e1 --- /dev/null +++ b/models/intermediate/_intermediate.yml @@ -0,0 +1,18 @@ +version: 2 + +models: + - name: subscriptions_new + + columns: + - name: date + tests: + - unique + - not_null + + - name: subscriptions_cancelled + + columns: + - name: date + tests: + - unique + - not_null diff --git a/models/intermediate/date_spine_derivatives/subscribers_active.sql b/models/intermediate/date_spine_derivatives/subscribers_active.sql new file mode 100644 index 0000000..481ec43 --- /dev/null +++ b/models/intermediate/date_spine_derivatives/subscribers_active.sql @@ -0,0 +1,9 @@ +-- this model takes the date-spined subscriptions model and aggregates to the date grain. +-- from there we calculate the number of subscriptions and subscribers that were considered that +-- day. +select + date, + count(distinct customer_id) as subscribers_active + +from {{ ref('subscriptions_days') }} +group by 1 diff --git a/models/intermediate/date_spine_derivatives/subscribers_cancelled.sql b/models/intermediate/date_spine_derivatives/subscribers_cancelled.sql new file mode 100644 index 0000000..92173b3 --- /dev/null +++ b/models/intermediate/date_spine_derivatives/subscribers_cancelled.sql @@ -0,0 +1,68 @@ +-- build off the customer * day grain model with cascading cte's to self derive +-- churn events. + +with customers_days_with_lead as ( + -- use lead() to find the next date that this model shows the customer has an + -- active subscription. + select + *, + date_diff( + lead(date) over ( + partition by customer_id + order by date asc + ), + date, + day + ) as days_to_next_customer_subscription_day + + from {{ ref('subscribers_days') }} +), + +customers_days_next_day_churn as ( + -- if the customer's next subscription date is more than 1 day beyond the given date + -- or if the customer doesn't have a next active subscription date, then the subscriber + -- will churn the following day. + select + *, + + case + when days_to_next_customer_subscription_day > 1 then true + when days_to_next_customer_subscription_day is null then true + else false + end as is_churn_next_day + + from customers_days_with_lead +), + +customers_days_churn_date_added as ( + -- based on the churn logic in the prompt, the churn date is the first day + -- that a subscriber does not have an active subscription, so we derive that + -- here based on the date in the given row where is_churn_next_day = true. + select + *, + + case + when is_churn_next_day + then date_add(date, interval 1 day) + else null + end as churned_date + + from customers_days_next_day_churn +), + +subscriber_cancellations as ( + + select + churned_date as date, + count(*) as subscribers_cancelled + + from customers_days_churn_date_added + where is_churn_next_day + group by 1 +) + +select + *, + sum(subscribers_cancelled) over (order by date asc) as subscribers_churned + +from subscriber_cancellations diff --git a/models/intermediate/date_spine_derivatives/subscriptions_active.sql b/models/intermediate/date_spine_derivatives/subscriptions_active.sql new file mode 100644 index 0000000..d8b924d --- /dev/null +++ b/models/intermediate/date_spine_derivatives/subscriptions_active.sql @@ -0,0 +1,7 @@ +select + date, + count(*) as subscriptions_active + +from {{ ref('subscriptions_days') }} +where is_subscription_active = true +group by 1 diff --git a/models/intermediate/date_spined_fanouts/_date_spined_fanouts.yml b/models/intermediate/date_spined_fanouts/_date_spined_fanouts.yml new file mode 100644 index 0000000..103c0a9 --- /dev/null +++ b/models/intermediate/date_spined_fanouts/_date_spined_fanouts.yml @@ -0,0 +1,16 @@ +version: 2 + +models: + - name: subscriptions_days + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - date + - subscription_id + + - name: subscribers_days + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - date + - customer_id diff --git a/models/intermediate/date_spined_fanouts/subscribers_days.sql b/models/intermediate/date_spined_fanouts/subscribers_days.sql new file mode 100644 index 0000000..4b96cd6 --- /dev/null +++ b/models/intermediate/date_spined_fanouts/subscribers_days.sql @@ -0,0 +1,10 @@ +-- first step for many subscriber metrics, get a model that is as the customer*day grain for +-- every day they had at least one active subscription. +select + date, + customer_id, + count(subscription_id) as customer_subscriptions_count, + max(cancelled_date) as cancelled_date + +from {{ ref('subscriptions_days') }} +group by 1,2 diff --git a/models/intermediate/date_spined_fanouts/subscriptions_days.sql b/models/intermediate/date_spined_fanouts/subscriptions_days.sql new file mode 100644 index 0000000..96c1897 --- /dev/null +++ b/models/intermediate/date_spined_fanouts/subscriptions_days.sql @@ -0,0 +1,27 @@ +-- this model fans out subscriptions by days between created date and cancelled date to identify +-- all the days where this subscription was active and contributing to the overall active +-- subscriptions count. downstream we can aggregate and filter this in multiple ways to calculate +-- different daily metrics. + +select + date_spine.date_day as date, + subscriptions.subscription_id, + subscriptions.customer_id, + + subscriptions.created_date, + subscriptions.cancelled_date, + + -- identify the days that the subscription was active. + case + when subscriptions.cancelled_date is null then true + when date_day < subscriptions.cancelled_date then true + when date_day = subscriptions.cancelled_date then false + end as is_subscription_active + +from {{ ref('base__recharge_subscriptions') }} as subscriptions +left join {{ ref('date_spine') }} + on subscriptions.created_date <= date_spine.date_day + and ( + subscriptions.cancelled_date >= date_spine.date_day + or subscriptions.cancelled_date is null + ) diff --git a/models/intermediate/subscribers_new.sql b/models/intermediate/subscribers_new.sql new file mode 100644 index 0000000..2e99827 --- /dev/null +++ b/models/intermediate/subscribers_new.sql @@ -0,0 +1,7 @@ +select + created_date as date, + count(*) as subscribers_new + +from {{ ref('base__recharge_subscriptions') }} +where customer_subscription_number = 1 +group by 1 diff --git a/models/intermediate/subscriptions_cancelled.sql b/models/intermediate/subscriptions_cancelled.sql new file mode 100644 index 0000000..5a8535d --- /dev/null +++ b/models/intermediate/subscriptions_cancelled.sql @@ -0,0 +1,32 @@ +-- this model gets the daily subscriptions cancelled count, then date spines this +-- data to fill in any cases where there were no cancellations in a given day, +-- then uses a sum window function to get the running total of how many subscriptions +-- have been cancelled up to a given date. + +with subscriptions_cancelled as ( + select + cancelled_date as date, + count(*) as subscriptions_cancelled + + from {{ ref('base__recharge_subscriptions') }} + where was_cancelled = true + group by 1 +), + +subscriptions_cancelled_date_spine as ( + -- spine allows for days with no cancellations to be coalesced to 0 and therefore + -- incorporated into the sum() over () window function following this cte + select + subscriptions_cancelled.date, + coalesce(subscriptions_cancelled.subscriptions_cancelled,0) as subscriptions_cancelled + + from subscriptions_cancelled + left join {{ ref('date_spine') }} + on subscriptions_cancelled.date = date_spine.date_day +) + +select + *, + sum(subscriptions_cancelled) over (order by date asc) as subscriptions_churned + +from subscriptions_cancelled_date_spine diff --git a/models/intermediate/subscriptions_new.sql b/models/intermediate/subscriptions_new.sql new file mode 100644 index 0000000..928e780 --- /dev/null +++ b/models/intermediate/subscriptions_new.sql @@ -0,0 +1,6 @@ +select + created_date as date, + count(*) as subscriptions_new + +from {{ ref('base__recharge_subscriptions') }} +group by 1 diff --git a/models/intermediate/subscriptions_returning.sql b/models/intermediate/subscriptions_returning.sql new file mode 100644 index 0000000..0033289 --- /dev/null +++ b/models/intermediate/subscriptions_returning.sql @@ -0,0 +1,20 @@ +with base_lagged as ( + + select + *, + lag(was_cancelled) over ( + + partition by customer_id, recharge_product_id + order by created_at + ) as was_previous_subscription_cancelled + + from {{ ref('base__recharge_subscriptions') }} +) + +select + date(created_at) as date, + count(*) as subscriptions_returning + +from base_lagged +where was_previous_subscription_cancelled = true +group by 1 diff --git a/models/test.sql b/models/test.sql deleted file mode 100644 index aee96f8..0000000 --- a/models/test.sql +++ /dev/null @@ -1,2 +0,0 @@ -select * -from `wise-weaver-282922.raw_data_sandbox.acme1_recharge_subscriptions` diff --git a/packages.yml b/packages.yml new file mode 100644 index 0000000..c681086 --- /dev/null +++ b/packages.yml @@ -0,0 +1,3 @@ +packages: + - package: dbt-labs/dbt_utils + version: 0.9.2 diff --git a/service_account.json b/service_account.json index 8b13789..b471968 100644 --- a/service_account.json +++ b/service_account.json @@ -1 +1,12 @@ - +{ + "type": "service_account", + "project_id": "wise-weaver-282922", + "private_key_id": "9d46c694ba5d9a9b0e3568dbeef37e41236190e1", + "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQDPp92OgQkcgcdo\nrRoMywbPF9omFAyGCS6IqzEeN+ENrKNMXLCb7LzPo/mxTqYUO8rqzVAUDUkmDaRX\nR6iD2bqS2VQdeXidxLjEq+xSvUcm3edVfz628a/0jrJNODUWQ0BTV9oDk3i9gI0/\npvFzPsMGxZA51SzrM+7k1jSvkfAVSEA7lsbYi6yiuMRDfgxmMRCii1/abYBlAL5q\nYYpmrCzevPgZjmG+TytHDkriQaZyMZ8DAkbE73Ywh6JCupwiGYO3SdGFn3/BCKmG\npj35+PEhFIKsQcGVgN+jd+1qz/znktkm6t4saZm1PQSagyRyLGHZgxFnPyRGRfOW\nUyQvPhONAgMBAAECgf98GtdPU88eOnHzHYF0I+Lt4lN+bn0Nnm8xPx8b8EmQ4py2\nBq+GgPZXIqkYTujzLskXi51QXqBZHc4aW7lbE8Vt/nVHKFHt+KR05ktXJ8heckDP\n2DyjGc8PWjk17WHM7GSFavvhJ0IB/9JrOFsVhW0JnwmThtRRvbgJC/xw8ceK74+M\nB5IKp/0lMJwUgPUm6sRnxHlfoYW4kLt/Z8454lItJi6Dqniy0Ou7qBei6ZRMCHSq\nsCJKhQvbkyCJYUaKsum2rOF4PO+cz4Gs7r7m03Ya/cY4aTt6twl0hgQl+alfza7Z\nE1DSyTgJXl1X8nv/9Q2byOgxv2OdVGYOWO19AGECgYEA8cNXwCSfvpxXtZ7ZrePc\nlj6ZB/ZlNnIuCE4U7pKtzkhq9PDn5WSWsYWyudRp/J2pJOS3gsu67T3kxb1uf3OT\nJOhbxBLiM3S/RsJDLv3aAi6O3jhyzA9YPEkAeyPJIBdJWJCABo1yMKKZ28PnRyZz\nqi1C8ueYvZ9ObuE70WNWd7kCgYEA2+JX7XxTngsZhP0hy9fffTKHdcW9Kz34b7sF\n06kCeUHfoq0WnTRt0YQn/VJvkGG7Fjp+9+CvyZI5eZ2MAgqYA8PoCdvz1cApZUza\n0wbgauEIWnow66sOrRue0IafcSTXoPYxrfXNgne4PWX7hmC7+aeAguz8bmmHZEVp\nVmr6PHUCgYEA60CGdvYsa7J+tsCoWUlNMPYgu1rMCjr5CCPSwoQAlKZrTYXOYa2L\n5ZqP27wSljpkiP32hFPyxUBhCPnYODUijdt80EL/0cpdBmGqNmPWbCEZ7KtJKcAU\nwDTZPvoHkC6YtJGHwtiIsULILM1olBeanZIFf5BVVM9b5xFMv+fGECECgYBQeqi6\nGH42Y1Ok1E1xYYLAPSVNWuGhGR58LpS0YrCwFwc2cGnNgioLvHYyhuCklNC4ExLz\nwP42Jdn5hEuBV8VJ44NjUGkPvDSqihWxlVvAYrr++qg7KbHGdEyNgCiNbfGqTVHj\nnQrqDM2XU2NbTL6gYslrojfGZcOTVMB4S9FpHQKBgQC2vBJcrAmAy10nB91BVgOd\nQRE6Evc8QPrjFsNoUoD/8yVEMVCcdf8BkycpPGtvVkBrf6KRIEgMAOUrEq0vmteA\nN+4IYDqBI+blIwnpmRrekt/Z2X5Pb3nK9XOvWiBqr32UFtL++V5xJfE35zUgroBe\nUk/bnJoEKKQFg6VP7IQPIA==\n-----END PRIVATE KEY-----\n", + "client_email": "test-project@wise-weaver-282922.iam.gserviceaccount.com", + "client_id": "102252416935570843819", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", + "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/test-project%40wise-weaver-282922.iam.gserviceaccount.com" +} diff --git a/tests/check_subscriptions_active_totals.sql b/tests/check_subscriptions_active_totals.sql new file mode 100644 index 0000000..944fc2d --- /dev/null +++ b/tests/check_subscriptions_active_totals.sql @@ -0,0 +1,43 @@ +with max_created_date as ( + + select max(created_date) as max_created_date from {{ ref('base__recharge_subscriptions') }} +), + +latest_active_subscription_count as ( + + select + count(*) as count + + from {{ ref('base__recharge_subscriptions') }} + where status = 'ACTIVE' +), + +latest_active_subscription_count_from_date_spine as ( + + select + count(*) as count_from_date_spine + + from {{ ref('subscriptions_days') }} + inner join max_created_date + on subscriptions_days.date = max_created_date.max_created_date + where is_subscription_active +), + +cross_joined as ( + + select + * + + from latest_active_subscription_count + cross join latest_active_subscription_count_from_date_spine +), + +meet_condition as ( + + select * + + from cross_joined + where not count = count_from_date_spine +) + +select * from meet_condition