Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
cfe5d13
update service_account.json for access to bigquery
keelansmithers Nov 11, 2022
fc01331
create first pass base table
keelansmithers Nov 15, 2022
71135bf
clean up base model, remove test.sql
keelansmithers Nov 15, 2022
550bf3a
rename base model
keelansmithers Nov 15, 2022
389b233
establish final model and include easy metrics (subscriptions new and…
keelansmithers Nov 15, 2022
68257a5
add row_number to base model to derive subscribers_new
keelansmithers Nov 15, 2022
a8187a6
first pass subscription date spine model
keelansmithers Nov 15, 2022
a087b0d
add tests to base models
keelansmithers Nov 15, 2022
dfa398d
edit subscriptions date spine model and create active subs models
keelansmithers Nov 15, 2022
be1f6ee
quick logic fix
keelansmithers Nov 15, 2022
375b235
rearrange final model to all join to date spine, add coalesces
keelansmithers Nov 15, 2022
c5356f0
fix typo
keelansmithers Nov 15, 2022
f02313d
add final model PK test
keelansmithers Nov 15, 2022
ecadcca
filter out bad records where status is cancelled but not cancelled at
keelansmithers Nov 16, 2022
402c4a9
losing the plot, need to capture work now before changing more
keelansmithers Nov 16, 2022
9c6df21
rename model to cancelled
keelansmithers Nov 16, 2022
b51c263
reorganize models, clean up dependencies
keelansmithers Nov 16, 2022
ccf1c14
create subscribers_days model
keelansmithers Nov 16, 2022
029ef96
add subscribers churned metric
keelansmithers Nov 16, 2022
e4f37d5
add test for consistency of metric from two differrent calcs
keelansmithers Nov 16, 2022
af9b0a5
add composite key tests for date spined models
keelansmithers Nov 16, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions models/base/_base.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
version: 2

models:
- name: base__recharge_subscriptions
columns:
- name: subscription_id
description: >
the primary key id of the subscription source table. Re-aliased for clarity downstream.
tests:
- unique
- not_null

- name: date_spine
description: >
a utility table built using dbt_utils. this can be used downstream to fan out other tables
to build a picture of daily activity from timestamp/date fields.
columns:
- name: date_day
tests:
- unique
- not_null
9 changes: 9 additions & 0 deletions models/base/_source.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
version: 2

sources:
- name: raw_data_sandbox
database: wise-weaver-282922
schema: raw_data_sandbox

tables:
- name: acme1_recharge_subscriptions
50 changes: 50 additions & 0 deletions models/base/base__recharge_subscriptions.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
-- intent of this base model:
---- logically organize the columns in the source table for readability
---- light cleaning + re-aliasing where necessary
---- derive fields that will be helpful downstream
---- filter out records that should not be included, e.g. subscriptions that are
------ status = 'CANCELLED' but don't have a cancelled_at timestamp.

select
id as subscription_id,
customer_id,

-- subscription product details
shopify_product_id,
recharge_product_id,
shopify_variant_id,
sku,
product_title,
price,

-- status and other info
status,
lower(cancellation_reason) as cancellation_reason,

-- derived

-- business definition for cancelled subscriptions does not count instances
-- where cancellation reason is due to max number of charge attempts
case
when status = 'CANCELLED'
and lower(cancellation_reason) not like '%max number of charge attempts%'
then true
else false
end as was_cancelled,

-- window functions
row_number() over (partition by customer_id order by created_at asc) as customer_subscription_number,

-- timestamps
created_at,
cancelled_at,
updated_at,

-- dates
date(created_at) as created_date,
date(cancelled_at) as cancelled_date

from {{ source('raw_data_sandbox', 'acme1_recharge_subscriptions') }}

-- filtering out these records allows for final daily metrics to match more closely.
where not (status = 'CANCELLED' and cancelled_at is null)
8 changes: 8 additions & 0 deletions models/base/date_spine.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- To Do: look into using Jinja to set these parameters using a SQL statement in a macro
-- https://stackoverflow.com/questions/64007239/hi-how-do-we-define-select-statement-as-a-variable-in-dbt
{{ dbt_utils.date_spine(
datepart="day",
start_date="'2019-01-01'",
end_date="'2022-04-08'"
)
}}
10 changes: 10 additions & 0 deletions models/final/_final.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
version: 2

models:
- name: subscriptions_daily_metrics

columns:
- name: date
tests:
- unique
- not_null
29 changes: 29 additions & 0 deletions models/final/subscriptions_daily_metrics.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
select
date_spine.date_day as date,

coalesce(subscriptions_new.subscriptions_new,0) as subscriptions_new,
coalesce(subscriptions_returning.subscriptions_returning,0) as subscriptions_returning,
coalesce(subscriptions_cancelled.subscriptions_cancelled,0) as subscriptions_cancelled,
coalesce(subscriptions_active.subscriptions_active,0) as subscriptions_active,
coalesce(subscriptions_cancelled.subscriptions_churned,0) as subscriptions_churned,

coalesce(subscribers_new.subscribers_new,0) as subscribers_new,
coalesce(subscribers_cancelled.subscribers_cancelled,0) as subscribers_cancelled,
coalesce(subscribers_active.subscribers_active,0) as subscribers_active,
coalesce(subscribers_cancelled.subscribers_churned,0) as subscribers_churned

from {{ ref('date_spine') }}
left join {{ ref('subscriptions_new') }}
on date_spine.date_day = subscriptions_new.date
left join {{ ref('subscriptions_returning') }}
on date_spine.date_day = subscriptions_returning.date
left join {{ ref('subscriptions_cancelled') }}
on date_spine.date_day = subscriptions_cancelled.date
left join {{ ref('subscriptions_active') }}
on date_spine.date_day = subscriptions_active.date
left join {{ ref('subscribers_new') }}
on date_spine.date_day = subscribers_new.date
left join {{ ref('subscribers_cancelled') }}
on date_spine.date_day = subscribers_cancelled.date
left join {{ ref('subscribers_active') }}
on date_spine.date_day = subscribers_active.date
18 changes: 18 additions & 0 deletions models/intermediate/_intermediate.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
version: 2

models:
- name: subscriptions_new

columns:
- name: date
tests:
- unique
- not_null

- name: subscriptions_cancelled

columns:
- name: date
tests:
- unique
- not_null
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- this model takes the date-spined subscriptions model and aggregates to the date grain.
-- from there we calculate the number of subscriptions and subscribers that were considered that
-- day.
select
date,
count(distinct customer_id) as subscribers_active

from {{ ref('subscriptions_days') }}
group by 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
-- build off the customer * day grain model with cascading cte's to self derive
-- churn events.

with customers_days_with_lead as (
-- use lead() to find the next date that this model shows the customer has an
-- active subscription.
select
*,
date_diff(
lead(date) over (
partition by customer_id
order by date asc
),
date,
day
) as days_to_next_customer_subscription_day

from {{ ref('subscribers_days') }}
),

customers_days_next_day_churn as (
-- if the customer's next subscription date is more than 1 day beyond the given date
-- or if the customer doesn't have a next active subscription date, then the subscriber
-- will churn the following day.
select
*,

case
when days_to_next_customer_subscription_day > 1 then true
when days_to_next_customer_subscription_day is null then true
else false
end as is_churn_next_day

from customers_days_with_lead
),

customers_days_churn_date_added as (
-- based on the churn logic in the prompt, the churn date is the first day
-- that a subscriber does not have an active subscription, so we derive that
-- here based on the date in the given row where is_churn_next_day = true.
select
*,

case
when is_churn_next_day
then date_add(date, interval 1 day)
else null
end as churned_date

from customers_days_next_day_churn
),

subscriber_cancellations as (

select
churned_date as date,
count(*) as subscribers_cancelled

from customers_days_churn_date_added
where is_churn_next_day
group by 1
)

select
*,
sum(subscribers_cancelled) over (order by date asc) as subscribers_churned

from subscriber_cancellations
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
select
date,
count(*) as subscriptions_active

from {{ ref('subscriptions_days') }}
where is_subscription_active = true
group by 1
16 changes: 16 additions & 0 deletions models/intermediate/date_spined_fanouts/_date_spined_fanouts.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
version: 2

models:
- name: subscriptions_days
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- date
- subscription_id

- name: subscribers_days
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- date
- customer_id
10 changes: 10 additions & 0 deletions models/intermediate/date_spined_fanouts/subscribers_days.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- first step for many subscriber metrics, get a model that is as the customer*day grain for
-- every day they had at least one active subscription.
select
date,
customer_id,
count(subscription_id) as customer_subscriptions_count,
max(cancelled_date) as cancelled_date

from {{ ref('subscriptions_days') }}
group by 1,2
27 changes: 27 additions & 0 deletions models/intermediate/date_spined_fanouts/subscriptions_days.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
-- this model fans out subscriptions by days between created date and cancelled date to identify
-- all the days where this subscription was active and contributing to the overall active
-- subscriptions count. downstream we can aggregate and filter this in multiple ways to calculate
-- different daily metrics.

select
date_spine.date_day as date,
subscriptions.subscription_id,
subscriptions.customer_id,

subscriptions.created_date,
subscriptions.cancelled_date,

-- identify the days that the subscription was active.
case
when subscriptions.cancelled_date is null then true
when date_day < subscriptions.cancelled_date then true
when date_day = subscriptions.cancelled_date then false
end as is_subscription_active

from {{ ref('base__recharge_subscriptions') }} as subscriptions
left join {{ ref('date_spine') }}
on subscriptions.created_date <= date_spine.date_day
and (
subscriptions.cancelled_date >= date_spine.date_day
or subscriptions.cancelled_date is null
)
7 changes: 7 additions & 0 deletions models/intermediate/subscribers_new.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
select
created_date as date,
count(*) as subscribers_new

from {{ ref('base__recharge_subscriptions') }}
where customer_subscription_number = 1
group by 1
32 changes: 32 additions & 0 deletions models/intermediate/subscriptions_cancelled.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
-- this model gets the daily subscriptions cancelled count, then date spines this
-- data to fill in any cases where there were no cancellations in a given day,
-- then uses a sum window function to get the running total of how many subscriptions
-- have been cancelled up to a given date.

with subscriptions_cancelled as (
select
cancelled_date as date,
count(*) as subscriptions_cancelled

from {{ ref('base__recharge_subscriptions') }}
where was_cancelled = true
group by 1
),

subscriptions_cancelled_date_spine as (
-- spine allows for days with no cancellations to be coalesced to 0 and therefore
-- incorporated into the sum() over () window function following this cte
select
subscriptions_cancelled.date,
coalesce(subscriptions_cancelled.subscriptions_cancelled,0) as subscriptions_cancelled

from subscriptions_cancelled
left join {{ ref('date_spine') }}
on subscriptions_cancelled.date = date_spine.date_day
)

select
*,
sum(subscriptions_cancelled) over (order by date asc) as subscriptions_churned

from subscriptions_cancelled_date_spine
6 changes: 6 additions & 0 deletions models/intermediate/subscriptions_new.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
select
created_date as date,
count(*) as subscriptions_new

from {{ ref('base__recharge_subscriptions') }}
group by 1
20 changes: 20 additions & 0 deletions models/intermediate/subscriptions_returning.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
with base_lagged as (

select
*,
lag(was_cancelled) over (

partition by customer_id, recharge_product_id
order by created_at
) as was_previous_subscription_cancelled

from {{ ref('base__recharge_subscriptions') }}
)

select
date(created_at) as date,
count(*) as subscriptions_returning

from base_lagged
where was_previous_subscription_cancelled = true
group by 1
2 changes: 0 additions & 2 deletions models/test.sql

This file was deleted.

3 changes: 3 additions & 0 deletions packages.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
packages:
- package: dbt-labs/dbt_utils
version: 0.9.2
13 changes: 12 additions & 1 deletion service_account.json
Original file line number Diff line number Diff line change
@@ -1 +1,12 @@

{
"type": "service_account",
"project_id": "wise-weaver-282922",
"private_key_id": "9d46c694ba5d9a9b0e3568dbeef37e41236190e1",
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQDPp92OgQkcgcdo\nrRoMywbPF9omFAyGCS6IqzEeN+ENrKNMXLCb7LzPo/mxTqYUO8rqzVAUDUkmDaRX\nR6iD2bqS2VQdeXidxLjEq+xSvUcm3edVfz628a/0jrJNODUWQ0BTV9oDk3i9gI0/\npvFzPsMGxZA51SzrM+7k1jSvkfAVSEA7lsbYi6yiuMRDfgxmMRCii1/abYBlAL5q\nYYpmrCzevPgZjmG+TytHDkriQaZyMZ8DAkbE73Ywh6JCupwiGYO3SdGFn3/BCKmG\npj35+PEhFIKsQcGVgN+jd+1qz/znktkm6t4saZm1PQSagyRyLGHZgxFnPyRGRfOW\nUyQvPhONAgMBAAECgf98GtdPU88eOnHzHYF0I+Lt4lN+bn0Nnm8xPx8b8EmQ4py2\nBq+GgPZXIqkYTujzLskXi51QXqBZHc4aW7lbE8Vt/nVHKFHt+KR05ktXJ8heckDP\n2DyjGc8PWjk17WHM7GSFavvhJ0IB/9JrOFsVhW0JnwmThtRRvbgJC/xw8ceK74+M\nB5IKp/0lMJwUgPUm6sRnxHlfoYW4kLt/Z8454lItJi6Dqniy0Ou7qBei6ZRMCHSq\nsCJKhQvbkyCJYUaKsum2rOF4PO+cz4Gs7r7m03Ya/cY4aTt6twl0hgQl+alfza7Z\nE1DSyTgJXl1X8nv/9Q2byOgxv2OdVGYOWO19AGECgYEA8cNXwCSfvpxXtZ7ZrePc\nlj6ZB/ZlNnIuCE4U7pKtzkhq9PDn5WSWsYWyudRp/J2pJOS3gsu67T3kxb1uf3OT\nJOhbxBLiM3S/RsJDLv3aAi6O3jhyzA9YPEkAeyPJIBdJWJCABo1yMKKZ28PnRyZz\nqi1C8ueYvZ9ObuE70WNWd7kCgYEA2+JX7XxTngsZhP0hy9fffTKHdcW9Kz34b7sF\n06kCeUHfoq0WnTRt0YQn/VJvkGG7Fjp+9+CvyZI5eZ2MAgqYA8PoCdvz1cApZUza\n0wbgauEIWnow66sOrRue0IafcSTXoPYxrfXNgne4PWX7hmC7+aeAguz8bmmHZEVp\nVmr6PHUCgYEA60CGdvYsa7J+tsCoWUlNMPYgu1rMCjr5CCPSwoQAlKZrTYXOYa2L\n5ZqP27wSljpkiP32hFPyxUBhCPnYODUijdt80EL/0cpdBmGqNmPWbCEZ7KtJKcAU\nwDTZPvoHkC6YtJGHwtiIsULILM1olBeanZIFf5BVVM9b5xFMv+fGECECgYBQeqi6\nGH42Y1Ok1E1xYYLAPSVNWuGhGR58LpS0YrCwFwc2cGnNgioLvHYyhuCklNC4ExLz\nwP42Jdn5hEuBV8VJ44NjUGkPvDSqihWxlVvAYrr++qg7KbHGdEyNgCiNbfGqTVHj\nnQrqDM2XU2NbTL6gYslrojfGZcOTVMB4S9FpHQKBgQC2vBJcrAmAy10nB91BVgOd\nQRE6Evc8QPrjFsNoUoD/8yVEMVCcdf8BkycpPGtvVkBrf6KRIEgMAOUrEq0vmteA\nN+4IYDqBI+blIwnpmRrekt/Z2X5Pb3nK9XOvWiBqr32UFtL++V5xJfE35zUgroBe\nUk/bnJoEKKQFg6VP7IQPIA==\n-----END PRIVATE KEY-----\n",
"client_email": "test-project@wise-weaver-282922.iam.gserviceaccount.com",
"client_id": "102252416935570843819",
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://oauth2.googleapis.com/token",
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/test-project%40wise-weaver-282922.iam.gserviceaccount.com"
}
Loading