Skip to content

Instantly share code, notes, and snippets.

@noahmanion
Created November 19, 2024 22:35
Show Gist options
  • Save noahmanion/3e23e882667332e74e542ab61c2ab88c to your computer and use it in GitHub Desktop.
Save noahmanion/3e23e882667332e74e542ab61c2ab88c to your computer and use it in GitHub Desktop.
Growth Accounting Query for BigQuery
dau as (
-- This part of the query can be pretty much anything.
-- The only requirement is that it have three columns:
-- dt, user_id, inc_amt
-- Where dt is a date and user_id is some unique identifier for a user.
-- Each dt-user_id pair should be unique in this table.
-- inc_amt represents the amount of value that this user created on dt.
-- The most common case is
-- inc_amt = incremental revenue from the user on dt
-- If you want to do L28 growth accounting, user inc_amt=1.
-- The version here derives everything from the tutorial.yammer_events
-- data set provided for free by Mode.
-- If you edit just this part to represent your data, the rest
-- of the query should run just fine.
-- The query here is a sample that works in a BigQuery Data Set
select
user_id,
date as dt,
daily_revenue as inc_amt
from `your_BQ_dataset.analytics_tables.formatted_data_table`
group by 1,2,3
),
-- First, set up WAU and MAU tables for future use
wau as (
select
date_trunc(dt, week) as week,
user_id,
sum(inc_amt) as inc_amt
from dau
group by 1,2
),
mau as (
select
date_trunc(dt, month) as month,
user_id,
sum(inc_amt) as inc_amt
from dau
group by 1,2
),
-- This determines the cohort date of each user. In this case we are
-- deriving it from DAU data but you can feel free to replace it with
-- registration date if that's more appropriate.
first_dt as (
select
user_id,
min(dt) as first_dt,
date_trunc(min(dt), week) as first_week,
date_trunc(min(dt), month) as first_month
from dau
group by 1
),
-- I edited this query for an app that is best measured using DAU,
-- so I added several other temp tables here
dau_decorated as (
select
d.dt,
d.user_id,
d.inc_amt,
f.first_dt
from dau d join first_dt f on d.user_id = f.user_id
where inc_amt > 0
),
dau_growth_accounting as (
select
coalesce(tm.dt, date_add(lm.dt, interval 1 day)) as day,
count(distinct tm.user_id) as dau,
count(distinct case when lm.user_id is not null then tm.user_id else null end) as retained,
count(distinct case when tm.first_dt = tm.dt then tm.user_id else null end) as new_users,
count(distinct case when tm.first_dt != tm.dt and lm.user_id is null then tm.user_id else null end
) as resurrected,
-1*count(distinct case when tm.user_id is null then lm.user_id else null end) as churned
from dau_decorated tm full outer join dau_decorated lm on (
tm.user_id = lm.user_id and tm.dt = date_add(lm.dt, interval 1 day))
group by 1
),
mau_decorated as (
select
d.month,
d.user_id,
d.inc_amt,
f.first_month
from mau d join first_dt f on d.user_id = f.user_id
where inc_amt > 0
),
-- This is MAU growth accounting. Note that this does not require any
-- information about inc_amt. As discussed in the articles, these
-- quantities satisfy some identities:
-- MAU(t) = retained(t) + new(t) + resurrected(t)
-- MAU(t - 1 month) = retained(t) + churned(t)
mau_growth_accounting as (
select
coalesce(tm.month, date_add(lm.month, interval 1 month)) as month,
count(distinct tm.user_id) as dau,
count(distinct case when lm.user_id is not null then tm.user_id else null end) as retained,
count(distinct case when tm.first_month = tm.month then tm.user_id else null end) as new_users,
count(distinct case when tm.first_month != tm.month and lm.user_id is null then tm.user_id else null end
) as resurrected,
-1*count(distinct case when tm.user_id is null then lm.user_id else null end) as churned
from mau_decorated tm full outer join mau_decorated lm on (
tm.user_id = lm.user_id and tm.month = date_add(lm.month, interval 1 month))
group by 1
),
-- This generates the familiar monthly cohort retention dataset.
mau_retention_by_cohort as (
select
first_month,
month,
date_diff(month, first_month, month) as months_since_first,
count(1) as active_users,
sum(inc_amt) as inc_amt
from mau_decorated
group by 1,2
order by 1,2
),
-- This is the MRR growth accounting (or growth accounting of whatever
-- value you put in inc_amt). These also satisfy some identities:
-- MRR(t) = retained(t) + new(t) + resurrected(t) + expansion(t)
-- MAU(t - 1 month) = retained(t) + churned(t) + contraction(t)
mrr_growth_accounting as (
select
coalesce(tm.month, date_add(lm.month, interval 1 month)) as month,
sum(tm.inc_amt) as rev,
sum(
case
when tm.user_id is not null and lm.user_id is not null
and tm.inc_amt >= lm.inc_amt then lm.inc_amt
when tm.user_id is not null and lm.user_id is not null
and tm.inc_amt < lm.inc_amt then tm.inc_amt
else 0
end
) as retained,
sum(
case
when tm.first_month = tm.month then inc_amt
else 0 end
) as new,
sum(
case
when tm.month != tm.first_month and tm.user_id is not null
and lm.user_id is not null and tm.inc_amt > lm.inc_amt
)
)
-- These next tables are to compute LTV via the cohorts_cumulative table.
-- The LTV here is being computed for weekly cohorts on weekly intervals.
-- The queries can be modified to compute it for cohorts of any size
-- on any time window frequency.
wau_decorated as (
select
week,
w.user_id,
w.inc_amt,
f.first_week
from wau w join first_dt f on w.user_id = f.user_id
),
cohorts as (
select
first_week,
week as active_week,
date_diff(week, first_week, week) as weeks_since_first,
count(distinct user_id) as users,
sum(inc_amt) as inc_amt
from wau_decorated
group by 1,2,3
order by 1,2
),
cohort_sizes as (
select
first_week,
users,
inc_amt
from cohorts
where weeks_since_first = 0
),
cohorts_cumulative as (
-- A semi-cartesian join accomplishes the cumulative behavior.
select
c1.first_week,
c1.active_week,
c1.weeks_since_first,
c1.users,
cs.users as cohort_num_users,
1.0 * c1.users/cs.users as retained_pctg,
c1.inc_amt,
sum(c2.inc_amt) as cum_amt,
1.0*sum(c2.inc_amt)/cs.users as cum_amt_per_user
from cohorts c1 join cohort_sizes cs on cs.first_week = c1.first_week
join cohorts c2 on c1.first_week = c2.first_week and c1.weeks_since_first = c2.weeks_since_first
group by 1,2,3,4,5,6,7
order by 1,2
),
-- monthly cumulative cohorts
cohorts_m as (
select
first_month,
month as active_month,
date_diff(month, first_month, week) as months_since_first,
count(distinct user_id) as users,
sum(inc_amt) as inc_amt
from mau_decorated
group by 1,2,3
order by 1,2
),
cohort_sizes_m as (
select
first_week,
users,
inc_amt
from cohorts_m
where months_since_first = 0
),
chorts_cumulative_m as (
-- A semi-cartesian join accomplishes the cumulative behavior.
select
c1.first_month,
c1.active_month,
c1.months_since_first,
c1.users,
cs.users as cohort_num_users,
1.0 * c1.users/cs.users as retained_pctg,
c1.inc_amt,
sum(c2.inc_amt) as cum_amt,
1.0*sum(c2.inc_amt)/cs.users as cum_amt_per_user
from cohorts_m c1 join cohort_sizes_m cs on cs.first_month = c1.first_month
join cohorts_m c2 on c1.first_month = c2.first_month and c1.months_since_first = c2.months_since_first
group by 1,2,3,4,5,6,7
order by 1,2
)
-- For MAU retention by cohort, useful for the standard retention heatmap
select * from mau_retention_by_cohort
-- For cumulative LTV data use this
select * from cohorts_cumulative
-- For cumulative LTV with monthly cohorts use this
select * from cohorts_cumulative_m
-- For DAU growth accuonting use this
select * from dau_growth_accounting
-- For MAU growth accuonting use this
select * from mau_growth_accounting
-- For MRR growth accuonting use this
select * from mrr_growth_accounting
-- For use as weekly input in the 8-ball tool use this
select
first_week as cohort_week,
active_week as activity_week,
users,
inc_amt as revenue
from cohorts_cumulative
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment