Last active
September 6, 2023 16:55
-
-
Save danthegoodman1/a8148382996fbb4b300155f011d9f353 to your computer and use it in GitHub Desktop.
ClickHouse rolling net dollar retention
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Rolling ndr | |
with tbl as ( | |
-- fake table | |
select c1 as usr, c2 as price, c3 as d from VALUES ( | |
('a', 10, toDate('2023-08-09')) | |
, ('a', 10, toDate('2023-08-10')) | |
, ('a', 8, toDate('2023-08-11')) | |
, ('a', 11, toDate('2023-08-12')) | |
, ('a', 12, toDate('2023-08-13')) | |
, ('b', 10, toDate('2023-08-09')) | |
, ('b', 10, toDate('2023-08-10')) | |
, ('b', 9, toDate('2023-08-11')) | |
, ('b', 11, toDate('2023-08-12')) | |
, ('b', 12, toDate('2023-08-13')) | |
, ('c', 11, toDate('2023-08-11')) | |
, ('c', 11, toDate('2023-08-12')) | |
, ('c', 15, toDate('2023-08-13')) | |
, ('c', 15, toDate('2023-08-15')) -- a non-contig day | |
) | |
) | |
, mins as ( | |
-- get the first date and price we saw them at (aggregate down if needed) | |
select usr, min(d) as first_d | |
from tbl | |
group by usr | |
order by usr asc | |
) | |
, buckets as ( | |
-- aggregate the days based on the delta to the first date | |
select usr, sum(price) as price, mins.first_d, dateDiff('day', mins.first_d, d) as delta | |
from tbl | |
left join mins on mins.usr = tbl.usr | |
group by usr, delta, mins.first_d | |
) | |
-- select * from first_deltas; | |
, coalesce(lagInFrame(price, 1) over (partition by buckets.usr order by delta), 1) as lag_price -- reusable statements :D | |
, per_user as ( | |
-- get each user's ndr | |
select buckets.usr as usr | |
, mins.first_d as start | |
, delta | |
, if (lag_price = 0, 1, price/lag_price) as ndr -- prevent divide by 0 (filtered out later) | |
from buckets | |
left join mins on mins.usr = buckets.usr | |
order by buckets.usr asc, start asc, delta asc with fill | |
) | |
-- select * from per_user where delta != 0; | |
, aggs as ( | |
-- get the average ndr for each start date | |
-- could use this as final select to have tabular format | |
select | |
start -- could do toStartOfInterval here since that only changes the shown start, but not the deltas | |
, delta | |
-- , uniq(if(ndr = 0, null, usr)) as users -- don't count the user if they didn't do anything this delta | |
, uniq(usr) as users -- count them regardless of whether the earned anything | |
, round(avg(ndr), 2) as ndr_a | |
from per_user | |
where delta != 0 | |
group by start, delta | |
order by start asc, delta asc | |
) | |
-- select * from aggs; | |
-- group up in arrays for each start date | |
select start, groupArray(ndr_a) as ndr_a, groupArray(users) as users_a, groupArray(delta) as delta_a | |
from aggs | |
group by start | |
; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is used when you are observing the delta between the previous interval and the current, rather than the first and the current.
This makes it resistant to the "honeymoon period". You can verify the values by observing that
delta=1
is the same value as the relative chart, but the following columns have different values.delta=0
is omitted as it's always1