danthegoodman1 · September 6, 2023 16:54 · danthegoodman1 · Aug 27, 2023
diff --git a/ch-ndr.sql b/ch-ndr.sql
 with tbl as (
 -- fake table
 select c1 as usr, c2 as price, c3 as d from VALUES (
 	('a', 10, toDate('2023-08-09'))
 	, ('a', 10, toDate('2023-08-10'))
 	, ('a', 8, toDate('2023-08-11'))
 	, ('a', 11, toDate('2023-08-12'))
 	, ('a', 12, toDate('2023-08-13'))
 	, ('b', 10, toDate('2023-08-09'))
 	, ('b', 10, toDate('2023-08-10'))
 	, ('b', 9, toDate('2023-08-11'))
 	, ('b', 11, toDate('2023-08-12'))
 	, ('b', 12, toDate('2023-08-13'))
 	, ('c', 11, toDate('2023-08-11'))
 	, ('c', 11, toDate('2023-08-12'))
 	, ('c', 15, toDate('2023-08-13'))
 	, ('c', 15, toDate('2023-08-15')) -- a non-contig day
 )
 )
 , mins as (
 -- get the first date and price we saw them at (aggregate down if needed)
 select usr, min(d) as first_d
 from tbl
 group by usr
 order by usr asc
 )
 , buckets as (
 -- aggregate the days based on the delta to the first date
 select usr, sum(price) as price, mins.first_d, dateDiff('day', mins.first_d, d) as delta
 from tbl
 left join mins on mins.usr = tbl.usr
 group by usr, delta, mins.first_d
 )
 , first_deltas as (
 select usr, argMin(price, delta) as first_delta_price
 from buckets
 group by usr
 )
 -- select * from first_deltas;
 , per_user as (
 -- get each user's ndr
 select buckets.usr as usr
 , mins.first_d as start
 , delta
 , price/first_deltas.first_delta_price as ndr -- could do multiple quantiles here, etc.
 from buckets
 left join mins on mins.usr = buckets.usr
 left join first_deltas on first_deltas.usr = buckets.usr
 order by buckets.usr asc, start asc, delta asc with fill
 )
 -- select * from per_user;
 , aggs as (
 -- get the average ndr for each start date
 -- could use this as final select to have tabular format
 select
 start -- could do toStartOfInterval here since that only changes the shown start, but not the deltas
 , delta
 -- , uniq(if(ndr = 0, null, usr)) as users -- don't count the user if they didn't do anything this delta
 , uniq(usr) as users -- count them regardless of whether the earned anything
 , round(avg(ndr), 2) as ndr_a
 from per_user
 group by start, delta
 order by start asc, delta asc
 )
 -- select * from aggs;
 -- group up in arrays for each start date
 select start, groupArray(ndr_a) as ndr_a, groupArray(users) as users_a, groupArray(delta) as delta_a
 from aggs
 group by start
 ;
	with tbl as (
	-- fake table
	select c1 as usr, c2 as price, c3 as d from VALUES (
	('a', 10, toDate('2023-08-09'))
	, ('a', 10, toDate('2023-08-10'))
	, ('a', 8, toDate('2023-08-11'))
	, ('a', 11, toDate('2023-08-12'))
	, ('a', 12, toDate('2023-08-13'))
	, ('b', 10, toDate('2023-08-09'))
	, ('b', 10, toDate('2023-08-10'))
	, ('b', 9, toDate('2023-08-11'))
	, ('b', 11, toDate('2023-08-12'))
	, ('b', 12, toDate('2023-08-13'))
	, ('c', 11, toDate('2023-08-11'))
	, ('c', 11, toDate('2023-08-12'))
	, ('c', 15, toDate('2023-08-13'))
	, ('c', 15, toDate('2023-08-15')) -- a non-contig day
	)
	)
	, mins as (
	-- get the first date and price we saw them at (aggregate down if needed)
	select usr, min(d) as first_d
	from tbl
	group by usr
	order by usr asc
	)
	, buckets as (
	-- aggregate the days based on the delta to the first date
	select usr, sum(price) as price, mins.first_d, dateDiff('day', mins.first_d, d) as delta
	from tbl
	left join mins on mins.usr = tbl.usr
	group by usr, delta, mins.first_d
	)
	, first_deltas as (
	select usr, argMin(price, delta) as first_delta_price
	from buckets
	group by usr
	)
	-- select * from first_deltas;
	, per_user as (
	-- get each user's ndr
	select buckets.usr as usr
	, mins.first_d as start
	, delta
	, price/first_deltas.first_delta_price as ndr -- could do multiple quantiles here, etc.
	from buckets
	left join mins on mins.usr = buckets.usr
	left join first_deltas on first_deltas.usr = buckets.usr
	order by buckets.usr asc, start asc, delta asc with fill
	)
	-- select * from per_user;
	, aggs as (
	-- get the average ndr for each start date
	-- could use this as final select to have tabular format
	select
	start -- could do toStartOfInterval here since that only changes the shown start, but not the deltas
	, delta
	-- , uniq(if(ndr = 0, null, usr)) as users -- don't count the user if they didn't do anything this delta
	, uniq(usr) as users -- count them regardless of whether the earned anything
	, round(avg(ndr), 2) as ndr_a
	from per_user
	group by start, delta
	order by start asc, delta asc
	)
	-- select * from aggs;
	-- group up in arrays for each start date
	select start, groupArray(ndr_a) as ndr_a, groupArray(users) as users_a, groupArray(delta) as delta_a
	from aggs
	group by start
	;
No results found