Last active
February 2, 2023 13:54
-
-
Save chespinoza/12f2523ef3ea103d62faa152c03e8411 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WITH | |
preprocessed_orders AS ( | |
SELECT | |
* | |
FROM ( | |
SELECT | |
nozzle_customer_name, | |
customer_id, | |
amazon_order_id, | |
item_promotion_discount, | |
item_price, | |
quantity, | |
DATETIME(TIMESTAMP(purchase_date), "America/Los_Angeles") AS purchase_date, | |
DATE_TRUNC(DATETIME(TIMESTAMP(purchase_date), "America/Los_Angeles"), month) AS purchase_window, | |
asin, | |
-- "value" column is necessary to calculate the "highest value" for NTB. | |
-- Find more context about it in the "ntb" CTE comments. | |
quantity * item_price AS value | |
FROM | |
`nozzle.rotterdam.combined_orders` | |
WHERE | |
nozzle_customer_name = "A22QNUQNA4QHN8" -- Wake - unybrands | |
AND marketplace_id = "ATVPDKIKX0DER" | |
AND customer_id IS NOT NULL ) | |
WHERE | |
purchase_window < DATE_TRUNC("2023-01-01", month) ), | |
-- NTB = New To brand | |
-- | |
-- This actually means: | |
-- "The first order a customer made with a given Amazon merchant." | |
-- | |
-- If there is more than 1 ASIN on such "first order", then a specific ASIN | |
-- with the "highest value" is considered an NTB. And the formula for value is: | |
-- | |
-- quantity * NON-DISCOUNTED item price | |
-- | |
-- As a result of the logic described above, there can be only one ASIN | |
-- considered as NTB. | |
ntb AS ( | |
-- What's important in the context of the LTV query, a customer is | |
-- NTB for the whole period. So if a customer made their first order | |
-- on 2022-01-01, it is considered NTB for the entire 2022-01 period. | |
-- That's also why ntb CTE should return both customer_id and period. | |
SELECT | |
DISTINCT nozzle_customer_name, | |
customer_id, | |
purchase_date, | |
cohort | |
FROM ( | |
SELECT | |
nozzle_customer_name, | |
customer_id, | |
purchase_date, | |
purchase_window AS cohort, | |
asin, | |
RANK() OVER(PARTITION BY customer_id ORDER BY purchase_date, value DESC) AS ntb_rank | |
FROM | |
preprocessed_orders ) AS p -- The "p" alias is necessary to make the filter clause work in both of its | |
-- query contexts. The gist is that otherwise the "asin" column would be | |
-- ambiguous. See the second {asin_filter} call to understand why fully. | |
WHERE | |
ntb_rank = 1 | |
AND cohort >= DATE_TRUNC("2022-07-01", month) | |
AND cohort <= DATE_TRUNC("2023-01-01", month) ), | |
second_purchases AS ( | |
-- This query returns all the 2nd purchases per customer id | |
-- now I need to count and group by cohort | |
SELECT | |
customer_id, | |
amazon_order_id, | |
asin, | |
cohort | |
FROM ( | |
SELECT | |
po.customer_id AS customer_id, | |
po.amazon_order_id AS amazon_order_id, | |
po.asin AS asin, | |
ntb.cohort AS cohort, | |
RANK() OVER(PARTITION BY po.customer_id ORDER BY po.purchase_date ASC) AS second_purchase_rank | |
FROM | |
preprocessed_orders po | |
JOIN | |
ntb | |
ON | |
ntb.customer_id = po.customer_id | |
WHERE | |
po.purchase_date > ntb.purchase_date ) | |
WHERE | |
second_purchase_rank = 1 ), | |
repeat_customers AS ( | |
SELECT | |
cohort, | |
COUNT(*) AS repeat_customers | |
FROM | |
second_purchases | |
GROUP BY | |
cohort | |
ORDER BY | |
cohort ), | |
total_customers AS ( | |
SELECT | |
po.purchase_window AS cohort, | |
COUNT(*) AS total_customers | |
FROM ( | |
SELECT | |
po.*, | |
ROW_NUMBER() OVER (PARTITION BY po.customer_id ORDER BY po.purchase_date ASC) AS seqnum | |
FROM | |
preprocessed_orders po) po | |
WHERE | |
po.seqnum =1 | |
AND po.purchase_window >= DATE_TRUNC("2022-07-01", month) | |
AND po.purchase_window <= DATE_TRUNC("2023-01-01", month) | |
GROUP BY | |
po.purchase_window | |
ORDER BY | |
po.purchase_window ) | |
SELECT | |
tc.cohort, | |
tc.total_customers, | |
rc.repeat_customers, | |
SAFE_DIVIDE(rc.repeat_customers, tc.total_customers) * 100 AS retention_rate, | |
FROM | |
total_customers tc | |
JOIN | |
repeat_customers rc | |
ON | |
tc.cohort = rc.cohort | |
ORDER BY | |
tc.cohort |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment