Created
July 9, 2019 11:27
-
-
Save mkowoods/c16db71b9c1c3081bef62c6fab66841d to your computer and use it in GitHub Desktop.
Notes for Will Buy on Return visit
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Source Data Set: https://console.cloud.google.com/bigquery?p=data-to-insights&d=ecommerce&t=web_analytics&page=table | |
## TRAIN | |
CREATE OR REPLACE MODEL `ecommerce.classification_model_2` | |
OPTIONS | |
(model_type='logistic_reg', labels = ['will_buy_on_return_visit']) AS | |
WITH all_visitor_stats AS ( | |
SELECT | |
fullvisitorid, | |
IF(COUNTIF(totals.transactions > 0 AND totals.newVisits IS NULL) > 0, 1, 0) AS will_buy_on_return_visit | |
FROM `data-to-insights.ecommerce.web_analytics` | |
GROUP BY fullvisitorid | |
) | |
# add in new features | |
SELECT * EXCEPT(unique_session_id) FROM ( | |
SELECT | |
CONCAT(fullvisitorid, CAST(visitId AS STRING)) AS unique_session_id, | |
# labels | |
will_buy_on_return_visit, | |
MAX(CAST(h.eCommerceAction.action_type AS INT64)) AS latest_ecommerce_progress, | |
# behavior on the site | |
IFNULL(totals.bounces, 0) AS bounces, | |
IFNULL(totals.timeOnSite, 0) AS time_on_site, | |
totals.pageviews, | |
# where the visitor came from | |
trafficSource.source, | |
trafficSource.medium, | |
channelGrouping, | |
# mobile or desktop | |
device.deviceCategory, | |
# geographic | |
IFNULL(geoNetwork.country, "") AS country | |
FROM `data-to-insights.ecommerce.web_analytics`, | |
UNNEST(hits) AS h | |
JOIN all_visitor_stats USING(fullvisitorid) | |
WHERE 1=1 | |
# only predict for new visits | |
AND totals.newVisits = 1 | |
AND date BETWEEN '20160801' AND '20170430' # train 9 months | |
GROUP BY | |
unique_session_id, | |
will_buy_on_return_visit, | |
bounces, | |
time_on_site, | |
totals.pageviews, | |
trafficSource.source, | |
trafficSource.medium, | |
channelGrouping, | |
device.deviceCategory, | |
country | |
); | |
##EVALUATION | |
#standardSQL | |
SELECT | |
roc_auc, | |
CASE | |
WHEN roc_auc > .9 THEN 'good' | |
WHEN roc_auc > .8 THEN 'fair' | |
WHEN roc_auc > .7 THEN 'not great' | |
ELSE 'poor' END AS model_quality | |
FROM | |
ML.EVALUATE(MODEL ecommerce.classification_model_2, ( | |
WITH all_visitor_stats AS ( | |
SELECT | |
fullvisitorid, | |
IF(COUNTIF(totals.transactions > 0 AND totals.newVisits IS NULL) > 0, 1, 0) AS will_buy_on_return_visit | |
FROM `data-to-insights.ecommerce.web_analytics` | |
GROUP BY fullvisitorid | |
) | |
# add in new features | |
SELECT * EXCEPT(unique_session_id) FROM ( | |
SELECT | |
CONCAT(fullvisitorid, CAST(visitId AS STRING)) AS unique_session_id, | |
# labels | |
will_buy_on_return_visit, | |
MAX(CAST(h.eCommerceAction.action_type AS INT64)) AS latest_ecommerce_progress, | |
# behavior on the site | |
IFNULL(totals.bounces, 0) AS bounces, | |
IFNULL(totals.timeOnSite, 0) AS time_on_site, | |
totals.pageviews, | |
# where the visitor came from | |
trafficSource.source, | |
trafficSource.medium, | |
channelGrouping, | |
# mobile or desktop | |
device.deviceCategory, | |
# geographic | |
IFNULL(geoNetwork.country, "") AS country | |
FROM `data-to-insights.ecommerce.web_analytics`, | |
UNNEST(hits) AS h | |
JOIN all_visitor_stats USING(fullvisitorid) | |
WHERE 1=1 | |
# only predict for new visits | |
AND totals.newVisits = 1 | |
AND date BETWEEN '20170501' AND '20170630' # eval 2 months | |
GROUP BY | |
unique_session_id, | |
will_buy_on_return_visit, | |
bounces, | |
time_on_site, | |
totals.pageviews, | |
trafficSource.source, | |
trafficSource.medium, | |
channelGrouping, | |
device.deviceCategory, | |
country | |
) | |
)); | |
## PREDICTION | |
SELECT | |
* | |
FROM | |
ml.PREDICT(MODEL `ecommerce.classification_model_2`, | |
( | |
WITH all_visitor_stats AS ( | |
SELECT | |
fullvisitorid, | |
IF(COUNTIF(totals.transactions > 0 AND totals.newVisits IS NULL) > 0, 1, 0) AS will_buy_on_return_visit | |
FROM `data-to-insights.ecommerce.web_analytics` | |
GROUP BY fullvisitorid | |
) | |
SELECT | |
CONCAT(fullvisitorid, '-',CAST(visitId AS STRING)) AS unique_session_id, | |
# labels | |
will_buy_on_return_visit, | |
MAX(CAST(h.eCommerceAction.action_type AS INT64)) AS latest_ecommerce_progress, | |
# behavior on the site | |
IFNULL(totals.bounces, 0) AS bounces, | |
IFNULL(totals.timeOnSite, 0) AS time_on_site, | |
totals.pageviews, | |
# where the visitor came from | |
trafficSource.source, | |
trafficSource.medium, | |
channelGrouping, | |
# mobile or desktop | |
device.deviceCategory, | |
# geographic | |
IFNULL(geoNetwork.country, "") AS country | |
FROM `data-to-insights.ecommerce.web_analytics`, | |
UNNEST(hits) AS h | |
JOIN all_visitor_stats USING(fullvisitorid) | |
WHERE | |
# only predict for new visits | |
totals.newVisits = 1 | |
AND date BETWEEN '20170701' AND '20170801' # test 1 month | |
GROUP BY | |
unique_session_id, | |
will_buy_on_return_visit, | |
bounces, | |
time_on_site, | |
totals.pageviews, | |
trafficSource.source, | |
trafficSource.medium, | |
channelGrouping, | |
device.deviceCategory, | |
country | |
) | |
) | |
ORDER BY | |
predicted_will_buy_on_return_visit DESC; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment