Forked from krisjan-oldekamp/google_analytics_bigquery_user_mapping_table_customer.sql
Last active
October 12, 2021 07:50
-
-
Save MarkEdmondson1234/990270f3190b5c675c895c7b066be82a to your computer and use it in GitHub Desktop.
How to create a user mapping table (or Identity Graph) based on all the available user identifiers in the Google Analytics 4 BigQuery exports (like device-IDs or customer-IDs). Full article on stacktonic.com
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Author: Krisjan Oldekamp | |
-- https://stacktonic.com/article/create-a-user-mapping-table-based-on-the-google-analytics-4-big-query-dataset | |
DECLARE | |
lookback_window INT64 DEFAULT 90; -- How many days to lookback into the dataset to search for IDs (compared to today) | |
-- Deduplicate array of struct | |
CREATE TEMP FUNCTION | |
DEDUP(val ANY TYPE) AS (( | |
SELECT | |
ARRAY_AGG(t) | |
FROM ( | |
SELECT | |
MAX(v.timestamp) AS timestamp, | |
v.id | |
FROM | |
UNNEST(val) v | |
GROUP BY | |
v.id | |
ORDER BY | |
timestamp DESC | |
LIMIT | |
100) t )); | |
WITH | |
ga_user_ids AS ( | |
-- Select all user-IDs from the GA4 dataset in the specified time period | |
SELECT | |
* | |
FROM ( | |
SELECT | |
user_pseudo_id AS ga_client_id, | |
-- Device-ID | |
user_id AS customer_id, | |
-- Custom defined User-ID (e.g. Customer-ID) | |
# no user_properties in public GA4 dataset so this line breaks, will work in 'real' export | |
#(SELECT value.string_value FROM UNNEST(user_properties) WHERE key = "relay_id") AS customer_id_secondary, -- Secondary custom user-ID | |
'missing' AS customer_id_secondary, | |
( | |
SELECT | |
value.string_value | |
FROM | |
UNNEST(event_params) | |
WHERE | |
event_name = "page_view" | |
AND key = "gclid") AS gclid, | |
-- Click-ID | |
ecommerce.transaction_id AS transaction_id, | |
-- Order-IDs | |
MAX(event_timestamp) AS event_timestamp -- Timestamps | |
FROM | |
`bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*` | |
WHERE | |
_TABLE_SUFFIX BETWEEN FORMAT_DATE("%Y%m%d",DATE_SUB(CURRENT_DATE(), INTERVAL lookback_window DAY)) | |
AND FORMAT_DATE("%Y%m%d",DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)) | |
GROUP BY | |
ga_client_id, | |
customer_id, | |
customer_id_secondary, | |
gclid, | |
transaction_id ) | |
-- Filter out users without any additional user-IDs besides ga_client_id | |
WHERE | |
customer_id IS NOT NULL | |
OR customer_id_secondary IS NOT NULL | |
OR gclid IS NOT NULL | |
OR transaction_id IS NOT NULL ) | |
-- Select all identifiers and group on customer-ID. | |
SELECT | |
customer_id, | |
MAX(TIMESTAMP_MICROS(event_timestamp)) AS timestamp_last_visit, | |
-- Aggegrate customer-IDs to an array of structs | |
DEDUP(ARRAY_AGG( | |
IF | |
(ga_client_id IS NOT NULL, | |
STRUCT(TIMESTAMP_MICROS(event_timestamp) AS timestamp, | |
ga_client_id AS id), | |
NULL ) IGNORE NULLS | |
ORDER BY | |
event_timestamp DESC )) AS ga_client_id, | |
-- Aggegrate secondary customer-IDs to an array of structs | |
DEDUP(ARRAY_AGG( | |
IF | |
(customer_id_secondary IS NOT NULL, | |
STRUCT(TIMESTAMP_MICROS(event_timestamp) AS timestamp, | |
customer_id_secondary AS id), | |
NULL ) IGNORE NULLS | |
ORDER BY | |
event_timestamp DESC )) AS customer_id_secondary, | |
-- Aggegrate GCLIDs to an array of structs | |
DEDUP(ARRAY_AGG( | |
IF | |
(gclid IS NOT NULL, | |
STRUCT(TIMESTAMP_MICROS(event_timestamp) AS timestamp, | |
gclid AS id), | |
NULL ) IGNORE NULLS | |
ORDER BY | |
event_timestamp DESC )) AS gclid, | |
-- Aggegrate transaction-IDs to an array of structs | |
DEDUP(ARRAY_AGG( | |
IF | |
(transaction_id IS NOT NULL, | |
STRUCT(TIMESTAMP_MICROS(event_timestamp) AS timestamp, | |
transaction_id AS id), | |
NULL ) IGNORE NULLS | |
ORDER BY | |
event_timestamp DESC )) AS transaction_id, | |
FROM | |
ga_user_ids | |
WHERE | |
customer_id IS NOT NULL | |
GROUP BY | |
customer_id |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment