Last active
February 18, 2023 03:23
-
-
Save lalitkapoor/5bf25af193df27c1b60a6bafb5158a53 to your computer and use it in GitHub Desktop.
TimescaleDB hierarchical continuous aggregate performance drop by ~20x for large datasets
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
DROP MATERIALIZED VIEW IF EXISTS hca_daily; | |
DROP MATERIALIZED VIEW IF EXISTS hca_hourly; | |
DROP MATERIALIZED VIEW IF EXISTS hca_minutely; | |
DROP TABLE IF EXISTS hca_test_events; | |
CREATE TABLE hca_test_events ( | |
customer_id TEXT NOT NULL, | |
ts timestamptz NOT NULL, | |
measures JSONB | |
); | |
CREATE INDEX idx_hca_test_events_customer_id_ts ON hca_test_events(customer_id, ts); | |
SELECT create_hypertable('hca_test_events', 'ts', chunk_time_interval => INTERVAL '1 day'); | |
CREATE UNIQUE INDEX idx_hca_test_events_unique ON hca_test_events(customer_id, ts); | |
ALTER TABLE hca_test_events SET ( | |
timescaledb.compress, | |
timescaledb.compress_orderby = 'ts DESC', | |
timescaledb.compress_segmentby = 'customer_id' | |
); | |
SELECT add_compression_policy('hca_test_events', INTERVAL '90 days'); | |
INSERT INTO hca_test_events | |
SELECT | |
(array['A', 'B', 'C'])[floor(random() * 3 + 1)] as customer_id, | |
gs.ts as ts, | |
jsonb_build_object('milliseconds', (random()*5000)::int) as measures | |
FROM generate_series('2022-01-01 00:00:00'::timestamptz, '2023-02-17 00:00:00', '1 second') AS gs(ts) | |
; | |
DROP MATERIALIZED VIEW IF EXISTS hca_minutely; | |
CREATE MATERIALIZED VIEW hca_minutely WITH (timescaledb.continuous) AS | |
SELECT | |
customer_id, | |
time_bucket('1 minute', ts) AS ts, | |
ROUND(SUM((measures->'milliseconds')::decimal(19, 6)), 2) AS value | |
FROM hca_test_events | |
WHERE | |
measures->>'milliseconds' IS NOT NULL | |
GROUP BY 1, 2 | |
; | |
SELECT add_continuous_aggregate_policy('hca_minutely', | |
start_offset => INTERVAL '1 day', | |
end_offset => INTERVAL '1 hour', | |
schedule_interval => INTERVAL '1 hour' | |
); | |
DROP MATERIALIZED VIEW IF EXISTS hca_hourly; | |
CREATE MATERIALIZED VIEW hca_hourly WITH (timescaledb.continuous) AS | |
SELECT | |
customer_id, | |
time_bucket('1 hour', ts) AS ts, | |
SUM(value) AS value | |
FROM hca_minutely | |
GROUP BY 1, 2 | |
; | |
SELECT add_continuous_aggregate_policy('hca_hourly', | |
start_offset => INTERVAL '1 day', | |
end_offset => INTERVAL '1 hour', | |
schedule_interval => INTERVAL '1 hour' | |
); | |
DROP MATERIALIZED VIEW IF EXISTS hca_daily; | |
CREATE MATERIALIZED VIEW hca_daily WITH (timescaledb.continuous) AS | |
SELECT | |
customer_id, | |
time_bucket('1 day', ts) AS ts, | |
SUM(value) AS value | |
FROM hca_hourly | |
GROUP BY 1, 2 | |
; | |
SELECT add_continuous_aggregate_policy('hca_daily', | |
start_offset => INTERVAL '3 day', | |
end_offset => INTERVAL '1 hour', | |
schedule_interval => INTERVAL '1 hour' | |
); | |
SELECT * FROM hca_minutely WHERE ts > '2023-02-01' AND customer_id = 'A' ORDER BY ts ASC; -- look at performance | |
SELECT * FROM hca_hourly WHERE ts > '2023-02-01' AND customer_id = 'A' ORDER BY ts ASC; -- look at performance | |
SELECT * FROM hca_daily WHERE ts > '2023-02-01' AND customer_id = 'A' ORDER BY ts ASC; -- loook at performance |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment