Skip to content

Instantly share code, notes, and snippets.

@lalitkapoor
Last active February 18, 2023 03:23
Show Gist options
  • Save lalitkapoor/5bf25af193df27c1b60a6bafb5158a53 to your computer and use it in GitHub Desktop.
Save lalitkapoor/5bf25af193df27c1b60a6bafb5158a53 to your computer and use it in GitHub Desktop.
TimescaleDB hierarchical continuous aggregate performance drop by ~20x for large datasets
DROP MATERIALIZED VIEW IF EXISTS hca_daily;
DROP MATERIALIZED VIEW IF EXISTS hca_hourly;
DROP MATERIALIZED VIEW IF EXISTS hca_minutely;
DROP TABLE IF EXISTS hca_test_events;
CREATE TABLE hca_test_events (
customer_id TEXT NOT NULL,
ts timestamptz NOT NULL,
measures JSONB
);
CREATE INDEX idx_hca_test_events_customer_id_ts ON hca_test_events(customer_id, ts);
SELECT create_hypertable('hca_test_events', 'ts', chunk_time_interval => INTERVAL '1 day');
CREATE UNIQUE INDEX idx_hca_test_events_unique ON hca_test_events(customer_id, ts);
ALTER TABLE hca_test_events SET (
timescaledb.compress,
timescaledb.compress_orderby = 'ts DESC',
timescaledb.compress_segmentby = 'customer_id'
);
SELECT add_compression_policy('hca_test_events', INTERVAL '90 days');
INSERT INTO hca_test_events
SELECT
(array['A', 'B', 'C'])[floor(random() * 3 + 1)] as customer_id,
gs.ts as ts,
jsonb_build_object('milliseconds', (random()*5000)::int) as measures
FROM generate_series('2022-01-01 00:00:00'::timestamptz, '2023-02-17 00:00:00', '1 second') AS gs(ts)
;
DROP MATERIALIZED VIEW IF EXISTS hca_minutely;
CREATE MATERIALIZED VIEW hca_minutely WITH (timescaledb.continuous) AS
SELECT
customer_id,
time_bucket('1 minute', ts) AS ts,
ROUND(SUM((measures->'milliseconds')::decimal(19, 6)), 2) AS value
FROM hca_test_events
WHERE
measures->>'milliseconds' IS NOT NULL
GROUP BY 1, 2
;
SELECT add_continuous_aggregate_policy('hca_minutely',
start_offset => INTERVAL '1 day',
end_offset => INTERVAL '1 hour',
schedule_interval => INTERVAL '1 hour'
);
DROP MATERIALIZED VIEW IF EXISTS hca_hourly;
CREATE MATERIALIZED VIEW hca_hourly WITH (timescaledb.continuous) AS
SELECT
customer_id,
time_bucket('1 hour', ts) AS ts,
SUM(value) AS value
FROM hca_minutely
GROUP BY 1, 2
;
SELECT add_continuous_aggregate_policy('hca_hourly',
start_offset => INTERVAL '1 day',
end_offset => INTERVAL '1 hour',
schedule_interval => INTERVAL '1 hour'
);
DROP MATERIALIZED VIEW IF EXISTS hca_daily;
CREATE MATERIALIZED VIEW hca_daily WITH (timescaledb.continuous) AS
SELECT
customer_id,
time_bucket('1 day', ts) AS ts,
SUM(value) AS value
FROM hca_hourly
GROUP BY 1, 2
;
SELECT add_continuous_aggregate_policy('hca_daily',
start_offset => INTERVAL '3 day',
end_offset => INTERVAL '1 hour',
schedule_interval => INTERVAL '1 hour'
);
SELECT * FROM hca_minutely WHERE ts > '2023-02-01' AND customer_id = 'A' ORDER BY ts ASC; -- look at performance
SELECT * FROM hca_hourly WHERE ts > '2023-02-01' AND customer_id = 'A' ORDER BY ts ASC; -- look at performance
SELECT * FROM hca_daily WHERE ts > '2023-02-01' AND customer_id = 'A' ORDER BY ts ASC; -- loook at performance
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment