You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
-- EXCLUDE: remove columns from SELECT *SELECT* EXCLUDE (id, created_at) FROM users
-- REPLACE: modify specific columns, keep othersSELECT* REPLACE (price *1.1AS price) FROM products
-- COLUMNS(): regex or lambda selectionSELECT COLUMNS('.*revenue.*') FROM sales
SELECT COLUMNS(c -> c LIKE'%total%') FROM orders
SELECTmax(COLUMNS('.*amount.*')) FROM transactions
-- GROUP BY ALL: groups by all non-aggregate columns-- ORDER BY ALL: sorts by all SELECT columns left-to-right
Joins
-- ASOF: find most recent match (time-series)FROM trades t ASOF JOIN prices p
ONt.symbol=p.symbolANDt.time>=p.time-- SEMI: rows with matches (EXISTS alternative)SELECT*FROM customers SEMI JOIN orders USING (customer_id)
-- ANTI: rows without matches (NOT EXISTS alternative) SELECT*FROM products ANTI JOIN order_items USING (product_id)
-- POSITIONAL: match by row positionSELECT*FROM t1 POSITIONAL JOIN t2
-- LATERAL: automatic, no keyword needed
Structs & Lists
-- Struct accessSELECTuser.name, user.address.city FROM users
SELECT struct.*FROM table -- expands all fieldsSELECT struct['field'] FROM table -- bracket notation-- Lists (1-indexed)SELECT list[2:4] FROM table -- elements 2-3SELECT list[-1] FROM table -- last elementSELECT [1, 2, 3] AS my_list -- list creation-- List comprehensionSELECT [x *2 FOR x IN numbers IF x >0]
-- String slicing (1-indexed)SELECT'Hello'[1:3] -- 'Hel'
Function Chaining
-- Dot notation instead of nestingSELECTname.upper().replace('', '_').length()
-- Instead of: length(replace(upper(name), ' ', '_'))-- Works with any compatible return typeSELECTlist.filter(x -> x >0).sum()
FROM-first Syntax
-- SELECT becomes optionalFROM users WHERE age >21SELECT name, email
FROM'data.parquet'WHERE year =2024-- Lateral column aliases (reuse in same SELECT)SELECT price * qty AS total, total *0.1AS tax
File Operations
-- Direct file queryingFROM'data.parquet'FROM'logs/2024-*.csv'-- glob patternsFROM read_csv('data.csv', header=true)
-- UNION BY NAME: match columns by nameSELECT*FROM t1 UNION BY NAME SELECT*FROM t2
Table Transformations
-- PIVOT: long to wide (auto-detect columns)
PIVOT sales ON year USING sum(amount) GROUP BY product
PIVOT sales ON year IN (2023, 2024) USING sum(amount)
-- Multiple pivots
PIVOT data
ON country, product -- creates country_product columns
USING sum(sales) AS total, avg(sales) AS avg
-- UNPIVOT: wide to long
UNPIVOT wide_table ON jan, feb, mar
INTO NAME month VALUE sales
-- Dynamic unpivot with COLUMNS
UNPIVOT wide_table
ON COLUMNS(* EXCLUDE (id, name))
INTO NAME metric VALUE value
-- SQL standard with nulls
UNPIVOT INCLUDE NULLS (
sales FOR month IN (jan, feb, mar)
)
Other Features
-- Trailing commas allowedSELECT col1, col2, FROM table
-- Underscores in numbersSELECT 1_000_000 AS million
-- Percentage LIMITSELECT*FROM large_table LIMIT10%
-- Top-N aggregatesSELECT customer_id, max(amount, 3) FROM orders GROUP BY1
Notes
EXCLUDE can't use COLUMNS('regex'), use lambda instead
based on the markdown version of https://duckdb.org/docs/stable/sql/dialect/friendly_sql, created with Claude