Skip to content

Instantly share code, notes, and snippets.

@mojzis
Created August 5, 2025 23:44
Show Gist options
  • Save mojzis/b8dfd13561b0ccc0912a6e7676739201 to your computer and use it in GitHub Desktop.
Save mojzis/b8dfd13561b0ccc0912a6e7676739201 to your computer and use it in GitHub Desktop.
duckdb friendly sql described for LLMs

DuckDB Friendly SQL Quick Reference

Column Selection

-- EXCLUDE: remove columns from SELECT *
SELECT * EXCLUDE (id, created_at) FROM users

-- REPLACE: modify specific columns, keep others
SELECT * REPLACE (price * 1.1 AS price) FROM products

-- COLUMNS(): regex or lambda selection
SELECT COLUMNS('.*revenue.*') FROM sales
SELECT COLUMNS(c -> c LIKE '%total%') FROM orders
SELECT max(COLUMNS('.*amount.*')) FROM transactions

-- GROUP BY ALL: groups by all non-aggregate columns
-- ORDER BY ALL: sorts by all SELECT columns left-to-right

Joins

-- ASOF: find most recent match (time-series)
FROM trades t ASOF JOIN prices p 
ON t.symbol = p.symbol AND t.time >= p.time

-- SEMI: rows with matches (EXISTS alternative)
SELECT * FROM customers SEMI JOIN orders USING (customer_id)

-- ANTI: rows without matches (NOT EXISTS alternative)  
SELECT * FROM products ANTI JOIN order_items USING (product_id)

-- POSITIONAL: match by row position
SELECT * FROM t1 POSITIONAL JOIN t2

-- LATERAL: automatic, no keyword needed

Structs & Lists

-- Struct access
SELECT user.name, user.address.city FROM users
SELECT struct.* FROM table  -- expands all fields
SELECT struct['field'] FROM table  -- bracket notation

-- Lists (1-indexed)
SELECT list[2:4] FROM table  -- elements 2-3
SELECT list[-1] FROM table   -- last element
SELECT [1, 2, 3] AS my_list  -- list creation

-- List comprehension
SELECT [x * 2 FOR x IN numbers IF x > 0]

-- String slicing (1-indexed)
SELECT 'Hello'[1:3]  -- 'Hel'

Function Chaining

-- Dot notation instead of nesting
SELECT name.upper().replace(' ', '_').length()
-- Instead of: length(replace(upper(name), ' ', '_'))

-- Works with any compatible return type
SELECT list.filter(x -> x > 0).sum()

FROM-first Syntax

-- SELECT becomes optional
FROM users WHERE age > 21 SELECT name, email
FROM 'data.parquet' WHERE year = 2024

-- Lateral column aliases (reuse in same SELECT)
SELECT price * qty AS total, total * 0.1 AS tax

File Operations

-- Direct file querying
FROM 'data.parquet'
FROM 'logs/2024-*.csv'  -- glob patterns
FROM read_csv('data.csv', header=true)

-- UNION BY NAME: match columns by name
SELECT * FROM t1 UNION BY NAME SELECT * FROM t2

Table Transformations

-- PIVOT: long to wide (auto-detect columns)
PIVOT sales ON year USING sum(amount) GROUP BY product
PIVOT sales ON year IN (2023, 2024) USING sum(amount)

-- Multiple pivots
PIVOT data 
ON country, product  -- creates country_product columns
USING sum(sales) AS total, avg(sales) AS avg

-- UNPIVOT: wide to long
UNPIVOT wide_table ON jan, feb, mar 
INTO NAME month VALUE sales

-- Dynamic unpivot with COLUMNS
UNPIVOT wide_table 
ON COLUMNS(* EXCLUDE (id, name))
INTO NAME metric VALUE value

-- SQL standard with nulls
UNPIVOT INCLUDE NULLS (
  sales FOR month IN (jan, feb, mar)
)

Other Features

-- Trailing commas allowed
SELECT col1, col2, FROM table

-- Underscores in numbers
SELECT 1_000_000 AS million

-- Percentage LIMIT
SELECT * FROM large_table LIMIT 10%

-- Top-N aggregates
SELECT customer_id, max(amount, 3) FROM orders GROUP BY 1

Notes

  • EXCLUDE can't use COLUMNS('regex'), use lambda instead
  • Struct fields: case-insensitive. Map keys: case-sensitive
  • Lists/strings: 1-indexed (SQL), not 0-indexed (Python)
  • SUMMARIZE table_name for quick stats
@mojzis
Copy link
Author

mojzis commented Aug 5, 2025

based on the markdown version of https://duckdb.org/docs/stable/sql/dialect/friendly_sql, created with Claude

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment