Skip to content

Instantly share code, notes, and snippets.

@joebowbeer
Last active February 4, 2025 09:29
Show Gist options
  • Save joebowbeer/f347eb18b676a8536afed058a64ef14e to your computer and use it in GitHub Desktop.
Save joebowbeer/f347eb18b676a8536afed058a64ef14e to your computer and use it in GitHub Desktop.
Queries from KùzuDB's Benchmark study ported to DuckPGQ
-- Adapted from https://github.com/prrao87/kuzudb-study
-- Load vertices and edges
CREATE TABLE City AS SELECT * FROM
'https://github.com/prrao87/kuzudb-study/raw/main/data/output/nodes/cities.parquet';
CREATE TABLE Country AS SELECT * FROM
'https://github.com/prrao87/kuzudb-study/raw/main/data/output/nodes/countries.parquet';
CREATE TABLE Interest AS SELECT * FROM
'https://github.com/prrao87/kuzudb-study/raw/main/data/output/nodes/interests.parquet';
CREATE TABLE Person AS SELECT * FROM
'https://github.com/prrao87/kuzudb-study/raw/main/data/output/nodes/persons.parquet';
CREATE TABLE State AS SELECT * FROM
'https://github.com/prrao87/kuzudb-study/raw/main/data/output/nodes/states.parquet';
CREATE TABLE CityIn AS SELECT * FROM
'https://github.com/prrao87/kuzudb-study/raw/main/data/output/edges/city_in.parquet';
CREATE TABLE Follows AS SELECT * FROM
'https://github.com/prrao87/kuzudb-study/raw/main/data/output/edges/follows.parquet';
CREATE TABLE HasInterest AS SELECT * FROM
'https://github.com/prrao87/kuzudb-study/raw/main/data/output/edges/interested_in.parquet';
CREATE TABLE LivesIn AS SELECT * FROM
'https://github.com/prrao87/kuzudb-study/raw/main/data/output/edges/lives_in.parquet';
CREATE TABLE StateIn AS SELECT * FROM
'https://github.com/prrao87/kuzudb-study/raw/main/data/output/edges/state_in.parquet';
-- Create property graph
INSTALL duckpgq FROM community;
LOAD duckpgq;
CREATE PROPERTY GRAPH snb
VERTEX TABLES (
City,
Country,
Interest,
Person,
State
)
EDGE TABLES (
CityIn SOURCE KEY ('from') REFERENCES City (id)
DESTINATION KEY ('to') REFERENCES State (id),
Follows SOURCE KEY ('from') REFERENCES Person (id)
DESTINATION KEY ('to') REFERENCES Person (id),
HasInterest SOURCE KEY ('from') REFERENCES Person (id)
DESTINATION KEY ('to') REFERENCES Interest (id),
LivesIn SOURCE KEY ('from') REFERENCES Person (id)
DESTINATION KEY ('to') REFERENCES City (id),
StateIn SOURCE KEY ('from') REFERENCES State (id)
DESTINATION KEY ('to') REFERENCES Country (id)
);
-- Queries
-- PRAGMA enable_profiling;
-- 1. Who are the top 3 most-followed persons?
FROM GRAPH_TABLE (snb
MATCH (follower:Person)-[follows:Follows]->(person:Person)
COLUMNS (person.id AS personID, person.name, follower.id AS followerID)
)
SELECT personID, name, COUNT(followerID) AS numFollowers
GROUP BY ALL ORDER BY numFollowers DESC LIMIT 3;
-- 2. In which city does the most-followed person live?
WITH
mfp AS (
FROM GRAPH_TABLE (snb
MATCH (follower:Person)-[follows:Follows]->(person:Person)
COLUMNS (person.id AS personID, person.name, follower.id AS followerID)
)
SELECT personID, name, COUNT(followerID) AS numFollowers
GROUP BY ALL ORDER BY numFollowers DESC LIMIT 1
)
FROM
mfp,
GRAPH_TABLE (snb
MATCH (person:Person)-[li:LivesIn]->(city:City)
COLUMNS (person.id AS personID, city.city, city.state, city.country)
) addr
SELECT mfp.personID, mfp.name, mfp.numFollowers, addr.city, addr.state, addr.country
WHERE mfp.personID = addr.personID;
-- 3. Which 5 cities in a particular country have the lowest average age in the network?
FROM GRAPH_TABLE (snb
MATCH (p:Person)-[li:LivesIn]->(c:City)-[ci:CityIn]->(s:State)-[si:StateIn]->(co:Country)
WHERE co.country = 'United States' /* $country */
COLUMNS (c.city, p.age)
)
SELECT city, AVG(age) AS averageAge
GROUP BY ALL ORDER BY averageAge LIMIT 5;
-- 4. How many persons between ages 30-40 are there in each country?
FROM GRAPH_TABLE (snb
MATCH (p:Person)-[li:LivesIn]->(c:City)-[ci:CityIn]->(s:State)-[si:StateIn]->(country:Country)
WHERE p.age >= 30 AND p.age <= 40 /* $age_lower, $age_upper */
COLUMNS (country.country)
)
SELECT country AS countries, COUNT(country) AS personCounts
GROUP BY ALL ORDER BY personCounts LIMIT 3;
-- 5. How many men in London, United Kingdom have an interest in skiing?
FROM GRAPH_TABLE (snb
MATCH (i:Interest)<-[hi:HasInterest]-(p:Person)-[li:LivesIn]->(c:City)
WHERE lower(i.interest) = lower('Skiing') /* $interest */
AND lower(p.gender) = lower('Male') /* $gender */
AND c.city = 'London' /* $city */
AND c.country = 'United Kingdom' /* $country */
)
SELECT COUNT(*) as numPersons;
-- 6. Which city has the maximum number of women that like Tennis?
FROM GRAPH_TABLE (snb
MATCH (i:Interest)<-[hi:HasInterest]-(p:Person)-[li:LivesIn]->(c:City)
WHERE lower(i.interest) = lower('Tennis') /* $interest */
AND lower(p.gender) = lower('Female') /* $gender */
COLUMNS (c.city, c.country)
)
SELECT COUNT(*) as numPersons, city, country
GROUP BY ALL ORDER BY numPersons DESC LIMIT 5;
-- 7. Which U.S. state has the maximum number of persons between the age 23-30 who enjoy photography?
FROM GRAPH_TABLE (snb
MATCH (i:Interest)<-[hi:HasInterest]-(p:Person)-[li:LivesIn]->(c:City)-[ci:CityIn]->(s:State)
WHERE s.country = 'United States' /* $country */
AND p.age >= 25 AND p.age <= 30 /* $age_lower, $age_upper */
AND lower(i.interest) = lower('Photography') /* $interest */
COLUMNS (c.state)
)
SELECT COUNT(*) as numPersons, state
GROUP BY ALL ORDER BY numPersons DESC LIMIT 1;
-- 8. How many second-degree paths exist in the graph?
FROM GRAPH_TABLE (snb MATCH (a:Person)-[r1:Follows]->(b:Person)-[r2:Follows]->(c:Person))
SELECT COUNT(*) AS numPaths;
-- 9. How many paths exist in the graph through persons age 50 to persons above age 25?
FROM GRAPH_TABLE (snb
MATCH (a:Person)-[r1:Follows]->(b:Person)-[r2:Follows]->(c:Person)
WHERE b.age < 50 AND c.age > 25 /* $age_1, $age_2 */
)
SELECT COUNT(*) AS numPaths;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment