-
-
Save futoase/6173194 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- 6.1 共起分析:Simpson係数の高いブックペア上位20 -- | |
td query -w -d book_crossing_dataset " | |
SELECT r2.title AS title1, | |
r2.year_of_publication AS year1, | |
r2.cnt AS cnt1, | |
r3.title AS title2, | |
r3.year_of_publication AS year2, | |
r3.cnt AS cnt2, | |
r1.cnt AS intersection, | |
r1.cnt/IF(r2.cnt<r3.cnt,r2.cnt,r3.cnt)*100 AS simpson | |
FROM | |
( | |
SELECT o1.isbn AS isbn1, o2.isbn AS isbn2, COUNT(*) AS cnt | |
FROM | |
( | |
SELECT t1.isbn AS isbn, user_id | |
FROM | |
( | |
SELECT isbn, user_id | |
FROM ratings | |
GROUP BY isbn, user_id | |
) t1 | |
JOIN | |
( | |
SELECT isbn, COUNT(book_rating) | |
FROM ratings | |
GROUP BY isbn | |
HAVING 500 < COUNT(book_rating) | |
) t2 | |
ON | |
(t1.isbn=t2.isbn) | |
) o1 | |
JOIN | |
( | |
SELECT t1.isbn AS isbn, user_id | |
FROM | |
( | |
SELECT isbn, user_id | |
FROM ratings | |
GROUP BY isbn, user_id | |
) t1 | |
JOIN | |
( | |
SELECT isbn, COUNT(book_rating) | |
FROM ratings | |
GROUP BY isbn | |
HAVING 500 < COUNT(book_rating) | |
) t2 | |
ON | |
(t1.isbn=t2.isbn) | |
) o2 | |
ON | |
(o1.user_id=o2.user_id) | |
GROUP BY o1.isbn, o2.isbn | |
HAVING o1.isbn != o2.isbn | |
) r1 | |
JOIN | |
( | |
SELECT t1.isbn AS isbn, year_of_publication,title, cnt | |
FROM | |
( | |
SELECT isbn, COUNT(book_rating) AS cnt | |
FROM ratings | |
GROUP BY isbn | |
) t1 | |
JOIN | |
( | |
SELECT isbn, | |
year_of_publication, | |
v['book_title'] AS title | |
FROM books | |
) t2 | |
ON | |
(t1.isbn=t2.isbn) | |
) r2 | |
ON | |
(r1.isbn1=r2.isbn) | |
JOIN | |
( | |
SELECT t1.isbn AS isbn, year_of_publication,title, cnt | |
FROM | |
( | |
SELECT isbn, COUNT(book_rating) AS cnt | |
FROM ratings | |
GROUP BY isbn | |
) t1 | |
JOIN | |
( | |
SELECT isbn, | |
year_of_publication, | |
v['book_title'] AS title | |
FROM books | |
) t2 | |
ON | |
(t1.isbn=t2.isbn) | |
) r3 | |
ON | |
(r1.isbn2=r3.isbn) | |
ORDER BY simpson DESC | |
LIMIT 40 | |
" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment