-
-
Save futoase/6173199 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- 4.1.1 ユーザーの評価回数平均 -- | |
td query -w -d book_crossing_dataset " | |
SELECT ROUND(AVG(cnt)) AS avg | |
FROM | |
( | |
SELECT user_id, COUNT(book_rating) AS cnt | |
FROM ratings | |
GROUP BY user_id | |
) t1 | |
" | |
-- 4.1.2 ユーザーの評価回数分布 -- | |
td query -w -d book_crossing_dataset -f csv -o review_user_dist_of_review_count.csv " | |
SELECT cnt, user_cnt, ROUND(user_cnt/total_user_cnt*1000)/10 AS rate | |
FROM | |
( | |
SELECT cnt, COUNT(*) AS user_cnt, 1 AS one | |
FROM | |
( | |
SELECT user_id, COUNT(book_rating) AS cnt | |
FROM ratings | |
GROUP BY user_id | |
) t1 | |
GROUP BY cnt | |
ORDER BY cnt | |
LIMIT 50 | |
) o1 | |
JOIN | |
( | |
SELECT COUNT(distinct user_id) AS total_user_cnt, 1 AS one | |
FROM ratings | |
) o2 | |
ON | |
(o1.one=o2.one) | |
" | |
-- 4.2 ユーザーの評価平均分布 (0<book_rating, 5<=count(book_rating)) -- | |
td query -w -d book_crossing_dataset -f csv -o review_user_dist_of_review_avg.csv " | |
SELECT avg, COUNT(*) | |
FROM | |
( | |
SELECT user_id, ROUND(AVG(book_rating)/0.5)*0.5 AS avg, COUNT(book_rating) AS cnt | |
FROM ratings | |
WHERE 0 < book_rating | |
GROUP BY user_id | |
HAVING 5 <= COUNT(book_rating) | |
) t1 | |
GROUP BY avg | |
ORDER BY avg | |
" | |
-- 4.3 ユーザーの年代別×評価平均分布 (0<book_rating, 5<=count(book_rating)) -- | |
td query -w -d book_crossing_dataset -f csv -o review_user_dist_of_review_avg_by_generation.csv " | |
SELECT o1.generation, avg, ROUND(cnt/total_cnt*1000)/10 AS rate | |
FROM | |
( | |
SELECT generation, avg, COUNT(*) AS cnt | |
FROM | |
( | |
SELECT user_id, ROUND(AVG(book_rating)/0.5)*0.5 AS avg, COUNT(book_rating) AS cnt | |
FROM ratings | |
WHERE 0 < book_rating | |
GROUP BY user_id | |
HAVING 5 <= COUNT(book_rating) | |
) t1 | |
JOIN | |
( | |
SELECT user_id, CEIL(IF(age='NULL',0,CAST(age AS INT))/5)*5 AS generation | |
FROM users | |
WHERE 10 < IF(age='NULL',0,CAST(age AS INT)) | |
AND IF(age='NULL',0,CAST(age AS INT)) <= 65 | |
) t2 | |
ON | |
(t1.user_id=t2.user_id) | |
GROUP BY generation,avg | |
) o1 | |
JOIN | |
( | |
SELECT generation, COUNT(*) AS total_cnt | |
FROM | |
( | |
SELECT user_id, COUNT(book_rating) | |
FROM ratings | |
WHERE 0 < book_rating | |
GROUP BY user_id | |
HAVING 5 <= COUNT(book_rating) | |
) t1 | |
JOIN | |
( | |
SELECT user_id, CEIL(IF(age='NULL',0,CAST(age AS INT))/5)*5 AS generation | |
FROM users | |
WHERE 10 < IF(age='NULL',0,CAST(age AS INT)) | |
AND IF(age='NULL',0,CAST(age AS INT)) <= 65 | |
) t2 | |
ON | |
(t1.user_id=t2.user_id) | |
GROUP BY generation | |
) o2 | |
ON | |
(o1.generation=o2.generation) | |
ORDER BY generation, avg | |
" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment