-
-
Save futoase/6173197 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- 2.1.1 ユーザーの居住国分布 -- | |
td query -w -d book_crossing_dataset -f csv -o user_dist_of_country.csv " | |
SELECT country, cnt, ROUND(cnt/total_cnt*10000)/100 AS rate | |
FROM | |
( | |
SELECT country, COUNT(*) as cnt, 1 AS one | |
FROM users | |
WHERE country != '' AND country != 'n/a' | |
GROUP BY country | |
ORDER BY cnt DESC | |
LIMIT 30 | |
) t1 | |
JOIN | |
( | |
SELECT COUNT(country) as total_cnt, 1 AS one | |
FROM users | |
WHERE country != '' AND country != 'n/a' | |
) t2 | |
ON | |
(t1.one=t2.one) | |
" | |
-- 2.1.2 ユーザーの居住地域分布 -- | |
td query -w -d book_crossing_dataset -f csv -o user_dist_of_location.csv " | |
SELECT location1 AS state, cnt, ROUND(cnt/total_cnt*10000)/100 AS rate | |
FROM | |
( | |
SELECT location1, COUNT(*) as cnt, 1 AS one | |
FROM users | |
WHERE country = 'usa' | |
GROUP BY location1 | |
ORDER BY cnt DESC | |
LIMIT 30 | |
) t1 | |
JOIN | |
( | |
SELECT COUNT(*) as total_cnt, 1 AS one | |
FROM users | |
WHERE country = 'usa' | |
) t2 | |
ON | |
(t1.one=t2.one) | |
" | |
-- 2.2.1 ユーザーの年代分布 -- | |
td query -w -d book_crossing_dataset " | |
SELECT generation, cnt | |
FROM | |
( | |
SELECT CEIL(IF(age='NULL',0,CAST(age AS INT))/5)*5 AS generation, COUNT(*) as cnt | |
FROM users | |
WHERE IF(age='NULL',0,CAST(age AS INT)) <= 65 | |
GROUP BY CEIL(IF(age='NULL',0,CAST(age AS INT))/5)*5 | |
UNION ALL | |
SELECT CAST(65 AS BIGINT) AS generation, COUNT(*) as cnt | |
FROM users | |
WHERE 65 < CAST(age AS INT) | |
) t1 | |
ORDER BY generation | |
" | |
-- 2.2.2 ユーザーの年代分布 -- | |
td query -w -d book_crossing_dataset -f csv -o user_dist_of_generation.csv " | |
SELECT generation, cnt, ROUND(cnt/total_cnt*1000)/10 AS rate | |
FROM | |
( | |
SELECT CEIL(IF(age='NULL',0,CAST(age AS INT))/5)*5 AS generation, COUNT(*) as cnt, 1 AS one | |
FROM users | |
WHERE 10 < IF(age='NULL',0,CAST(age AS INT)) | |
AND IF(age='NULL',0,CAST(age AS INT)) <= 65 | |
GROUP BY CEIL(IF(age='NULL',0,CAST(age AS INT))/5)*5 | |
) t1 | |
JOIN | |
( | |
SELECT COUNT(*) as total_cnt, 1 AS one | |
FROM users | |
WHERE 10 < IF(age='NULL',0,CAST(age AS INT)) | |
AND IF(age='NULL',0,CAST(age AS INT)) <= 65 | |
) t2 | |
ON | |
(t1.one=t2.one) | |
ORDER BY generation | |
" | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment