Last active
April 4, 2020 08:02
-
-
Save n0531m/0c542252da8474dcf342db8add0ab88f to your computer and use it in GitHub Desktop.
covic19_publicdataset_locationname_fluctuation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#DECLARE # PRECISION INT64 DEFAULT 6; | |
DECLARE PRECISION_GEOHASH INT64 DEFAULT 2; | |
WITH # first table : only aggregating the rows with exact match on country_region, province_state, longitude, latitude | |
# also rounding lat/lng to a set precision. (defined in declard param) | |
# also adding a geohash value based point point | |
t1 AS | |
( SELECT CASE | |
WHEN country_region IN ("South Korea", | |
"Korea, South", | |
"Republic of Korea") THEN "South Korea" | |
WHEN country_region IN ("Mainland China") THEN "China" | |
WHEN country_region IN ("Vietnam", | |
"Viet Nam") THEN "Vietnam" | |
WHEN country_region IN ("Bahamas", | |
"The Bahamas") THEN "Bahamas" | |
WHEN country_region IN ("Russia", | |
"Russian Federation") THEN "Russia" | |
WHEN country_region IN ("Gambia", | |
"The Gambia") THEN "Gambia" | |
WHEN country_region IN ("Moldova", | |
"Republic of Moldova") THEN "Moldova" | |
WHEN country_region IN ("Gambia", | |
"Gambia, The") THEN "Gambia" | |
WHEN country_region IN ("Taiwan", | |
"Taiwan*", | |
"Taipei and environs") THEN "Taiwan" | |
WHEN country_region IN ("Republic of Ireland", | |
"Ireland") THEN "Ireland" | |
WHEN country_region IN ("Iran (Islamic Republic of)", | |
"Iran") THEN "Iran" | |
WHEN country_region IN ("Czechia", | |
"Czech Republic") THEN "Czech Republic" | |
WHEN province_state IN ("Hong Kong") THEN "Hong Kong" | |
WHEN province_state IN ("Macau") THEN "Macau" | |
WHEN country_region IN ("Guam") | |
AND province_state IS NULL THEN "US" | |
WHEN country_region IN ("Cayman Islands") | |
AND province_state IS NULL THEN "United Kingdom" | |
WHEN country_region IN ("Aruba") | |
AND province_state IS NULL THEN "Netherlands" | |
WHEN country_region IN ("Curacao") | |
AND province_state IS NULL THEN "Netherlands" | |
WHEN country_region IN ("Guadeloupe") | |
AND province_state IS NULL THEN "France" | |
WHEN country_region IN ("Mayotte") | |
AND province_state IS NULL THEN "France" | |
WHEN country_region IN ("French Guiana") | |
AND province_state IS NULL THEN "France" | |
WHEN country_region IN ("Saint Barthelemy") | |
AND province_state IS NULL THEN "France" | |
WHEN country_region IN ("Gibraltar") | |
AND province_state IS NULL THEN "United Kingdom" | |
WHEN country_region IN ("US") | |
AND province_state IN ("Puerto Rico") THEN "Puerto Rico" | |
ELSE trim(country_region) | |
END AS country_region, | |
CASE | |
WHEN country_region = province_state THEN NULL | |
WHEN country_region IN ("US") | |
AND province_state IN ("Virgin Islands", | |
"Virgin Islands, U.S.", | |
"United States Virgin Islands") THEN "Virgin Islands" | |
WHEN country_region IN ("US") | |
AND province_state IN ("Santa Clara, CA", | |
"Santa Clara County, CA") THEN "Santa Clara Country, CA" | |
WHEN country_region IN ("US") | |
AND province_state IN ("Orange, CA", | |
"Orange County, CA") THEN "Orange Country, CA" | |
WHEN province_state IN ("Fench Guiana") THEN "French Guiana" | |
WHEN country_region IN ("Guam") | |
AND province_state IS NULL THEN "Guam" | |
WHEN country_region IN ("Cayman Islands") | |
AND province_state IS NULL THEN "Cayman Islands" | |
WHEN country_region IN ("Aruba") | |
AND province_state IS NULL THEN "Aruba" | |
WHEN country_region IN ("Curacao") | |
AND province_state IS NULL THEN "Curacao" | |
WHEN country_region IN ("Guadeloupe") | |
AND province_state IS NULL THEN "Guadeloupe" | |
WHEN country_region IN ("Mayotte") | |
AND province_state IS NULL THEN "Mayotte" | |
WHEN country_region IN ("French Guiana") | |
AND province_state IS NULL THEN "French Guiana" | |
WHEN country_region IN ("US") | |
AND province_state IN ("Puerto Rico") THEN NULL | |
WHEN country_region IN ("Saint Barthelemy") | |
AND province_state IS NULL THEN "Saint Barthelemy" | |
WHEN country_region IN ("Gibraltar") | |
AND province_state IS NULL THEN "Gibraltar" | |
WHEN country_region IN ("United Kingdom") | |
AND province_state IN ("UK") THEN NULL | |
WHEN province_state IN ("Hong Kong") THEN NULL | |
WHEN province_state IN ("Macau") THEN NULL | |
WHEN province_state IN ("Taiwan") THEN NULL #when province_state IN ("", | |
"") THEN "" | |
ELSE trim(province_state) | |
END AS province_state, | |
#ROUND(latitude,PRECISION) AS lat, | |
latitude, | |
longitude, | |
#ROUND(longitude,PRECISION) AS lng, | |
ST_GEOGPOINT(longitude,latitude) AS point, | |
ST_GEOHASH(ST_GEOGPOINT(longitude, latitude), PRECISION_GEOHASH) AS geohash, | |
COUNT(*) AS numOfRows | |
FROM `bigquery-public-data.covid19_jhu_csse.summary` | |
GROUP BY country_region, | |
province_state, | |
longitude, | |
latitude), # second table aggregated based on rounded numbers | |
t2 AS | |
( SELECT country_region, | |
province_state, # lat AS latitude, | |
# lng AS longitude, | |
latitude, | |
longitude, | |
ST_ASTEXT(point) AS point, | |
geohash , | |
SUM(numOfRows) AS numOfRows | |
FROM t1 | |
GROUP BY country_region, | |
province_state, | |
latitude, | |
longitude, | |
geohash, | |
ST_ASTEXT(point)), | |
t3 AS | |
( SELECT * | |
FROM t2 | |
WHERE # excluding datapoints with missing lat/lng | |
geohash IS NOT NULL # excluding datapoints with latlng of (0,0) | |
AND geohash!=st_geohash(st_geogpoint(0, 0), PRECISION_GEOHASH) #WHERE # country_region LIKE "%Mal%" #where province_state is not null and country_region is null | |
#WHERE # country_region LIKE "%Korea%" | |
ORDER BY geohash), | |
t4 AS | |
( SELECT * | |
FROM t3), | |
t5 AS ( | |
SELECT geohash, | |
#ARRAY_AGG( STRUCT(country_region, # province_state)) as labels | |
ARRAY_AGG(DISTINCT CONCAT( IF (country_region IS NULL, "[null]", country_region), # " || ", IF (province_state IS NULL, "[null]", province_state))) AS labels # ORDER BY | |
# country_region, province_state) AS labels | |
, sum(numOfRows) AS numOfRows | |
FROM t4 | |
GROUP BY geohash #, #country_region, #province_state ) | |
SELECT geohash, | |
labels, | |
numOfRows | |
FROM t5 | |
WHERE ARRAY_LENGTH(labels)>1 | |
ORDER BY geohash |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment