Created
November 7, 2015 14:20
-
-
Save rawkintrevo/6e9b28b1ae02a2aaa534 to your computer and use it in GitHub Desktop.
Download NOAA weather data, then compute the average high temperature for each station.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Format reference for parsing station metadata:
// ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt
// Format reference for parsing the per-year observation files:
// ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/by_year/readme.txt
/**
 * One parsed row of a GHCN-Daily "by_year" CSV file.
 *
 * @param station_id        station identifier (first CSV column)
 * @param date              observation date, kept as the raw string from the file
 * @param observation_type  element code for the measurement, e.g. "TMAX"
 * @param observation_value numeric value of the measurement; units depend on the
 *                          element (presumably tenths of degrees C for TMAX -- confirm
 *                          against the GHCN-Daily readme)
 * @param observation_time  time-of-observation field, kept as the raw string
 */
case class Observation(
  station_id: String,
  date: String,
  observation_type: String,
  observation_value: Float,
  observation_time: String
)
// Load one year of GHCN-Daily observations straight from the NOAA FTP server
// (Spark decompresses the .gz transparently).
// NOTE(review): the credentials in the scraped source were mangled by e-mail
// obfuscation ("[email protected]"). The host is taken from the readme URLs;
// the password here is a guess -- confirm what the server expects.
val obsText = sc.textFile("ftp://anonymous:anonymous@ftp.ncdc.noaa.gov/pub/data/ghcn/daily/by_year/1768.csv.gz")

// Column order per the by_year readme:
//   0 ID, 1 DATE, 2 ELEMENT, 3 DATA VALUE, 4 M-FLAG, 5 Q-FLAG, 6 S-FLAG, 7 OBS-TIME
// split(",", -1) keeps trailing empty columns, which plain split(",") silently drops;
// lift(7) tolerates rows that still end before the OBS-TIME column.
val observations = obsText
  .map(_.split(",", -1))
  .map(f => Observation(f(0), f(1), f(2), f(3).toFloat, f.lift(7).getOrElse("")))

// Mean TMAX per station, shaped as (station_id, (mean, count)).
// reduceByKey requires an associative and commutative function because Spark merges
// partial results per partition; an incremental "running mean" update is neither,
// so accumulate (sum, count) and divide once at the end.
val avgTemps = observations
  .filter(_.observation_type == "TMAX")
  .map(o => (o.station_id, (o.observation_value, 1)))
  .reduceByKey((a, b) => (a._1 + b._1, a._2 + b._2))
  .mapValues { case (sum, n) => (sum / n, n) }

// Convert the mean from tenths of a degree Celsius to degrees Fahrenheit and peek
// at a few stations.
avgTemps.mapValues { case (meanTenthsC, _) => meanTenthsC / 10 * 9 / 5 + 32 }.take(5)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment