Created
November 10, 2015 10:09
-
-
Save kakakazuma/d6977b8fbd7c48c39c65 to your computer and use it in GitHub Desktop.
Spark sample application: counts lines (total, and those containing "Dog" or "Cat") in a text file read from S3
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package sample | |
import org.apache.spark.{SparkConf, SparkContext} | |
/** Sample Spark application that reads a text file from S3 (via the `s3n`
  * filesystem) and prints the total number of lines together with the number
  * of lines containing "Dog" and the number containing "Cat".
  */
object Sample1 {

  // NOTE(review): placeholder credentials. Replace before running, and do not
  // commit real keys to source control; prefer supplying them through the
  // environment or the cluster's Hadoop configuration.
  val myAccessKey: String = "xxxxxxx"
  val mySecretKey: String = "yyyyyyy"

  // S3 bucket name and object path (within the bucket) of the input text file.
  val bucket: String = "your_bucket"
  val filepath: String = "your_path"

  /** Entry point: configures S3 access, runs the three counts and prints them.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("sample")
    val sc = new SparkContext(conf)
    try {
      // Register the native S3 filesystem and its credentials for s3n:// URLs.
      val hadoopConf = sc.hadoopConfiguration
      hadoopConf.set("fs.s3n.impl", "org.apache.hadoop.fs.s3native.NativeS3FileSystem")
      hadoopConf.set("fs.s3n.awsAccessKeyId", myAccessKey)
      hadoopConf.set("fs.s3n.awsSecretAccessKey", mySecretKey)

      // Three separate actions run on this RDD below; caching it avoids
      // re-reading the file from S3 for each of them.
      val s3data = sc.textFile(s"s3n://$bucket/$filepath").cache()

      val numDog = s3data.filter(_.contains("Dog")).count()
      val numCat = s3data.filter(_.contains("Cat")).count()
      val total = s3data.count()

      println(s"total lines: $total")
      println(s"Lines with Dog: $numDog, Lines with Cat: $numCat")
    } finally {
      // Always release the context, even if a job above fails.
      sc.stop()
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment