Last active
June 5, 2017 01:39
-
-
Save akiatoji/969f4fb6f3290e88e850 to your computer and use it in GitHub Desktop.
Sample build.sbt for building a Spark Job that processes complex Avro data types and Hive tables.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sbt._ | |
// Project identity.
name := "CatSpark"

version := "1.0"

// Scala version used for compilation and for resolving `%%` (cross-versioned) dependencies.
scalaVersion := "2.10.4"

// Group ID / version pairs shared by the dependency declarations in this build.
val spark_gid = "org.apache.spark"

val spark_version = "1.3.0"

// The Hadoop and Hive versions carry a "cdh5.3.2" suffix.
// NOTE(review): presumably these are Cloudera CDH builds served by the Cloudera resolver -- confirm.
val hadoop_gid = "org.apache.hadoop"

val hadoop_version = "2.5.0-cdh5.3.2"

val hive_gid = "org.apache.hive"

val hive_version = "0.13.1-cdh5.3.2"

// Reusable exclusion rules for transitive dependencies.
val excludeServletApi = ExclusionRule(organization = "javax.servlet", artifact = "servlet-api")

val excludeEclipseJetty = ExclusionRule(organization = "org.eclipse.jetty")

// NOTE(review): excludeChill and excludeMinLog are not referenced by the dependency lists
// visible in this file; they are kept in case another part of the build uses them.
val excludeChill = ExclusionRule(organization = "com.twitter", artifact = "chill_2.10")

val excludeMinLog = ExclusionRule(organization = "com.esotericsoftware", artifact = "minlog")
// Additional repositories consulted when resolving dependencies.
// Hosts known to serve HTTPS are addressed over HTTPS: Maven Central (repo1.maven.org) answers
// plain-HTTP requests with "501 HTTPS Required", and unencrypted resolvers allow artifacts to be
// tampered with in transit.
resolvers ++= Seq(
  "Typesafe repository snapshots" at "https://repo.typesafe.com/typesafe/snapshots/",
  "Typesafe repository releases" at "https://repo.typesafe.com/typesafe/releases/",
  "Sonatype repo" at "https://oss.sonatype.org/content/groups/scala-tools/",
  "Sonatype releases" at "https://oss.sonatype.org/content/repositories/releases",
  "Sonatype snapshots" at "https://oss.sonatype.org/content/repositories/snapshots",
  "Sonatype staging" at "https://oss.sonatype.org/content/repositories/staging",
  // NOTE(review): the four hosts below are left on their original HTTP URLs because HTTPS
  // availability (or continued existence) of these repositories could not be confirmed here.
  "Java.net Maven2 Repository" at "http://download.java.net/maven/2/",
  "Twitter Repository" at "http://maven.twttr.com",
  "Websudos releases" at "http://maven.websudos.co.uk/ext-release-local",
  "rediscala" at "http://dl.bintray.com/etaty/maven",
  "sbt-pack" at "https://repo1.maven.org/maven2/org/xerial/sbt/",
  "Cloudera Repository" at "https://repository.cloudera.com/artifactory/cloudera-repos",
  Resolver.mavenLocal
)
// NOTE: Spark releases before 1.3 have a class-loading bug.
// Spark 1.3 is *required* in order to use Kryo with Avro specific record classes.
libraryDependencies ++= {
  // Spark modules declared identically: cross-versioned, pinned to spark_version, "provided".
  val plainSparkModules = Seq("spark-core", "spark-sql", "spark-streaming", "spark-mllib")
    .map(module => spark_gid %% module % spark_version % "provided")
  // hadoop-common and spark-hive additionally strip conflicting transitive artifacts.
  val hadoopCommon =
    (hadoop_gid % "hadoop-common" % hadoop_version % "provided").excludeAll(excludeServletApi)
  val sparkHive =
    (spark_gid %% "spark-hive" % spark_version % "provided")
      .excludeAll(excludeServletApi, excludeEclipseJetty)
  // Everything below except the MySQL driver is declared without the "provided" scope.
  val otherLibraries = Seq(
    "mysql" % "mysql-connector-java" % "5.1.16" % "provided",
    "org.apache.avro" % "avro" % "1.7.7",
    "com.twitter" %% "chill-avro" % "0.5.1",
    "com.databricks" %% "spark-avro" % "0.2.0",
    "com.github.scopt" %% "scopt" % "3.3.0",
    "joda-time" % "joda-time" % "2.7"
  )
  // Disabled direct Hive dependencies, kept for reference:
  //   hive_gid % "hive-common" % hive_version exclude( "javax.servlet", "servlet-api"),
  //   hive_gid % "hive-jdbc" % hive_version exclude( "javax.servlet", "servlet-api"),
  //   hive_gid % "hive-metastore" % hive_version exclude( "javax.servlet", "servlet-api"),
  //   hive_gid % "hive-exec" % hive_version
  // Same declaration order as before: hadoop-common, plain Spark modules, spark-hive, the rest.
  (hadoopCommon +: plainSparkModules) ++ (sparkHive +: otherLibraries)
}
// Shapeless plus the macro-paradise compiler plugin.
// Both artifact names embed the full Scala version (2.10.4); keep them in step with scalaVersion.
libraryDependencies += "com.chuusai" % "shapeless_2.10.4" % "2.1.0"

addCompilerPlugin("org.scalamacros" % "paradise_2.10.4" % "2.0.1")
// Conflict resolution for files that appear in more than one jar when building the fat jar.
assemblyMergeStrategy in assembly := {
  // Keep ServiceLoader registrations: discarding META-INF/services would silently disable any
  // provider that bundled libraries register there. Entries from all jars are concatenated
  // with duplicate lines removed.
  case PathList("META-INF", "services", _*) => MergeStrategy.filterDistinctLines
  // Everything else under META-INF (manifests, signature files, ...) is dropped.
  case PathList("META-INF", _*) => MergeStrategy.discard
  // Duplicate com.esotericsoftware classes: keep the first copy encountered.
  case PathList("com", "esotericsoftware", _*) => MergeStrategy.first
  // Any other path falls back to the previously configured strategy.
  case other =>
    val fallbackStrategy = (assemblyMergeStrategy in assembly).value
    fallbackStrategy(other)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment