Skip to content

Instantly share code, notes, and snippets.

@akiatoji
Last active June 5, 2017 01:39
Show Gist options
  • Save akiatoji/969f4fb6f3290e88e850 to your computer and use it in GitHub Desktop.
Save akiatoji/969f4fb6f3290e88e850 to your computer and use it in GitHub Desktop.
Sample build.sbt for building a Spark Job that processes complex Avro data types and Hive tables.
import sbt._
// Project identity: the name and version assigned to this build.
name := "CatSpark"
version := "1.0"
// Scala version used to compile the project; `%%` dependencies pick their
// artifact suffix from the binary version derived from this setting.
scalaVersion := "2.10.4"
// ---- Dependency coordinates (group id + version per stack) ----
// Apache Spark
val spark_gid: String = "org.apache.spark"
val spark_version: String = "1.3.0"
// Apache Hadoop (CDH build, per the version string)
val hadoop_gid: String = "org.apache.hadoop"
val hadoop_version: String = "2.5.0-cdh5.3.2"
// Apache Hive (CDH build); only referenced by the disabled Hive dependencies
val hive_gid: String = "org.apache.hive"
val hive_version: String = "0.13.1-cdh5.3.2"
// ---- Reusable exclusion rules for transitive dependencies ----
// Excludes the javax.servlet "servlet-api" artifact.
val excludeServletApi = ExclusionRule(
  organization = "javax.servlet",
  artifact = "servlet-api"
)
// Excludes everything published under org.eclipse.jetty.
val excludeEclipseJetty = ExclusionRule(
  organization = "org.eclipse.jetty"
)
// Excludes Twitter's chill artifact built for Scala 2.10.
val excludeChill = ExclusionRule(
  organization = "com.twitter",
  artifact = "chill_2.10"
)
// Excludes the esotericsoftware minlog artifact.
val excludeMinLog = ExclusionRule(
  organization = "com.esotericsoftware",
  artifact = "minlog"
)
// Additional repositories consulted when resolving dependencies.
// Repositories are addressed over HTTPS wherever the host is known to serve it,
// so that downloaded artifacts cannot be tampered with in transit.
resolvers ++= Seq(
  "Typesafe repository snapshots" at "https://repo.typesafe.com/typesafe/snapshots/",
  "Typesafe repository releases" at "https://repo.typesafe.com/typesafe/releases/",
  "Sonatype repo" at "https://oss.sonatype.org/content/groups/scala-tools/",
  "Sonatype releases" at "https://oss.sonatype.org/content/repositories/releases",
  "Sonatype snapshots" at "https://oss.sonatype.org/content/repositories/snapshots",
  // Was http://; the other Sonatype entries on this host already use https.
  "Sonatype staging" at "https://oss.sonatype.org/content/repositories/staging",
  // NOTE(review): the next four hosts are left on plain HTTP because HTTPS
  // availability could not be confirmed from this file -- verify and upgrade.
  "Java.net Maven2 Repository" at "http://download.java.net/maven/2/",
  "Twitter Repository" at "http://maven.twttr.com",
  "Websudos releases" at "http://maven.websudos.co.uk/ext-release-local",
  "rediscala" at "http://dl.bintray.com/etaty/maven",
  // Maven Central no longer serves plain HTTP, so this must be https.
  "sbt-pack" at "https://repo1.maven.org/maven2/org/xerial/sbt/",
  "Cloudera Repository" at "https://repository.cloudera.com/artifactory/cloudera-repos",
  // Local ~/.m2 repository.
  Resolver.mavenLocal
)
// NOTE: Spark releases older than 1.3 have a class-loading bug.
// Using Kryo with Avro specific record classes therefore *requires* Spark 1.3.
libraryDependencies ++= {
  // "provided" scope: needed to compile, expected to be supplied at runtime.
  val hadoopCommon =
    (hadoop_gid % "hadoop-common" % hadoop_version % "provided").excludeAll(excludeServletApi)
  // Spark modules that need no exclusions, all at the same version and scope.
  val sparkModules = Seq("spark-core", "spark-sql", "spark-streaming", "spark-mllib").map {
    module => spark_gid %% module % spark_version % "provided"
  }
  // spark-hive additionally drops servlet-api and all Eclipse Jetty artifacts.
  val sparkHive = (spark_gid %% "spark-hive" % spark_version % "provided")
    .excludeAll(excludeServletApi, excludeEclipseJetty)
  val mysqlDriver = "mysql" % "mysql-connector-java" % "5.1.16" % "provided"
  // Libraries declared in the default (compile) scope.
  val bundled = Seq(
    "org.apache.avro" % "avro" % "1.7.7",
    "com.twitter" %% "chill-avro" % "0.5.1",
    "com.databricks" %% "spark-avro" % "0.2.0",
    "com.github.scopt" %% "scopt" % "3.3.0",
    "joda-time" % "joda-time" % "2.7"
  )
  // Same order as the original flat list.
  (hadoopCommon +: sparkModules) ++ Seq(sparkHive, mysqlDriver) ++ bundled
}
// Disabled Hive client dependencies, kept for reference:
// hive_gid % "hive-common" % hive_version exclude( "javax.servlet", "servlet-api"),
// hive_gid % "hive-jdbc" % hive_version exclude( "javax.servlet", "servlet-api"),
// hive_gid % "hive-metastore" % hive_version exclude( "javax.servlet", "servlet-api"),
// hive_gid % "hive-exec" % hive_version
// shapeless plus the macro paradise compiler plugin.
libraryDependencies ++= Seq(
  // The artifact name carries a full Scala version suffix (2.10.4).
  // NOTE(review): left hard-coded; confirm which full-version builds of
  // shapeless 2.1.0 are published before tying this to scalaVersion.
  "com.chuusai" % "shapeless_2.10.4" % "2.1.0",
  // A compiler plugin must match the exact compiler version. CrossVersion.full
  // appends the full scalaVersion (paradise_2.10.4 for 2.10.4) instead of
  // repeating it as a literal, so plugin and compiler cannot drift apart.
  compilerPlugin("org.scalamacros" % "paradise" % "2.0.1" cross CrossVersion.full)
)
// Conflict resolution for files that occur in more than one jar when the
// assembly (fat jar) is built. Rules are tried top to bottom.
assemblyMergeStrategy in assembly := {
  // Strategy that was configured before this override; used as the fallback.
  val inherited = (assemblyMergeStrategy in assembly).value
  (path: String) => path match {
    // Drop every entry whose first path segment is META-INF.
    // NOTE(review): this also matches META-INF/services/* -- confirm that no
    // bundled library relies on service-loader registrations.
    case PathList("META-INF", _*) => MergeStrategy.discard
    // For duplicated com/esotericsoftware entries keep the first one found.
    case PathList("com", "esotericsoftware", _*) => MergeStrategy.first
    // Everything else: defer to the previously configured strategy.
    case _ => inherited(path)
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment