tmcgrath · January 6, 2016 17:32 · tmcgrath · Jan 6, 2016 · Arvind282 · Jul 4, 2016
diff --git a/Spark SQL with Scala using mySQL (JDBC) data source b/Spark SQL with Scala using mySQL (JDBC) data source
 todd-mcgraths-macbook-pro:spark-1.4.1-bin-hadoop2.4 toddmcgrath$ bin/spark-shell --jars mysql-connector-java-5.1.38-bin.jar 
 Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/  '_/
   /___/ .__/\_,_/_/ /_/\_\   version 1.4.1
      /_/

 Using Scala version 2.10.4 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_66)
 Type in expressions to have them evaluated.
 Type :help for more information.
 Spark context available as sc.
 SQL context available as sqlContext.

 scala> val dataframe_mysql = sqlContext.read.format("jdbc").option("url", "jdbc:mysql://localhost/sparksql").option("driver", "com.mysql.jdbc.Driver").option("dbtable", "baby_names").option("user", "root").option("password", "root").load()
 dataframe_mysql: org.apache.spark.sql.DataFrame = [id: bigint, year: int, first_name: string, county: string, sex: string, count: int]

 scala> dataframe_mysql.show
 +--+----+----------+-----------+---+-----+
 |id|year|first_name|     county|sex|count|
 +--+----+----------+-----------+---+-----+
 | 1|2012|   DOMINIC|     CAYUGA|  M|    6|
 | 2|2012|   ADDISON|   ONONDAGA|  F|   14|
 | 3|2012|     JULIA|   ONONDAGA|  F|   15|
 | 4|2012|   WILLIAM|WESTCHESTER|  M|   41|
 | 5|2012|    AMELIA|   ONONDAGA|  F|   15|
 | 6|2012|      LILY|   ONONDAGA|  F|   15|
 | 7|2012|     AIDAN|WESTCHESTER|  M|   16|
 | 8|2012|    STEVEN|WESTCHESTER|  M|   16|
 | 9|2012|       MAX|WESTCHESTER|  M|   16|
 |10|2012|  BENJAMIN|WESTCHESTER|  M|   40|
 |11|2012|    JUSTIN|WESTCHESTER|  M|   39|
 |12|2012|   ABIGAIL|   ONONDAGA|  F|   15|
 |13|2012| SEBASTIAN|WESTCHESTER|  M|   39|
 |14|2012|   BRANDON|WESTCHESTER|  M|   38|
 |15|2012|    JOSHUA|    SUFFOLK|  M|   56|
 |16|2012|       MIA|   ONONDAGA|  F|   16|
 |17|2012|     EMILY|   ONONDAGA|  F|   16|
 |18|2012|     GRACE|   ONONDAGA|  F|   17|
 |19|2012|     JASON|    SUFFOLK|  M|   55|
 |20|2012|    THOMAS|    SUFFOLK|  M|   55|
 +--+----+----------+-----------+---+-----+


 scala> dataframe_mysql.registerTempTable("names")

 scala> dataframe_mysql.sqlContext.sql("select * from names limit 10").collect.foreach(println)
 [1,2012,DOMINIC,CAYUGA,M,6]
 [2,2012,ADDISON,ONONDAGA,F,14]
 [3,2012,JULIA,ONONDAGA,F,15]
 [4,2012,WILLIAM,WESTCHESTER,M,41]
 [5,2012,AMELIA,ONONDAGA,F,15]
 [6,2012,LILY,ONONDAGA,F,15]
 [7,2012,AIDAN,WESTCHESTER,M,16]
 [8,2012,STEVEN,WESTCHESTER,M,16]
 [9,2012,MAX,WESTCHESTER,M,16]
 [10,2012,BENJAMIN,WESTCHESTER,M,40]
	todd-mcgraths-macbook-pro:spark-1.4.1-bin-hadoop2.4 toddmcgrath$ bin/spark-shell --jars mysql-connector-java-5.1.38-bin.jar
	Welcome to
	____ __
	/ __/__ ___ _____/ /__
	_\ \/ _ \/ _ `/ __/ '_/
	/___/ .__/\_,_/_/ /_/\_\ version 1.4.1
	/_/

	Using Scala version 2.10.4 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_66)
	Type in expressions to have them evaluated.
	Type :help for more information.
	Spark context available as sc.
	SQL context available as sqlContext.

	scala> val dataframe_mysql = sqlContext.read.format("jdbc").option("url", "jdbc:mysql://localhost/sparksql").option("driver", "com.mysql.jdbc.Driver").option("dbtable", "baby_names").option("user", "root").option("password", "root").load()
	dataframe_mysql: org.apache.spark.sql.DataFrame = [id: bigint, year: int, first_name: string, county: string, sex: string, count: int]

	scala> dataframe_mysql.show
	+--+----+----------+-----------+---+-----+
	\|id\|year\|first_name\| county\|sex\|count\|
	+--+----+----------+-----------+---+-----+
	\| 1\|2012\| DOMINIC\| CAYUGA\| M\| 6\|
	\| 2\|2012\| ADDISON\| ONONDAGA\| F\| 14\|
	\| 3\|2012\| JULIA\| ONONDAGA\| F\| 15\|
	\| 4\|2012\| WILLIAM\|WESTCHESTER\| M\| 41\|
	\| 5\|2012\| AMELIA\| ONONDAGA\| F\| 15\|
	\| 6\|2012\| LILY\| ONONDAGA\| F\| 15\|
	\| 7\|2012\| AIDAN\|WESTCHESTER\| M\| 16\|
	\| 8\|2012\| STEVEN\|WESTCHESTER\| M\| 16\|
	\| 9\|2012\| MAX\|WESTCHESTER\| M\| 16\|
	\|10\|2012\| BENJAMIN\|WESTCHESTER\| M\| 40\|
	\|11\|2012\| JUSTIN\|WESTCHESTER\| M\| 39\|
	\|12\|2012\| ABIGAIL\| ONONDAGA\| F\| 15\|
	\|13\|2012\| SEBASTIAN\|WESTCHESTER\| M\| 39\|
	\|14\|2012\| BRANDON\|WESTCHESTER\| M\| 38\|
	\|15\|2012\| JOSHUA\| SUFFOLK\| M\| 56\|
	\|16\|2012\| MIA\| ONONDAGA\| F\| 16\|
	\|17\|2012\| EMILY\| ONONDAGA\| F\| 16\|
	\|18\|2012\| GRACE\| ONONDAGA\| F\| 17\|
	\|19\|2012\| JASON\| SUFFOLK\| M\| 55\|
	\|20\|2012\| THOMAS\| SUFFOLK\| M\| 55\|
	+--+----+----------+-----------+---+-----+


	scala> dataframe_mysql.registerTempTable("names")

	scala> dataframe_mysql.sqlContext.sql("select * from names limit 10").collect.foreach(println)
	[1,2012,DOMINIC,CAYUGA,M,6]
	[2,2012,ADDISON,ONONDAGA,F,14]
	[3,2012,JULIA,ONONDAGA,F,15]
	[4,2012,WILLIAM,WESTCHESTER,M,41]
	[5,2012,AMELIA,ONONDAGA,F,15]
	[6,2012,LILY,ONONDAGA,F,15]
	[7,2012,AIDAN,WESTCHESTER,M,16]
	[8,2012,STEVEN,WESTCHESTER,M,16]
	[9,2012,MAX,WESTCHESTER,M,16]
	[10,2012,BENJAMIN,WESTCHESTER,M,40]