// Prints the average of column "num" for each distinct value of column "a",
// computed by hand with a (sum, count) reduction per key.
//
// Relies on a `sqlContext` that is already in scope (e.g. provided by the shell).
// NOTE(review): `load(path)` is kept as in the original; confirm it is still
// available in the Spark version in use.
val pdata = sqlContext.load("/home/rxin/ints.parquet").select("a", "num")

// Emit (key, (value, 1)) for every row, then add the pairs component-wise per key
// so each key ends up with (sum of values, number of rows).
// Both components are widened to Long so large groups cannot overflow Int.
val sum_count = pdata
  .map { row => (row.getInt(0), (row.getInt(1).toLong, 1L)) }
  .reduceByKey { (a, b) => (a._1 + b._1, a._2 + b._2) }
  .collect()

// Divide as Double: integer division would silently truncate the average.
// count is at least 1 for every key present, so the division is always defined.
sum_count.foreach { case (a, (sum, count)) =>
  println(s"$a: ${sum.toDouble / count}")
}