Skip to content

Instantly share code, notes, and snippets.

@hhbyyh
Created February 24, 2017 18:18
Show Gist options
  • Save hhbyyh/06fcf3fdc8f6edda971847bcb5783d99 to your computer and use it in GitHub Desktop.
Save hhbyyh/06fcf3fdc8f6edda971847bcb5783d99 to your computer and use it in GitHub Desktop.
import org.apache.log4j.{Level, Logger}
import org.apache.spark.ml.fpm.FPGrowth
import org.apache.spark.sql.SparkSession
/**
 * Smoke test / rough timing harness for Spark ML's `FPGrowth` on the Kosarak dataset.
 *
 * Reads a transaction file (one transaction per line, items written as
 * whitespace-separated integers), fits an `FPGrowth` model with a minimum
 * support of 0.1, prints the frequent itemsets and association rules, and
 * finally prints the wall-clock time in seconds taken by `transform(...).show()`.
 *
 * Dataset: http://fimi.ua.ac.be/data/kosarak.dat
 */
object KosarakTest {

  /** Input file used when no path is supplied on the command line. */
  private val DefaultDataPath = "/home/yuhao/workspace/data/fpm/kosarak.dat"

  /**
   * Program entry point.
   *
   * @param args optional; `args(0)` overrides the path of the transaction file.
   *             When absent, [[DefaultDataPath]] is used.
   */
  def main(args: Array[String]): Unit = {
    // Silence Spark's INFO chatter so the printed tables stay readable.
    Logger.getLogger("org").setLevel(Level.WARN)

    val inputPath = args.headOption.getOrElse(DefaultDataPath)
    val spark = SparkSession.builder().master("local[4]").appName("sss").getOrCreate()

    // Make sure the session is shut down even if reading, fitting or showing fails.
    try {
      val sc = spark.sparkContext
      import spark.implicits._

      // One transaction per line. Blank lines are dropped so that "".toInt cannot
      // throw, and `distinct` removes repeated items within a transaction
      // (FPGrowth rejects transactions that contain duplicates).
      val df = sc.textFile(inputPath, 4)
        .map(_.trim)
        .filter(_.nonEmpty)
        .map(_.split("\\s+").map(_.toInt).distinct)
        .toDF() // single array<int> column, named "value" by default
      df.show()

      val model = new FPGrowth()
        .setItemsCol("value")
        .setMinSupport(0.1)
        .fit(df)
      model.freqItemsets.show()
      model.associationRules.show()

      // Spark is lazy: the measured interval covers the `show()` action that
      // actually executes the transform, not just building the plan.
      val st = System.nanoTime()
      model.transform(df).show()
      println((System.nanoTime() - st) / 1e9)
    } finally {
      spark.stop()
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment