ss9qsg
Last Updated: July 11, 2016
·
44
· kalinin84

Apache Spark SQL

/**
 * One record of the pipe-delimited input, as built by the parsing step of this script.
 *
 * @param alias   textual alias of the record
 * @param classId integer class identifier
 * @param typeId  integer type identifier
 */
case class Doc(
  alias: String,
  classId: Int,
  typeId: Int
)

// Read the input file and split every line on the literal '|' character.
val data = sc.textFile(bigFile).map(line => line.split("\\|"))

// Turn the first three fields of each line into a Doc, trimming surrounding whitespace.
// NOTE(review): a line with fewer than three fields, or a non-numeric classId/typeId,
// makes this lambda throw (ArrayIndexOutOfBoundsException / NumberFormatException);
// confirm the input is clean or decide how malformed lines should be handled.
val docs = data.map { fields =>
  val alias = fields(0).trim
  val classId = fields(1).trim.toInt
  val typeId = fields(2).trim.toInt
  Doc(alias, classId, typeId)
}

// Convert the RDD of Doc records into a DataFrame.
val df = docs.toDF()

// Print a sample of rows, then the schema of the DataFrame.
df.show()
df.printSchema()

// Count the rows for each distinct classId and print the result.
val rowsPerClass = df.groupBy("classId").count()
rowsPerClass.show()

// Expose the DataFrame to SQL under the table name "docs".
// NOTE(review): registerTempTable is deprecated from Spark 2.0 in favour of
// createOrReplaceTempView; kept as-is because this script uses the sqlContext API.
df.registerTempTable("docs")

// Count the aliases of all rows whose typeId is 99999 and print the result.
val aliasCount = sqlContext.sql("SELECT COUNT(alias) FROM docs WHERE typeId = 99999")
aliasCount.show()
Say Thanks
Respond