Last Updated: January 25, 2021 · 30 · kalinin84

Apache Spark

// Load the application log as an RDD of text lines.
val raw = sc.textFile("main.log")
// How many lines carry the "delta:" marker?
raw.filter(_.contains("delta:")).count()
// Frequency of each distinct line (word-count pattern applied to whole lines).
raw.map(line => (line, 1)).reduceByKey(_ + _).collect()
object Preprocessing {
  /** Parses one colon-delimited log record of the form "label:alpha:beta".
    *
    * @param data raw log line; must contain at least three ':'-separated fields
    * @return (target, alpha, beta) where target is 1 iff the label is "delta",
    *         alpha is the second field scaled down by 100, beta the third field
    */
  def transform(data: String): (Int, Float, Float) = {
    val fields = data.split(':')
    // Binary target: only "delta" records are positives.
    val target = if (fields(0) == "delta") 1 else 0
    (target, fields(1).toFloat / 100, fields(2).toFloat)
  }
}

// Convert every raw line into a (target, alpha, beta) triple and
// persist the result as text files under the "sample" directory.
val data = raw.map(line => Preprocessing.transform(line))
data.saveAsTextFile("sample")