Last Updated: February 25, 2016
·
608
· Jeroen Rosenberg

NGram extraction using Stackable traits in Scala

The following code showcases the Stackable Traits pattern, used here to extract n-grams from an input file of sentences.

package com.github.ngram.extractor

/**
 * Application entry point: reads sentences from the classpath resource
 * `/input.txt` (one sentence per line) and prints the extracted n-grams.
 *
 * The extractor stacks `Unigrams`, `Bigrams` and `Trigrams` on top of
 * `NGramExtraction`; because the last mixed-in trait is consulted first,
 * the resulting arities are `List(3, 2, 1)`.
 */
object Boot extends App {
  // Imported locally so this object compiles without a file-level import.
  import scala.io.Source

  val extractor = new SentenceAnalyzer
    with NGramExtraction
    with Unigrams
    with Bigrams
    with Trigrams

  val sentences: Iterable[String] = {
    // getResourceAsStream returns null when the resource is absent; fail with a clear message.
    val stream = Option(getClass.getResourceAsStream("/input.txt"))
      .getOrElse(sys.error("Resource /input.txt not found on the classpath"))
    val source = Source.fromInputStream(stream)
    // Materialize all lines before closing: getLines() is lazy and reads from the open stream.
    try source.getLines().toList
    finally source.close()
  }

  println(extractor.analyze(sentences))
}

/** Contract for components that expose the n-gram arities (window sizes) to extract. */
trait NGrams {

  /** The arities to extract, e.g. `1` for unigrams and `2` for bigrams. */
  def arities: List[Int]
}

/**
 * Base layer of the stackable arity traits: contributes no arities itself and
 * terminates the `super.arities` chain of the traits stacked on top of it.
 */
trait NGramExtraction extends NGrams {
  override def arities: List[Int] = Nil
}
/** Stackable modification that prepends arity `1` to the arities of the layers beneath it. */
trait Unigrams extends NGrams {
  self: NGramExtraction =>

  abstract override def arities: List[Int] = 1 :: super.arities
}
/** Stackable modification that prepends arity `2` to the arities of the layers beneath it. */
trait Bigrams extends NGrams {
  self: NGramExtraction =>

  abstract override def arities: List[Int] = 2 :: super.arities
}
/** Stackable modification that prepends arity `3` to the arities of the layers beneath it. */
trait Trigrams extends NGrams {
  self: NGramExtraction =>

  abstract override def arities: List[Int] = 3 :: super.arities
}

trait SentenceAnalyzer {
  this: NGrams =>

  /**
   * Extracts all n-grams of every configured arity from the given sentences.
   *
   * Each sentence is split on runs of whitespace and empty tokens are dropped,
   * so repeated spaces, leading whitespace and blank lines do not produce
   * empty-string tokens. Sentences with fewer tokens than an arity contribute
   * nothing for that arity.
   *
   * @param sentences the sentences to analyze, one sentence per element
   * @return the n-grams as token lists, grouped by arity in the order given by
   *         `arities`, and in sentence order within each arity
   */
  def analyze(sentences: Iterable[String]): List[List[String]] = {
    // Tokenize once up front: the tokens do not depend on the arity.
    val tokenized = sentences.map(_.split("\\s+").filter(_.nonEmpty).toList).toList

    arities.flatMap { n =>
      // `sliding` yields a single short window when a sentence has fewer than
      // n tokens; the size filter discards it.
      tokenized.flatMap(_.sliding(n)).filter(_.size == n)
    }
  }