NGram extraction using Stackable traits in Scala
The following code showcases the Stackable Traits pattern, used here to extract n-grams from an input file of sentences.
package com.github.ngram.extractor
/**
 * Entry point: reads the sentences stored in the classpath resource `/input.txt`
 * (one sentence per line) and prints the n-grams extracted from them.
 */
object Boot extends App {
  import scala.io.Source

  // Mixin order matters: each arity trait prepends its own arity to `super.arities`,
  // so the trait mixed in last contributes the first arity (here: 3, 2, 1).
  val extractor = new SentenceAnalyzer
    with NGramExtraction
    with Unigrams
    with Bigrams
    with Trigrams

  val sentences: Iterable[String] = {
    val resourceName = "/input.txt"
    // getResourceAsStream returns null when the resource is absent; fail with a clear message instead.
    val stream = Option(getClass.getResourceAsStream(resourceName)).getOrElse(
      throw new java.io.FileNotFoundException(s"Classpath resource not found: $resourceName")
    )
    val source = Source.fromInputStream(stream)
    // Materialise all lines before closing: getLines() is lazy and would fail on a closed source.
    try source.getLines().toList
    finally source.close()
  }

  println(extractor.analyze(sentences))
}
/** Contract for components that declare which n-gram sizes ("arities") should be extracted. */
trait NGrams {

  /** The n-gram sizes to extract, in the order they should be processed. */
  def arities: List[Int]
}
/**
 * Concrete base of the stack: contributes no arities of its own, giving the
 * `abstract override` arity traits a concrete `super.arities` to build upon.
 */
trait NGramExtraction extends NGrams {
  override def arities: List[Int] = Nil
}
/** Stackable modification that puts arity 1 in front of the arities collected so far. */
trait Unigrams extends NGrams {
  this: NGramExtraction =>

  abstract override def arities: List[Int] = 1 +: super.arities
}
/** Stackable modification that puts arity 2 in front of the arities collected so far. */
trait Bigrams extends NGrams {
  this: NGramExtraction =>

  abstract override def arities: List[Int] = 2 +: super.arities
}
/** Stackable modification that puts arity 3 in front of the arities collected so far. */
trait Trigrams extends NGrams {
  this: NGramExtraction =>

  abstract override def arities: List[Int] = 3 +: super.arities
}
/**
 * Extracts word n-grams from sentences for every arity supplied by the
 * mixed-in [[NGrams]] implementation.
 */
trait SentenceAnalyzer {
  this: NGrams =>

  /**
   * Returns all n-grams found in `sentences`, one arity after another in the
   * order given by `arities`.
   *
   * Each sentence is processed on its own, so an n-gram never spans two
   * sentences. A sentence with fewer than n tokens contributes nothing for
   * that arity.
   *
   * @param sentences the sentences to analyse, one sentence per element
   * @return every n-gram as a list of its tokens
   */
  def analyze(sentences: Iterable[String]): List[List[String]] = {
    // Split on runs of whitespace and drop empty tokens, so repeated or leading
    // whitespace and blank lines do not produce empty-string "words".
    def tokenize(sentence: String): List[String] =
      sentence.split("\\s+").filter(_.nonEmpty).toList

    // Tokenization does not depend on the arity: do it once, not once per arity.
    val tokenized = sentences.map(tokenize).toList

    arities.flatMap { n =>
      // sliding(n) yields one short window for inputs shorter than n; keep full-size windows only.
      tokenized.flatMap(_.sliding(n)).filter(_.size == n)
    }
  }
}
Written by Jeroen Rosenberg
Related protips
Have a fresh tip? Share with Coderwall community!
Post
Post a tip
Best
#Scala
Authors
Sponsored by #native_company# — Learn More
#native_title#
#native_desc#