Last Updated: August 30, 2016
· ghafran

Setup Spark with Cassandra Connector

Use this script to get Spark 2.0 working with Cassandra.
It took me a while to figure out how to get this set up.

# install java
# Refresh the package index, add the OpenJDK PPA, then refresh the index
# again so the PPA's packages are visible before openjdk-8-jdk is installed.
sudo apt-get update -y
sudo apt-get install software-properties-common -y
sudo apt-get install wget -y
sudo add-apt-repository -y ppa:openjdk-r/ppa
sudo apt-get update -y
sudo apt-get install openjdk-8-jdk -y

# make serve directory
# Create /srv if it does not exist yet and work from there.
sudo mkdir --parents /srv
cd /srv

# scala 2.11 required for cassandra spark connector
# Download the Scala 2.11.7 Debian package into the current directory and
# install it with dpkg.
sudo wget https://downloads.lightbend.com/scala/2.11.7/scala-2.11.7.deb
sudo dpkg -i scala-2.11.7.deb

# get spark
# Fetch the Spark 2.0.0 (Hadoop 2.7) binary release from the Apache archive,
# unpack it, and rename the unpacked directory to "spark".
sudo wget https://archive.apache.org/dist/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.7.tgz
sudo tar -zxf spark-2.0.0-bin-hadoop2.7.tgz
sudo mv spark-2.0.0-bin-hadoop2.7 spark

# build spark cassandra connector
# Register the sbt apt repository and its signing key, then install sbt and git.
echo "deb https://repo.scala-sbt.org/scalasbt/debian /" | sudo tee -a /etc/apt/sources.list.d/sbt.list
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 642AC823
# apt-transport-https must be present before apt can read the https repository.
sudo apt-get install apt-transport-https -y
sudo apt-get update -y
sudo apt-get install sbt git -y
# Clone the connector and build the v2.0.0-M2 tag against Scala 2.11.
# sudo is used because the working directory was created with sudo above.
sudo git clone https://github.com/datastax/spark-cassandra-connector.git
cd spark-cassandra-connector
sudo git checkout v2.0.0-M2
sudo sbt assembly -Dscala-2.11=true

# move spark cassandra connector to spark jar directory
# Copies every .jar found under the current directory into Spark's jars
# directory; sudo is needed because /srv/spark was unpacked with sudo.
sudo find . -iname "*.jar" -type f -exec /bin/cp {} /srv/spark/jars/ \;

# start master
# Bind the master to localhost so it matches the spark://localhost:7077 URL
# used by the worker and by the test below.
/srv/spark/sbin/start-master.sh --host localhost

# start slave
# Start one worker on localhost and register it with the master.
/srv/spark/sbin/start-slave.sh --host localhost spark://localhost:7077

# start shell
# spark-shell lives in bin/ (sbin/ only holds the daemon start/stop scripts).
# The driver class path is every jar in /srv/spark/jars joined with ':'.
/srv/spark/bin/spark-shell --driver-class-path "$(echo /srv/spark/jars/*.jar | sed 's/ /:/g')"

# test (enter the following in the spark-shell)
import org.apache.spark
import org.apache.spark._
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.cassandra
import org.apache.spark.sql.cassandra._
import com.datastax.spark
import com.datastax.spark._
import com.datastax.spark.connector
import com.datastax.spark.connector._
import com.datastax.spark.connector.cql
import com.datastax.spark.connector.cql._
import com.datastax.spark.connector.cql.CassandraConnector
import com.datastax.spark.connector.cql.CassandraConnector._

// Tell the connector which Cassandra node to contact; "cassandraserver" is a
// placeholder hostname to replace with your own.
val conf = new SparkConf(true).set("spark.cassandra.connection.host", "cassandraserver")
// NOTE(review): spark-shell already provides a SparkContext named `sc`; creating
// a second context may be rejected — confirm, and stop the existing one first if needed.
val sc = new SparkContext("spark://localhost:7077", "test", conf)
// Handle to the "users" table in the keyspace named "keyspace".
val table = sc.cassandraTable("keyspace", "users")