# Last Updated: January 27, 2021 · 20 · kalinin84

# R: Exploratory data analysis

# ---- Load the data and take a first look ----
data <- read.csv(file = "example.csv")

# Size of the table: number of columns, then number of rows.
ncol(data)
nrow(data)

# First and last five rows.
head(data, n = 5)
tail(data, n = 5)

# Per-column summary, then the pairwise correlation matrix.
# cor() receives the whole data frame, so this assumes every column is
# numeric at this point.
summary(data)
cor(x = data)

# Class counts of `target`, followed by the mean of every column within
# each `target` group.
table(data$target)
aggregate(x = data, by = list(data$target), FUN = mean)
# Treat the class label as a categorical variable from here on.
data$target <- as.factor(data$target)

# One colour per class, looked up through the factor's integer codes.
# Only two colours are listed, so this assumes `target` has at most two
# levels -- TODO confirm against the data.
colors <- c("#5580E2", "#D2381D")
plot(data, col = colors[as.integer(data$target)])

# Distribution of the `alpha` column.
hist(data$alpha, border = "black", col = "#5D82B9")
library(ggplot2)

# Scatter plot of `alpha` against `beta`, with points coloured by `target`.
ggplot(data = data, mapping = aes(x = alpha, y = beta, colour = target)) +
    geom_point()
library(randomForest)

# Fit a random forest that predicts `target` from every other column of
# `data`. Growing the forest draws from R's random number generator
# (bootstrap samples, candidate split variables), so the seed is fixed first;
# without it the confusion matrix and importance scores below change from
# run to run.
set.seed(42)

model <- randomForest(
    target ~ .,
    data = data,
    importance = TRUE,  # also assess predictor importance while fitting
    proximity = TRUE,   # keep the row-by-row proximity measure on the model
    ntree = 500         # number of trees to grow
)

# Kind of model that was fitted (e.g. "classification" for a factor response).
model$type
# Confusion matrix of the fitted forest.
model$confusion
# Variable importance; type = 2 selects the node-impurity based measure.
importance(model, type = 2)