Last Updated: April 16, 2021
·
103
· kalinin84

Clustering (Machine learning)

import json
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

#
# [0] Input: path of the JSON file to cluster, relative to the current
#     working directory.
#
FILE_NAME = '../datasets/corpus_tfidf.json'

#
# [1] Load the whole JSON document.  It is passed to scikit-learn unchanged,
#     so it is presumably a list of equal-length numeric rows (one TF-IDF
#     vector per document) -- TODO confirm against the dataset.
#
with open(FILE_NAME, 'r', encoding='utf-8') as jsonfile:
    data = json.load(jsonfile)

#
# [2] Split the rows into two clusters with k-means.
#     n_init is pinned explicitly: the library default used to be 10 and was
#     changed to 'auto' in scikit-learn 1.4 (with a FutureWarning in 1.2/1.3),
#     so leaving it out makes the result depend on the installed version.
#
kmeans = KMeans(n_clusters=2, n_init=10, random_state=0).fit(data)

#
# [3] Project the rows onto their first two principal components.  This is
#     used for plotting only; the clustering above ran on the full data.
#     random_state is fixed so the projection is reproducible when the
#     default svd_solver='auto' selects a randomized solver on large inputs
#     (it is ignored by the exact solver).
#
X = PCA(n_components=2, random_state=0).fit_transform(data)

#
# [4] Scatter plot of the 2-D projection, coloured by k-means cluster label
#     ('bwr' colormap: one cluster blue, the other red).
#
plt.scatter(X[:, 0], X[:, 1], cmap='bwr', c=kmeans.labels_)
plt.show()