ACM-Research-Coding-Challenge/code.py at master · parth23-dev/ACM-Research-Coding-Challenge · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import sys

import pandas as pd
from matplotlib import pyplot
from numpy import unique
from numpy import where
from sklearn.cluster import KMeans
from yellowbrick.cluster import KElbowVisualizer

# Location of the data set. The default is the original author's local
# download path; pass a different CSV path as the first command-line
# argument to run the script on another machine.
DEFAULT_CSV_PATH = r'/Users/Parth/Downloads/Coding-Challenge-master/ClusterPlot.csv'
csv_path = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_CSV_PATH

# Shift data from csv to array: keep only the two feature columns, giving a
# 2-D array with one row per sample and one column per feature (V1, V2)
dp = pd.read_csv(csv_path)
dp_array = dp[['V1', 'V2']].to_numpy()

# Find the number of clusters: fit KMeans over the candidate range of k and
# draw the elbow curve (timings=False leaves the fit-time curve off the plot)
model = KMeans()
visualizer = KElbowVisualizer(model, k=(1, 15), timings=False)

# Fit the data to the graph
visualizer.fit(dp_array)
visualizer.show()

# Choose the clustering algo: KMeans with 3 clusters (the value the author
# settled on -- presumably read off the elbow plot above)
model = KMeans(n_clusters=3)
model.fit(dp_array)

# Find the clusters: label every sample, then collect the distinct labels
cluster_samp = model.predict(dp_array)
clusters = unique(cluster_samp)

# Make scatter plot using clusters: one scatter() call per cluster so that
# each cluster is drawn as its own series
for cluster in clusters:
    # where() with a single argument returns a 1-tuple of index arrays; take
    # element 0 so the selections below are plain 1-D arrays, not (1, n) ones
    find_rows = where(cluster_samp == cluster)[0]
    pyplot.scatter(dp_array[find_rows, 0], dp_array[find_rows, 1])

# Show plot
pyplot.show()