Iris Dataset¶
In [1]:
Copied!
#Uncomment the next line and run this cell to install sorix
#!pip install 'sorix @ git+https://github.com/Mitchell-Mirano/sorix.git@main'
#Uncomment the next line and run this cell to install sorix
#!pip install 'sorix @ git+https://github.com/Mitchell-Mirano/sorix.git@main'
In [2]:
Copied!
import pandas as pd
import matplotlib.pyplot as plt
from sorix.clustering import Kmeans
from sorix import tensor
import pandas as pd
import matplotlib.pyplot as plt
from sorix.clustering import Kmeans
from sorix import tensor
In [3]:
Copied!
data=pd.read_csv("../data/Iris.csv")
data.head()
data=pd.read_csv("../data/Iris.csv")
data.head()
Out[3]:
| Id | SepalLengthCm | SepalWidthCm | PetalLengthCm | PetalWidthCm | Species | |
|---|---|---|---|---|---|---|
| 0 | 1 | 5.1 | 3.5 | 1.4 | 0.2 | Iris-setosa |
| 1 | 2 | 4.9 | 3.0 | 1.4 | 0.2 | Iris-setosa |
| 2 | 3 | 4.7 | 3.2 | 1.3 | 0.2 | Iris-setosa |
| 3 | 4 | 4.6 | 3.1 | 1.5 | 0.2 | Iris-setosa |
| 4 | 5 | 5.0 | 3.6 | 1.4 | 0.2 | Iris-setosa |
In [4]:
Copied!
data['labels'] = data.Species.map({'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2})
data.head()
data['labels'] = data.Species.map({'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2})
data.head()
Out[4]:
| Id | SepalLengthCm | SepalWidthCm | PetalLengthCm | PetalWidthCm | Species | labels | |
|---|---|---|---|---|---|---|---|
| 0 | 1 | 5.1 | 3.5 | 1.4 | 0.2 | Iris-setosa | 0 |
| 1 | 2 | 4.9 | 3.0 | 1.4 | 0.2 | Iris-setosa | 0 |
| 2 | 3 | 4.7 | 3.2 | 1.3 | 0.2 | Iris-setosa | 0 |
| 3 | 4 | 4.6 | 3.1 | 1.5 | 0.2 | Iris-setosa | 0 |
| 4 | 5 | 5.0 | 3.6 | 1.4 | 0.2 | Iris-setosa | 0 |
In [5]:
Copied!
plt.figure(figsize=(12,8))
plt.scatter(data['PetalLengthCm'],data['PetalWidthCm'],s=80, c=data['labels'])
plt.figure(figsize=(12,8))
plt.scatter(data['PetalLengthCm'],data['PetalWidthCm'],s=80, c=data['labels'])
Out[5]:
<matplotlib.collections.PathCollection at 0x7fd200b0bcb0>
In [6]:
Copied!
X=data[['PetalLengthCm','PetalWidthCm','PetalLengthCm','PetalWidthCm']].to_numpy()
X = tensor(X)
X
X=data[['PetalLengthCm','PetalWidthCm','PetalLengthCm','PetalWidthCm']].to_numpy()
X = tensor(X)
X
Out[6]:
tensor([[1.4, 0.2, 1.4, 0.2],
[1.4, 0.2, 1.4, 0.2],
[1.3, 0.2, 1.3, 0.2],
[1.5, 0.2, 1.5, 0.2],
[1.4, 0.2, 1.4, 0.2],
[1.7, 0.4, 1.7, 0.4],
[1.4, 0.3, 1.4, 0.3],
[1.5, 0.2, 1.5, 0.2],
[1.4, 0.2, 1.4, 0.2],
[1.5, 0.1, 1.5, 0.1],
[1.5, 0.2, 1.5, 0.2],
[1.6, 0.2, 1.6, 0.2],
[1.4, 0.1, 1.4, 0.1],
[1.1, 0.1, 1.1, 0.1],
[1.2, 0.2, 1.2, 0.2],
[1.5, 0.4, 1.5, 0.4],
[1.3, 0.4, 1.3, 0.4],
[1.4, 0.3, 1.4, 0.3],
[1.7, 0.3, 1.7, 0.3],
[1.5, 0.3, 1.5, 0.3],
[1.7, 0.2, 1.7, 0.2],
[1.5, 0.4, 1.5, 0.4],
[1. , 0.2, 1. , 0.2],
[1.7, 0.5, 1.7, 0.5],
[1.9, 0.2, 1.9, 0.2],
[1.6, 0.2, 1.6, 0.2],
[1.6, 0.4, 1.6, 0.4],
[1.5, 0.2, 1.5, 0.2],
[1.4, 0.2, 1.4, 0.2],
[1.6, 0.2, 1.6, 0.2],
[1.6, 0.2, 1.6, 0.2],
[1.5, 0.4, 1.5, 0.4],
[1.5, 0.1, 1.5, 0.1],
[1.4, 0.2, 1.4, 0.2],
[1.5, 0.1, 1.5, 0.1],
[1.2, 0.2, 1.2, 0.2],
[1.3, 0.2, 1.3, 0.2],
[1.5, 0.1, 1.5, 0.1],
[1.3, 0.2, 1.3, 0.2],
[1.5, 0.2, 1.5, 0.2],
[1.3, 0.3, 1.3, 0.3],
[1.3, 0.3, 1.3, 0.3],
[1.3, 0.2, 1.3, 0.2],
[1.6, 0.6, 1.6, 0.6],
[1.9, 0.4, 1.9, 0.4],
[1.4, 0.3, 1.4, 0.3],
[1.6, 0.2, 1.6, 0.2],
[1.4, 0.2, 1.4, 0.2],
[1.5, 0.2, 1.5, 0.2],
[1.4, 0.2, 1.4, 0.2],
[4.7, 1.4, 4.7, 1.4],
[4.5, 1.5, 4.5, 1.5],
[4.9, 1.5, 4.9, 1.5],
[4. , 1.3, 4. , 1.3],
[4.6, 1.5, 4.6, 1.5],
[4.5, 1.3, 4.5, 1.3],
[4.7, 1.6, 4.7, 1.6],
[3.3, 1. , 3.3, 1. ],
[4.6, 1.3, 4.6, 1.3],
[3.9, 1.4, 3.9, 1.4],
[3.5, 1. , 3.5, 1. ],
[4.2, 1.5, 4.2, 1.5],
[4. , 1. , 4. , 1. ],
[4.7, 1.4, 4.7, 1.4],
[3.6, 1.3, 3.6, 1.3],
[4.4, 1.4, 4.4, 1.4],
[4.5, 1.5, 4.5, 1.5],
[4.1, 1. , 4.1, 1. ],
[4.5, 1.5, 4.5, 1.5],
[3.9, 1.1, 3.9, 1.1],
[4.8, 1.8, 4.8, 1.8],
[4. , 1.3, 4. , 1.3],
[4.9, 1.5, 4.9, 1.5],
[4.7, 1.2, 4.7, 1.2],
[4.3, 1.3, 4.3, 1.3],
[4.4, 1.4, 4.4, 1.4],
[4.8, 1.4, 4.8, 1.4],
[5. , 1.7, 5. , 1.7],
[4.5, 1.5, 4.5, 1.5],
[3.5, 1. , 3.5, 1. ],
[3.8, 1.1, 3.8, 1.1],
[3.7, 1. , 3.7, 1. ],
[3.9, 1.2, 3.9, 1.2],
[5.1, 1.6, 5.1, 1.6],
[4.5, 1.5, 4.5, 1.5],
[4.5, 1.6, 4.5, 1.6],
[4.7, 1.5, 4.7, 1.5],
[4.4, 1.3, 4.4, 1.3],
[4.1, 1.3, 4.1, 1.3],
[4. , 1.3, 4. , 1.3],
[4.4, 1.2, 4.4, 1.2],
[4.6, 1.4, 4.6, 1.4],
[4. , 1.2, 4. , 1.2],
[3.3, 1. , 3.3, 1. ],
[4.2, 1.3, 4.2, 1.3],
[4.2, 1.2, 4.2, 1.2],
[4.2, 1.3, 4.2, 1.3],
[4.3, 1.3, 4.3, 1.3],
[3. , 1.1, 3. , 1.1],
[4.1, 1.3, 4.1, 1.3],
[6. , 2.5, 6. , 2.5],
[5.1, 1.9, 5.1, 1.9],
[5.9, 2.1, 5.9, 2.1],
[5.6, 1.8, 5.6, 1.8],
[5.8, 2.2, 5.8, 2.2],
[6.6, 2.1, 6.6, 2.1],
[4.5, 1.7, 4.5, 1.7],
[6.3, 1.8, 6.3, 1.8],
[5.8, 1.8, 5.8, 1.8],
[6.1, 2.5, 6.1, 2.5],
[5.1, 2. , 5.1, 2. ],
[5.3, 1.9, 5.3, 1.9],
[5.5, 2.1, 5.5, 2.1],
[5. , 2. , 5. , 2. ],
[5.1, 2.4, 5.1, 2.4],
[5.3, 2.3, 5.3, 2.3],
[5.5, 1.8, 5.5, 1.8],
[6.7, 2.2, 6.7, 2.2],
[6.9, 2.3, 6.9, 2.3],
[5. , 1.5, 5. , 1.5],
[5.7, 2.3, 5.7, 2.3],
[4.9, 2. , 4.9, 2. ],
[6.7, 2. , 6.7, 2. ],
[4.9, 1.8, 4.9, 1.8],
[5.7, 2.1, 5.7, 2.1],
[6. , 1.8, 6. , 1.8],
[4.8, 1.8, 4.8, 1.8],
[4.9, 1.8, 4.9, 1.8],
[5.6, 2.1, 5.6, 2.1],
[5.8, 1.6, 5.8, 1.6],
[6.1, 1.9, 6.1, 1.9],
[6.4, 2. , 6.4, 2. ],
[5.6, 2.2, 5.6, 2.2],
[5.1, 1.5, 5.1, 1.5],
[5.6, 1.4, 5.6, 1.4],
[6.1, 2.3, 6.1, 2.3],
[5.6, 2.4, 5.6, 2.4],
[5.5, 1.8, 5.5, 1.8],
[4.8, 1.8, 4.8, 1.8],
[5.4, 2.1, 5.4, 2.1],
[5.6, 2.4, 5.6, 2.4],
[5.1, 2.3, 5.1, 2.3],
[5.1, 1.9, 5.1, 1.9],
[5.9, 2.3, 5.9, 2.3],
[5.7, 2.5, 5.7, 2.5],
[5.2, 2.3, 5.2, 2.3],
[5. , 1.9, 5. , 1.9],
[5.2, 2. , 5.2, 2. ],
[5.4, 2.3, 5.4, 2.3],
[5.1, 1.8, 5.1, 1.8]], dtype=sorix.float64)
In [7]:
Copied!
model= Kmeans(n_clusters=3)
model.fit(X)
model= Kmeans(n_clusters=3)
model.fit(X)
In [8]:
Copied!
pred_labels = model.predict(X)
plt.figure(figsize=(12,8))
plt.scatter(data['PetalLengthCm'],data['PetalWidthCm'],s=80, c=pred_labels)
for centroid in model.centroids:
plt.scatter(centroid[0],centroid[1], s=150)
pred_labels = model.predict(X)
plt.figure(figsize=(12,8))
plt.scatter(data['PetalLengthCm'],data['PetalWidthCm'],s=80, c=pred_labels)
for centroid in model.centroids:
plt.scatter(centroid[0],centroid[1], s=150)
In [9]:
Copied!
pred_labels = model.predict(X)
plt.figure(figsize=(12,8))
plt.scatter(data['PetalLengthCm'],data['PetalWidthCm'],s=80, c=pred_labels)
for centroid in model.centroids:
plt.scatter(centroid[0],centroid[1], s=150)
pred_labels = model.predict(X)
plt.figure(figsize=(12,8))
plt.scatter(data['PetalLengthCm'],data['PetalWidthCm'],s=80, c=pred_labels)
for centroid in model.centroids:
plt.scatter(centroid[0],centroid[1], s=150)
In [10]:
Copied!
inertias = []
for i in range(2,11):
model = Kmeans(n_clusters=i)
model.fit(X)
inertias.append(model.get_inertia(X))
import matplotlib.pyplot as plt
plt.plot(range(2,11), inertias, marker='o')
inertias = []
for i in range(2,11):
model = Kmeans(n_clusters=i)
model.fit(X)
inertias.append(model.get_inertia(X))
import matplotlib.pyplot as plt
plt.plot(range(2,11), inertias, marker='o')
Out[10]:
[<matplotlib.lines.Line2D at 0x7fd20033cf50>]