K-means clustering
Parameters:
n_clusters (int): number of clusters
Source code in sorix/clustering/k_means.py
def __init__(self, n_clusters: int):
    """
    Create a K-means model that has not been fitted yet.

    Parameters:
        n_clusters (int): number of clusters
    """
    self.n_clusters = n_clusters
    # No fitted state yet: fit() is what assigns the centroids and labels.
    self._centroids = self.features_names = self.labels = None
fit
fit(features, eps=0.001, max_iters=1000)
Fit the model.
Parameters:
- features (tensor) – Features to fit the model on.
- eps (float, default: 0.001) – Stop criterion, by default 0.001
- max_iters (int, default: 1000) – Maximum number of iterations, by default 1000
Source code in sorix/clustering/k_means.py
def fit(self,
        features: Tensor,
        eps: float = 0.001,
        max_iters: int = 1000) -> None:
    """
    Fit the model.

    Each iteration recomputes the labels from the distances to the current
    centroids, then recomputes the centroids from those labels. The loop
    stops as soon as the centroid movement falls below `eps` or
    `max_iters` iterations have been performed.

    Parameters:
        features (tensor): Features to fit the model on.
        eps (float, optional): Stop criterion, by default 0.001. Compared
            against the value returned by `_moviment` for the previous
            and the updated centroids.
        max_iters (int, optional): Maximum number of iterations, by
            default 1000. At least one iteration is always performed.
    """
    # NOTE(review): the loop reads self._centroids on its first pass, so the
    # preprocessing step presumably initialises them -- confirm.
    features_train = self._data_preprocessing_train(features)
    iters = 0
    while True:
        iters += 1
        distances = self._distances(features_train, self._centroids)
        self.labels = self._new_labels(distances)
        centroids_before = self._centroids
        self._centroids = self._new_centroids(features_train, self.labels)
        movement = self._moviment(centroids_before, self._centroids)
        # `iters` already counts the pass that just finished, so the cap
        # must be checked with `>=`; `>` would allow max_iters + 1 passes.
        if (movement < eps) or (iters >= max_iters):
            break
predict
Predict the labels of features
Parameters:
- features (tensor) – Features to predict.
Returns:
- labels (tensor) – Labels of features
Source code in sorix/clustering/k_means.py
def predict(self, features: Tensor) -> Tensor:
    """
    Assign a label to each of the given features.

    Labels are derived from the distances between the features and the
    model's current centroids.

    Parameters:
        features (tensor): Features to predict.

    Returns:
        labels (tensor): Labels of features
    """
    return tensor(
        self._new_labels(self._distances(features, self._centroids))
    )
get_distances
Get distances between features and centroids
Parameters:
- features (tensor) – Features to compute distances for.
Returns:
- distances (tensor) – Distances between features and centroids
Source code in sorix/clustering/k_means.py
def get_distances(self, features: Tensor) -> Tensor:
    """
    Compute the distances between features and the model's centroids.

    Parameters:
        features (tensor): Features to compute distances for.

    Returns:
        distances (tensor): Distances between features and centroids
    """
    raw_distances = self._distances(features, self._centroids)
    return tensor(raw_distances)
get_inertia
Get inertia of features for k-centroids
Parameters:
- features (tensor) – Features to evaluate.
Returns:
- inertia (float) – Inertia of features
Source code in sorix/clustering/k_means.py
def get_inertia(self, features: Tensor) -> float:
    """
    Compute the inertia of features with respect to the k centroids.

    The result is the sum of the squared differences between the features
    and the centroids selected by their labels.

    Parameters:
        features (tensor): Features to evaluate.

    Returns:
        inertia (float): Inertia of features
    """
    assigned = self._new_labels(self._distances(features, self._centroids))
    # NOTE(review): this reads `self.centroids`, while the distances above
    # use `self._centroids` -- presumably a public accessor defined
    # elsewhere in the class; confirm it exists.
    residuals = features - self.centroids[assigned]
    return smat.sum(residuals ** 2).item()