Source code for reVX.utilities.cluster_methods

# -*- coding: utf-8 -*-
"""
Clustering Methods
"""
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import normalize


[docs]class ClusteringMethods: """ Base class of clustering methods """ @staticmethod def _normalize_values(arr, norm=None, axis=None): """ Normalize values in array by column Parameters ---------- arr : ndarray ndarray of values extracted from meta shape (n samples, with m features) norm : str Normalization method to use (see sklearn.preprocessing.normalize) if None range normalize axis : int Axis to normalize along Returns --------- arr : ndarray array with values normalized by column shape (n samples, with m features) """ if norm: arr = normalize(arr, norm=norm, axis=axis) else: if np.issubdtype(arr.dtype, np.integer): arr = arr.astype(float) min_all = arr.min(axis=axis) max_all = arr.max(axis=axis) range_all = max_all - min_all if axis is not None: pos = range_all == 0 range_all[pos] = 1 arr -= min_all arr /= range_all return arr
[docs] @staticmethod def kmeans(data, **kwargs): """ Cluster based on kmeans methodology """ kmeans = KMeans(random_state=0, **kwargs) results = kmeans.fit(data) labels = results.labels_ # Create deterministic cluster labels based on size label_n, l_size = np.unique(labels, return_counts=True) idx = np.argsort(l_size) l_mapping = dict(zip(label_n[idx], label_n)) sorted_labels = labels.copy() for k, v in l_mapping.items(): sorted_labels[labels == k] = v return sorted_labels