Source code for coclust.evaluation.internal

# -*- coding: utf-8 -*-

"""
The :mod:`coclust.evaluation.internal` module provides functions to evaluate
clustering or co-clustering given internal criteria.
"""

import numpy as np

from ..coclustering import CoclustMod


def best_modularity_partition(in_data, nbr_clusters_range, n_rand_init=1):
    """Evaluate the best partition over a range of number of clusters using
    co-clustering by direct maximization of graph modularity.

    One :class:`coclust.coclustering.CoclustMod` model is fitted per entry of
    ``nbr_clusters_range``; the model whose final modularity beats the current
    best by more than a small tolerance is kept.

    Parameters
    ----------
    in_data : numpy array or scipy sparse matrix, shape=(n_samples, n_features)
        Matrix to be analyzed
    nbr_clusters_range : iterable of int
        Numbers of clusters to be evaluated. The values do not need to be
        contiguous or sorted.
    n_rand_init : int
        Number of times the algorithm will be run with different
        initializations (forwarded as ``n_init``)

    Returns
    -------
    tmp_best_model : :class:`coclust.coclustering.CoclustMod` or None
        model with highest final modularity, ``None`` if
        ``nbr_clusters_range`` is empty
    tmp_max_modularities : list
        final modularities for all evaluated partitions; entry ``i`` is the
        modularity obtained for the ``i``-th value of ``nbr_clusters_range``
    """
    # Materialize once so any iterable (range, list, generator) is accepted.
    cluster_counts = list(nbr_clusters_range)

    tmp_best_model = None
    tmp_max_modularities = []
    # A later model replaces the current best only if it improves the
    # modularity by more than this tolerance.
    eps_best_model = 1e-4

    # Set best final modularity to -inf
    modularity_begin = float("-inf")

    print("Computing coclust modularity for a range of cluster numbers =")
    for tmp_n_clusters in cluster_counts:
        print(" %d ..." % (tmp_n_clusters))
        # Create and fit a model with tmp_n_clusters co-clusters
        tmp_model = CoclustMod(n_clusters=tmp_n_clusters,
                               n_init=n_rand_init,
                               random_state=0)
        tmp_model.fit(in_data)

        modularity_end = tmp_model.modularity
        # Check if the final modularity is better with tolerance
        if (modularity_end - modularity_begin) > eps_best_model:
            tmp_best_model = tmp_model
            modularity_begin = modularity_end

        # Store positionally: indexing by ``n_clusters - min(range)`` is only
        # valid for contiguous step-1 ranges and raises IndexError otherwise.
        tmp_max_modularities.append(modularity_end)

    print(" All done !")
    return (tmp_best_model, tmp_max_modularities)