# Source code for coclust.io.input_checking

# -*- coding: utf-8 -*-

"""
The :mod:`coclust.io.input_checking` module provides functions to check
input matrices.
"""

import numpy as np
import scipy.sparse as sp
from scipy.sparse.sputils import isdense
from scipy.sparse.dok import dok_matrix
from scipy.sparse.lil import lil_matrix


def check_array(a, pos=True):
    """Check that a data matrix is numeric and has no empty row or column.

    Dense and sparse inputs are both supported. Sparse inputs are inspected
    through their stored entries only, so they are never densified.

    Parameters
    ----------
    a: numpy.ndarray, numpy.matrix or scipy sparse matrix
        The input array. Dense inputs with fewer than two dimensions are
        treated as a single row.

    pos: bool
        If ``True``, check that the values are non-negative.

    Raises
    ------
    TypeError
        If the array is not a Numpy/SciPy array or matrix or if the values
        are not numeric (integer or floating point).

    ValueError
        If the array contains empty (all-zero) rows or columns, contains NaN
        values, contains negative values (if ``pos`` is ``True``), or is a
        dense array with more than two dimensions.
    """
    # ``np.matrix`` is a subclass of ``np.ndarray``, so one isinstance check
    # covers both dense kinds.
    if not (isinstance(a, np.ndarray) or sp.issparse(a)):
        raise TypeError("Input data must be a Numpy/SciPy array or matrix")

    scalar_type = a.dtype.type
    if (not np.issubdtype(scalar_type, np.integer) and
            not np.issubdtype(scalar_type, np.floating)):
        raise TypeError("Input array or matrix must be of a numeric type")

    if sp.issparse(a):
        # Sparse matrices have no ``any`` method and do not support
        # ``np.isnan``; work on the COO triplets instead.
        coo = a.tocoo()
        values = coo.data
        # Explicitly stored zeros must not count as content. NaN compares
        # unequal to zero, so it counts as content here, exactly as it does
        # for ``ndarray.any`` on the dense path.
        stored_nonzero = values != 0
        n_rows, n_cols = a.shape
        row_has_value = np.zeros(n_rows, dtype=bool)
        col_has_value = np.zeros(n_cols, dtype=bool)
        row_has_value[coo.row[stored_nonzero]] = True
        col_has_value[coo.col[stored_nonzero]] = True
    else:
        values = np.atleast_2d(np.asarray(a))
        if values.ndim != 2:
            raise ValueError("Input data must be 2-dimensional")
        col_has_value = values.any(axis=0)
        row_has_value = values.any(axis=1)

    # Columns are reported before rows.
    if not col_has_value.all():
        raise ValueError("Zero-valued columns in data")
    if not row_has_value.all():
        raise ValueError("Zero-valued rows in data")
    if pos and (values < 0).any():
        raise ValueError("Negative values in data")
    if np.isnan(values).any():
        raise ValueError("NaN in data")
def check_positive(X):
    """Check that a matrix contains no negative value.

    Parameters
    ----------
    X: numpy array, array-like or scipy sparse matrix
        Matrix to be analyzed. Sparse inputs are inspected through their
        stored values only.

    Raises
    ------
    ValueError
        If the matrix contains negative values.

    Returns
    -------
    numpy array, array-like or scipy sparse matrix
        ``X`` itself, unchanged
    """
    if sp.issparse(X):
        # Dispatch on the storage format rather than on concrete classes so
        # the check does not depend on deprecated SciPy module paths.
        storage = X.format
        if storage == "dok":
            # DOK keeps its entries in a mapping and has no ``data`` array.
            values = np.array(list(X.values()))
        elif storage == "lil":
            # LIL keeps one Python list of values per row; flatten them.
            values = np.array([v for row_values in X.data for v in row_values])
        else:
            values = X.data
    else:
        # Covers ndarray and np.matrix, and also accepts plain array-likes.
        values = np.asarray(X)

    if (values < 0).any():
        raise ValueError("The matrix contains negative values.")

    return X
def check_numbers(matrix, n_clusters):
    """Ensure a matrix is large enough for the requested co-clusters.

    Both the number of rows and the number of columns must be at least
    ``n_clusters``.

    Parameters
    ----------
    matrix:
        The input matrix

    n_clusters: int
        Number of co-clusters

    Raises
    ------
    ValueError
        If the data matrix has not enough rows or columns.
    """
    dims = matrix.shape
    # Rows are examined first; columns only when the rows are sufficient.
    if not dims[0] < n_clusters and not dims[1] < n_clusters:
        return
    raise ValueError("data matrix has not enough rows or columns")
def check_numbers_non_diago(matrix, n_row_clusters, n_col_clusters):
    """Ensure a matrix is large enough for the requested row/column clusters.

    The number of rows must be at least ``n_row_clusters`` and the number of
    columns at least ``n_col_clusters``.

    Parameters
    ----------
    matrix:
        The input matrix

    n_row_clusters: int
        Number of row clusters

    n_col_clusters: int
        Number of column clusters

    Raises
    ------
    ValueError
        If the data matrix has not enough rows or columns.
    """
    dims = matrix.shape
    # Rows are examined first; columns only when the rows are sufficient.
    if not dims[0] < n_row_clusters and not dims[1] < n_col_clusters:
        return
    raise ValueError("data matrix has not enough rows or columns")
def check_numbers_clustering(matrix, n_clusters):
    """Ensure a matrix has enough rows for the requested number of clusters.

    Only the row count is examined.

    Parameters
    ----------
    matrix:
        The input matrix

    n_clusters: int
        Number of clusters

    Raises
    ------
    ValueError
        If the data matrix has not enough rows.
    """
    row_count = matrix.shape[0]
    if not row_count < n_clusters:
        return
    raise ValueError("data matrix has not enough rows")