Source code for tropea_clustering._internal.onion_smooth.onion_uni

"""onion-clustering for univariate time-series."""

# Author: Becchi Matteo <bechmath@gmail.com>
# Reference: https://www.pnas.org/doi/abs/10.1073/pnas.2403771121

import numpy as np
from numpy.typing import NDArray

from tropea_clustering._internal.onion_smooth.main import StateUni
from tropea_clustering._internal.onion_smooth.main import _main as _onion_inner


[docs] def onion_uni_smooth( X: NDArray[np.float64], delta_t: int, bins: str | int = "auto", number_of_sigmas: float = 3.0, max_area_overlap: float = 0.8, ) -> tuple[list[StateUni], NDArray[np.int64]]: """ Performs onion clustering on the data array 'X'. Returns an array of integer labels, one for each frame. Unclassified frames are labelled "-1". .. note:: This function is currently in beta testing. The output could change in the future. Use with caution. Parameters ---------- X : ndarray of shape (n_particles, n_frames) The time-series data to cluster. delta_t : int The minimum lifetime required for the clusters. Also referred to as the "time resolution" of the clustering analysis. bins : int, default="auto" The number of bins used for the construction of the histograms. Can be an integer value, or "auto". If "auto", the default of numpy.histogram_bin_edges is used (see https://numpy.org/doc/stable/reference/generated/numpy.histogram_bin_edges.html#numpy.histogram_bin_edges). number_of_sigmas : float, default=3.0 Sets the thresholds for classifing a signal sequence inside a state: the sequence is contained in the state if it is entirely contained inside number_of_sigmas * state.sigmas times from state.mean. max_area_overlap : float, default=0.8 Thresold to consider two Gaussian states overlapping, and thus merge them together. Returns ------- states_list : List[StateUni] The list of the identified states. Refer to the documentation of StateUni for accessing the information on the states. labels : ndarray of shape (n_particles, n_frames) Cluster labels for each frame. Unclassified points are given the label "-1". Example ------- .. testcode:: onion_uni_smooth-test import numpy as np from tropea_clustering import onion_uni_smooth # Select time resolution delta_t = 5 # Create random input data np.random.seed(1234) n_particles = 5 n_steps = 1000 input_data = np.random.rand(n_particles, n_steps) # Run Onion Clustering state_list, labels = onion_uni_smooth(input_data, delta_t) .. testcode:: onion_uni_smooth-test :hide: assert np.isclose(state_list[0].mean, 0.5789299719781493) """ est = OnionUniSmooth( delta_t=delta_t, bins=bins, number_of_sigmas=number_of_sigmas, max_area_overlap=max_area_overlap, ) est.fit(X) return est.state_list_, est.labels_
[docs] class OnionUniSmooth: """ Performs onion clustering on a data array. Returns an array of integer labels, one for each frame. Unclassified frames are labelled "-1". .. note:: This class is currently in beta testing. Its operations could change in the future. Use with caution. Parameters ---------- delta_t : int The minimum lifetime required for the clusters. Also referred to as the "time resolution" of the clustering analysis. bins : int, default="auto" The number of bins used for the construction of the histograms. Can be an integer value, or "auto". If "auto", the default of numpy.histogram_bin_edges is used (see https://numpy.org/doc/stable/reference/generated/numpy.histogram_bin_edges.html#numpy.histogram_bin_edges). number_of_sigmas : float, default=3.0 Sets the thresholds for classifing a signal sequence inside a state: the sequence is contained in the state if it is entirely contained inside number_of_sigmas * state.sigmas times from state.mean. max_area_overlap : float, default=0.8 Thresold to consider two Gaussian states overlapping, and thus merge them together. Attributes ---------- states_list_ : List[StateUni] The list of the identified states. Refer to the documentation of StateUni for accessing the information on the states. labels_ : ndarray of shape (n_particles, n_frames) Cluster labels for each frame. Unclassified points are given the label "-1". Example ------- .. testcode:: OnionUniSmooth-test import numpy as np from tropea_clustering import OnionUniSmooth # Create random input data np.random.seed(1234) n_particles = 5 n_steps = 1000 delta_t = 5 input_data = np.random.rand(n_particles, n_steps) # Run Onion Clustering clusterer = OnionUniSmooth(delta_t) clust_params = {"bins": 100, "number_of_sigmas": 3.0} clusterer.set_params(**clust_params) clusterer.fit(input_data) .. testcode:: OnionUniSmooth-test :hide: assert np.isclose( clusterer.state_list_[0].mean, 0.5789299753284055) """ def __init__( self, delta_t: int, bins: str | int = "auto", number_of_sigmas: float = 3.0, max_area_overlap: float = 0.8, ): self.delta_t = delta_t self.bins = bins self.number_of_sigmas = number_of_sigmas self.max_area_overlap = max_area_overlap
[docs] def fit(self, X, y=None): """Performs onion clustering on the data array 'X'. Parameters ---------- X : ndarray of shape (n_particles, n_frames) The time-series data to cluster. Returns ------- self : object A fitted instance of self. """ if X.ndim != 2: raise ValueError("Expected 2-dimensional input data.") if X.shape[0] == 0: raise ValueError("Empty dataset.") if X.shape[1] <= 1: raise ValueError("n_frames = 1.") # Check for complex input if not ( np.issubdtype(X.dtype, np.floating) or np.issubdtype(X.dtype, np.integer) ): raise ValueError("Complex data not supported.") X = X.copy() # copy to avoid in-place modification self.state_list_, self.labels_ = _onion_inner( X, self.delta_t, self.bins, self.number_of_sigmas, self.max_area_overlap, ) return self
[docs] def fit_predict(self, X, y=None): """Computes clusters on the data array 'X' and returns labels. Parameters ---------- X : ndarray of shape (n_particles, n_frames) The time-series data to cluster. Returns ------- labels : ndarray of shape (n_particles, n_frames) Cluster labels for each frame. Unclassified points are given the label "-1". """ return self.fit(X).labels_
def get_params(self, deep=True): return { "delta_t": self.delta_t, "bins": self.bins, "number_of_sigmas": self.number_of_sigmas, "max_area_overlap": self.max_area_overlap, } def set_params(self, **params): for param, value in params.items(): setattr(self, param, value) return self