Source code for tsseg.algorithms.patss.algorithms.ClaSP


import numpy as np
import time
from baselines.patss.evaluation.utility import convert_to_borders
from baselines.patss.workflows.Logger import Logger
from claspy.segmentation import BinaryClaSPSegmentation


def run_clasp(univariate_time_series, ground_truth, config, logger: "Logger"):
    """
    Run ClaSP on the given time series to identify a semantic segmentation.

    :param univariate_time_series: A list of pandas DataFrames, and each DataFrame consists
        of two columns: 'average_value' and 'time'. Here, the list should consist of one
        DataFrame, thus a univariate time series. Only the 'average_value' column of the
        first DataFrame is passed to ClaSP.
    :param ground_truth: A dictionary with the ground truth. The key 'segmentation' is read
        when ``config['use_ground_truth_n_segments']`` is truthy, and the key
        'segment_length' is read when ``config['use_ground_truth_window_size']`` is truthy.
    :param config: A dictionary containing the settings to use within ClaSP. Recognised
        (optional) keys: 'use_ground_truth_n_segments' and 'use_ground_truth_window_size'.
    :param logger: A Logger object used for logging the progress of ClaSP
    :raises ValueError: If more than one time series is provided.

    :return: The segment boundaries returned by ``BinaryClaSPSegmentation.fit_predict``,
        and the ClaSP object used to segment the time series.
    """
    # ClaSP can not cope with multivariate time series
    if len(univariate_time_series) > 1:
        raise ValueError('ClaSP only handles univariate time series (05/04/2023)!')

    # We only need to provide the raw time series values to ClaSP
    data = np.array(univariate_time_series[0]['average_value'])
    n_samples = data.shape[0]

    # Whether to use ground truth number of segments or not; the default
    # placeholder 'learn' leaves the choice to ClaSP.
    n_segments = 'learn'
    if config.get('use_ground_truth_n_segments'):
        borders_truth = convert_to_borders(ground_truth['segmentation'], univariate_time_series)
        n_segments = len(borders_truth) + 1

    # Whether to use the ground truth window size or not; the default
    # placeholder 'suss' leaves the choice to ClaSP.
    window_size = 'suss'
    if config.get('use_ground_truth_window_size'):
        window_size = ground_truth['segment_length']

        # If the window size is too large (i.e., the window size multiplied with the
        # exclusion radius exceeds half of the time series), then no proper
        # classification problem can be constructed. In this case we set the window
        # size to just fit this constraint.
        # This check only makes sense for a numeric window size: applying it to the
        # 'suss' placeholder would compare an int with a string and raise a TypeError.
        exclusion_radius = 5  # 5 is the default exclusion radius
        if n_samples < 2 * window_size * exclusion_radius:
            window_size = n_samples // (2 * exclusion_radius)

    # Start ClaSP
    start = time.time()
    clasp = BinaryClaSPSegmentation(n_segments=n_segments, window_size=window_size)
    try:
        # The first execution of ClaSP results in a Reference error from Numba
        # I assume this is due to some part of the code not yet being compiled
        # If ClaSP is executed again, then there is no problem
        # (this is similar for future ClaSP runs within the same python call)
        borders = clasp.fit_predict(data)
    except ReferenceError:
        # Log the exception
        logger.write('>>> Exception in ClaSP\n' +
                     'A Reference error occurred during execution, restarting ClaSP.\n' +
                     'Total time (waisted): %f seconds\n\n' % (time.time() - start))
        # Restart the timer and try to predict the borders again (a second failure
        # is not caught and propagates to the caller)
        start = time.time()
        borders = clasp.fit_predict(data)

    # Log successful completion of the segmentation
    logger.write('>>> Fitted ClaSP\n' +
                 'Borders: %s\n' % str(borders) +
                 'Total time: %f seconds\n\n' % (time.time() - start))

    # Return the segment boundaries and the ClaSP object
    return borders, clasp