# Source code for pyleoclim.utils.tsbase

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Basic functionalities to clean timeseries data prior to analysis
"""

import numpy as np
from typing import OrderedDict

#APIs

# Public names of this module: the list below is what a star-import
# (`from <module> import *`) exposes.
__all__ = [
    'clean_ts',
    'dropna',
    'sort_ts',
    'is_evenly_spaced',
    'reduce_duplicated_timestamps',
]


def clean_ts(ys, ts, verbose=False):
    ''' Cleaning the timeseries

    Pass the series through the module's cleaning steps, in this order:
    `dropna`, then `sort_ts`, then `reduce_duplicated_timestamps`.
    Each step receives the output of the previous one.

    Parameters
    ----------
    ys : array
        A time series, NaNs allowed
    ts : array
        The time axis of the time series, NaNs allowed
    verbose : bool
        Forwarded unchanged to every cleaning step; if True, the steps
        print a message when they modify the data

    Returns
    -------
    ys : array
        The cleaned time series
    ts : array
        The cleaned time axis

    See also
    --------
    pyleoclim.utils.tsbase.dropna : Drop NaN values
    pyleoclim.utils.tsbase.sort_ts : Sort timeseries
    pyleoclim.utils.tsbase.reduce_duplicated_timestamps : Consolidate duplicated timestamps
    '''
    # The order matters: NaNs are removed before sorting and merging.
    pipeline = (dropna, sort_ts, reduce_duplicated_timestamps)
    for step in pipeline:
        ys, ts = step(ys, ts, verbose=verbose)
    return ys, ts
def dropna(ys, ts, verbose=False):
    '''Drop NaN values

    Remove every entry for which the value in ys or the corresponding
    timestamp in ts is NaN.

    Parameters
    ----------
    ys : array
        A time series, NaNs allowed
    ts : array
        The time axis of the time series, NaNs allowed
    verbose : bool
        If True, will print a message when at least one entry was dropped
        (whether the NaN was found in ys or in ts)

    Returns
    -------
    ys : array
        The time series without nans (float array)
    ts : array
        The time axis of the time series without nans (float array)

    Raises
    ------
    AssertionError
        If ys and ts do not hold the same number of elements
    '''
    ys = np.asarray(ys, dtype=float)
    ts = np.asarray(ts, dtype=float)
    assert ys.size == ts.size, 'The size of time axis and data value should be equal!'

    # A single mask covering both arrays: a sample survives only when
    # neither its value nor its timestamp is NaN.
    keep = ~(np.isnan(ys) | np.isnan(ts))

    # Report on the combined mask so NaNs that occur only in the time axis
    # are announced too, not just NaNs in the values.
    if verbose and not keep.all():
        print('NaNs have been detected and dropped.')

    return ys[keep], ts[keep]
def sort_ts(ys, ts, verbose=False):
    ''' Sort timeseries

    Sort ts values in ascending order and reorder ys accordingly.
    The arrays are returned untouched (as float arrays) when the time axis
    is already non-decreasing.

    Parameters
    ----------
    ys : array
        Dependent variable
    ts : array
        Independent variable
    verbose : bool
        If True, will print a message when the data had to be reordered

    Returns
    -------
    ys : array
        Dependent variable
    ts : array
        Independent variable, sorted in ascending order

    Raises
    ------
    AssertionError
        If ys and ts do not hold the same number of elements
    '''
    ys = np.asarray(ys, dtype=float)
    ts = np.asarray(ts, dtype=float)
    assert ys.size == ts.size, 'time and value arrays must be of equal length'

    # Reorder as soon as any step goes backwards. Checking only the median
    # step would leave a mostly-ascending but locally shuffled axis unsorted.
    # np.diff of a 0- or 1-element axis is empty, so short inputs fall
    # through without any warning.
    if np.any(np.diff(ts) < 0):
        # Stable sort keeps the incoming order of samples that share a
        # timestamp.
        order = np.argsort(ts, kind='stable')
        ys = ys[order]
        ts = ts[order]
        if verbose:
            print('The time axis has been adjusted to be prograde')

    return ys, ts
def reduce_duplicated_timestamps(ys, ts, verbose=False):
    ''' Consolidate duplicated timestamps

    Collapse every group of samples sharing the same timestamp into a single
    sample whose value is the mean of the group. Timestamps keep the order of
    their first occurrence. When no timestamp is repeated the inputs are
    returned as float arrays without further processing.

    Parameters
    ----------
    ys : array
        Dependent variable
    ts : array
        Independent variable
    verbose : bool
        If True, will print a message when duplicates were merged

    Returns
    -------
    ys : array
        Dependent variable
    ts : array
        Independent variable, with duplicated timestamps reduced by averaging the values

    Raises
    ------
    AssertionError
        If ys and ts do not hold the same number of elements
    '''
    ys = np.asarray(ys, dtype=float)
    ts = np.asarray(ts, dtype=float)
    assert ys.size == ts.size, 'The size of time axis and data value should be equal!'

    has_duplicates = len(ts) != len(set(ts))
    if not has_duplicates:
        return ys, ts

    # Bucket the values by timestamp; the ordered mapping remembers the order
    # in which each timestamp was first seen.
    grouped = OrderedDict()
    for stamp, sample in zip(ts, ys):
        grouped.setdefault(stamp, []).append(sample)

    ts = np.array(list(grouped.keys()))
    ys = np.array([np.mean(samples) for samples in grouped.values()])

    if verbose:
        print('Duplicate timestamps have been combined by averaging values.')

    return ys, ts
def is_evenly_spaced(ts, tol=1e-4):
    ''' Check if a time axis is evenly spaced, within a given tolerance

    Every step of the axis is compared to the mean step; the axis counts as
    evenly spaced when each relative deviation |dt - mean| / |mean| is
    strictly below tol.

    Parameters
    ----------
    ts : array
        The time axis of a time series. None is accepted and treated as
        evenly spaced.
    tol : float64
        Numerical tolerance for the relative difference

    Returns
    -------
    check : bool
        True - evenly spaced; False - unevenly spaced.
    '''
    if ts is None:
        return True

    dts = np.diff(ts)

    # Fewer than two points means there is no spacing to compare; answer True
    # directly instead of taking the mean of an empty array (which warns).
    if dts.size == 0:
        return True

    dt_mean = dts.mean()

    # A zero mean step makes the relative deviation undefined (0/0 or x/0).
    # Such an axis is reported as not evenly spaced, without dividing by zero.
    if dt_mean == 0:
        return False

    # Vectorised comparison of every step against the mean step.
    relative_dev = np.abs((dts - dt_mean) / dt_mean)
    return bool(np.all(relative_dev < tol))