#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
SurrogateSeries is a child of EnsembleSeries, designed for parametric and non-parametric Monte Carlo tests
"""
from ..core.ensembleseries import EnsembleSeries
from ..core.series import Series
from ..utils import tsutils, tsmodel
import numpy as np
import warnings
supported_surrogates = frozenset(['ar1sim','phaseran','uar1','CN']) # broadcast all supported surrogates as global variable, for exception handling
[docs]class SurrogateSeries(EnsembleSeries):
''' Object containing surrogate timeseries, obtained either by emulating an
existing series (see `from_series()`) or from a parametric model (see `from_param()`)
Surrogate Series is a child of EnsembleSeries. All methods available for EnsembleSeries are available for surrogate series.
'''
def __init__(self, series_list=[], number=1, method=None, label=None, param=None, seed=None):
'''
Initialize a SurrogateSeries object. The parameters below are then used for series generation and labeling.
Parameters
----------
series_list : list
a list of pyleoclim.Series objects. The default is [].
number : int
The number of surrogates to generate. The default is 1.
method : str {ar1sim, phaseran, uar1, CN}
The name of the method used to generate surrogates of the timeseries (no default)
- 'ar1sim' fits an AR(1) model using the method of moments (MoM) as per [1]
- 'phaseran' applies phase randomization as per [2, 3]
- 'CN' fits a "colored noise" model (power-law spectrum) as per [4] (Eq 15)
param : list
a list of parameters for the method in question. Ignored if the method does not require parameters.
label : str
label of the collection of timeseries (e.g. 'SOI surrogates [AR(1) MoM]')
If not provided, defaults to "series surrogates [{method}]"
Raises
------
ValueError
errors out if an unknown method is specified
References
----------
_ [1] Mudelsee, M. (2002), TAUEST: a computer program for estimating persistence in unevenly spaced weather/climate time series, Computers and Geosciences, 28, 69–72, doi:10.1016/S0098- 3004(01)00041-3.
_ [2] Ebisuzaki, W. (1997), A method to estimate the statistical significance of a correlation when the data are serially correlated, Journal of Climate, 10(9), 2147–2153, doi:10.1175/1520- 0442(1997)010¡2147:AMTETS¿2.0.CO;2.
_ [3] Prichard, D., and J. Theiler (1994), Generating surrogate data for time series with several simultaneously measured variables, Phys. Rev. Lett., 73, 951–954, doi: 10.1103/PhysRevLett.73.951.
_ [4] Kirchner, J. W., Aliasing in 1/f(alpha) noise spectra: origins, consequences, and remedies. Phys Rev E Stat Nonlin Soft Matter Phys 71, 066110 (2005).
'''
self.series_list = series_list
self.method = method
self.param = param
self.number = number
self.seed = seed
if param is not None:
nparam = len(param)
if 'ar1' in method and nparam !=2:
raise ValueError("2 parameters are needed for this model")
elif method == 'phaseran':
warnings.warn("Phase-randomization is a non parametric method; the provided parameters will be ignored")
elif method == 'CN' and nparam >1:
raise ValueError(f"1 parameter is needed for this model; only the first of the provided {nparam} will be used")
# refine the display name
if label is None:
if method == 'ar1sim':
self.label = "AR(1) surrogates (MoM)"
elif method == 'phaseran':
self.label = "Phase-randomized surrogates"
elif method == 'uar1':
self.label = "AR(1) surrogates (MLE)"
elif method == 'CN':
self.label = r'Power-law surrogates ($S(f) \propto f^{-\beta}$)'
else:
raise ValueError(f"Unknown method: {self.method}. Please use one of {supported_surrogates}")
[docs] def from_series(self, target_series):
'''
Fashion the SurrogateSeries object after a target series
Parameters
----------
target_series : Series object
target Series used to infer surrogate properties
seed : int
Control random seed option for reproducibility
Returns
-------
surr : SurrogateSeries
See also
--------
pyleoclim.utils.tsmodel.ar1_sim : AR(1) simulator
pyleoclim.utils.tsmodel.uar1_sim : maximum likelihood AR(1) simulator
pyleoclim.utils.tsutils.phaseran : phase randomization
Examples
--------
SOI = pyleo.utils.load_dataset('SOI').interp()
SOI_surr = pyleo.SurrogateSeries(method='phaseran', number=4)
SOI_surr.from_series(SOI)
fig, ax = SOI_surr.plot_traces()
SOI.plot(ax=ax,color='black',linewidth=1, ylim=[-6,3])
ax.legend(loc='lower center', bbox_to_anchor=(0.5, 0), ncol=2)
ax.set_title(SOI_surr.label)
'''
#settings = {} if settings is None else settings.copy()
if not isinstance(target_series, Series):
raise TypeError(f"Expected pyleo.Series, got: {type(target_series)}")
if self.seed is not None:
np.random.default_rng(self.seed)
# apply surrogate method
if self.method == 'ar1sim':
tsi = target_series if target_series.is_evenly_spaced() else target_series.interp()
mu = tsi.value.mean()
y_surr = tsmodel.ar1_sim(target_series.value, self.number, target_series.time) + mu
elif self.method == 'phaseran':
if target_series.is_evenly_spaced():
y_surr = tsutils.phaseran(target_series.value, self.number)
else:
raise ValueError("Phase-randomization presently requires evenly-spaced series.")
elif self.method == 'uar1':
# estimate theta with MLE
tau, sigma_2 = tsmodel.uar1_fit(target_series.value, target_series.time)
tsi = target_series if target_series.is_evenly_spaced() else target_series.interp()
mu = tsi.value.mean()
self.param = [tau, sigma_2] # assign parameters for future use
# generate time axes according to provided pattern
times = np.squeeze(np.tile(target_series.time, (self.number, 1)).T)
# generate matrix and add the mean
y_surr = tsmodel.uar1_sim(t = times, tau=tau, sigma_2=sigma_2) + mu
elif self.method == 'CN':
tsi = target_series if target_series.is_evenly_spaced() else target_series.interp()
mu = tsi.value.mean()
sigma = tsi.value.std()
alpha = tsi.spectral(method='cwt').beta_est().beta_est_res['beta'] # fit the parameter using the smoothest spectral method
self.param = [alpha]
y_surr = np.empty((len(target_series.time),self.number))
for i in range(self.number):
y_surr[:,i] = tsmodel.colored_noise(alpha=alpha,t=target_series.time, std = sigma) + mu
if self.number > 1:
s_list = []
for i, y in enumerate(y_surr.T):
ts = target_series.copy() # copy Series
ts.value = y
ts.label = str(target_series.label or '') + " #" + str(i+1)
s_list.append(ts)
else:
ts = target_series.copy() # copy Series
ts.value = np.squeeze(y_surr)
ts.label = str(target_series.label or '')
s_list = [ts]
self.series_list = s_list
[docs] def from_param(self, param, length=50, time_pattern = 'even', settings=None):
'''
Simulate the SurrogateSeries from a parametric model
Parameters
----------
param : list
model parameters (e.g. [tau, sigma0] for an AR(1) model, [beta] for colored noise)
length : int
Length of the series. Default: 50
time_pattern : str {even, random, specified}
The pattern used to generate the surrogate time axes
* 'even' uses an evenly-spaced time with spacing `delta_t` specified in settings (if not specified, defaults to 1.0)
* 'random' uses random_time_axis() with specified distribution and parameters
Relevant settings are `delta_t_dist` and `param`. (default: 'exponential' with parameter = 1.0)
* 'specified': uses time axis `time` from `settings`.
seed : int
Control random seed option for reproducibility
settings : dict
Parameters for surrogate generator. See individual methods for details.
Returns
-------
surr : SurrogateSeries
See also
--------
pyleoclim.utils.tsmodel.ar1_sim : AR(1) simulator
pyleoclim.utils.tsmodel.colored_noise: simulating from a power law spectrum, $S(f) \propto f^{-\beta}$
pyleoclim.utils.tsmodel.random_time_axis : Generate time increment vector according to a specific probability model
Examples
--------
.. jupyter-execute::
ar1 = pyleo.SurrogateSeries(method='ar1sim', number=10)
ar1.from_param(length=100, param = [2,2])
ar1.plot_envelope(title=rf'AR(1) synthetic series ($\tau={2},\sigma^2={2}$)')
'''
param = param if isinstance(param, list) else [param] # coerce param into a list, no matter the original format
nparam = len(param)
settings = {} if settings is None else settings.copy()
if self.seed is not None:
np.random.seed(self.seed)
# generate time axes according to provided pattern
if time_pattern == "even":
delta_t = settings["delta_t"] if "delta_t" in settings else 1.0
t = np.cumsum([float(delta_t)]*length)
times = np.tile(t, (self.number, 1)).T
elif time_pattern == "random":
times = np.zeros((length, self.number))
for i in range(self.number):
dist_name = settings['delta_t_dist'] if "delta_t_dist" in settings else "exponential"
dist_param = settings['param'] if "param" in settings else [1]
times[:, i] = tsmodel.random_time_axis(length, dist_name,dist_param)
elif time_pattern == 'specified':
if "time" not in settings:
raise ValueError("'time' not found in settings")
else:
if self.number > 1:
times = np.tile(settings["time"], (self.number, 1)).T
else:
times = settings["time"]
else:
raise ValueError(f"Unknown time pattern: {time_pattern}")
# apply surrogate method
y_surr = np.empty_like(times)
if self.method in ['uar1','ar1sim']: # AR(1) models
if nparam != 2:
warnings.warn(f'The AR(1) model needs 2 parameters, tau and sigma2 (in that order); {nparam} provided. default values used, tau=5, sigma=0.5',UserWarning, stacklevel=2)
param = [5,0.5]
y_surr = tsmodel.uar1_sim(t = times, tau=param[0], sigma_2=param[1])
elif self.method == 'CN': # colored noise
if nparam >1:
raise ValueError(f"1 parameter is needed for this model; only the first of the provided {nparam} will be used")
y_surr = np.empty((length,self.number))
if self.number > 1:
for i in range(self.number):
y_surr[:,i] = tsmodel.colored_noise(alpha=param[0],t=times[:,i])
else:
y_surr = tsmodel.colored_noise(alpha=param[0],t=times)
elif self.method == 'phaseran':
raise ValueError("Phase-randomization is only available in from_series().")
# create the series_list
s_list = []
if self.number > 1:
for i, (t, y) in enumerate(zip(times.T,y_surr.T)):
ts = Series(time=t, value=y,
label = str(self.label or '') + " #" + str(i+1),
verbose=False, auto_time_params=True)
s_list.append(ts)
else:
ts = Series(time=np.squeeze(times), value=np.squeeze(y_surr),
label = str(self.label or '') + " #`",
verbose=False, auto_time_params=True)
s_list.append(ts)
self.series_list = s_list