Source code for pyleoclim.utils.tsmodel

''' Module for timeseries modeling
'''

import numpy as np
# new for statsmodels v0.12
from statsmodels.tsa.arima_process import arma_generate_sample
from statsmodels.tsa.arima.model import ARIMA

from .tsbase import (
    is_evenly_spaced
)
#from .tsutils import preprocess   # no longer used here
from scipy import optimize

__all__ = [
    'ar1_sim',
    'colored_noise',
    'colored_noise_2regimes',
    'gen_ar1_evenly',
]

[docs]def ar1_model(t, tau, output_sigma=1):
    ''' Simulate a (possibly irregularly-sampled) AR(1) process with given decay
        constant tau, à la REDFIT.

    Parameters
    ----------

    t :  array
        time axis of the time series
    tau : float
        the averaged persistence

    Returns
    -------

    y : array
        the AR(1) time series

    References
    ----------

    Schulz, M. & Mudelsee, M. REDFIT: estimating red-noise spectra directly from unevenly spaced
        paleoclimatic time series. Computers & Geosciences 28, 421–426 (2002).

    '''
    n = np.size(t)
    y = np.zeros(n)
    y[0] = 0  # initializing

    for i in range(1, n):
        scaled_dt = (t[i] - t[i-1]) / tau
        rho = np.exp(-scaled_dt)
        err = np.random.normal(0, np.sqrt(1 - rho**2)*output_sigma, 1)
        y[i] = y[i-1]*rho + err

    return y


[docs]def ar1_fit(y, t=None):
    ''' Returns the lag-1 autocorrelation from AR(1) fit OR persistence from tauest.

    Parameters
    ----------

    y : array
        the time series
    t : array
        the time axis of that series

    Returns
    -------

    g : float
        lag-1 autocorrelation coefficient (for evenly-spaced time series)
        OR estimated persistence (for unevenly-spaced time series)
    '''

    if is_evenly_spaced(t):
        g = ar1_fit_evenly(y)
    else:
        #  g = tau_estimation(y, t, detrend=detrend, params=params)
        g = tau_estimation(y, t)

    return g

[docs]def ar1_sim(y, p, t=None):
    ''' Produce p realizations of an AR(1) process of length n with lag-1 autocorrelation g calculated from `y` and (if provided) `t`

    Parameters
    ----------

    y : array
        a time series; NaNs not allowed
    p : int
        column dimension (number of surrogates)
    t : array
        the time axis of the series

    Returns
    -------

    ysim : array
        n by p matrix of simulated AR(1) vector

    See Also
    --------

    pyleoclim.utils.tsmodel.ar1_model : Simulates a (possibly irregularly-sampled) AR(1) process with given decay constant tau, à la REDFIT.

    pyleoclim.utils.tsmodel.ar1_fit : Returns the lag-1 autocorrelation from AR(1) fit OR persistence from tauest.

    pyleoclim.utils.tsmodel.ar1_fit_evenly : Returns the lag-1 autocorrelation from AR(1) fit assuming even temporal spacing.

    pyleoclim.utils.tsmodel.tau_estimation : Estimates the  temporal decay scale of an (un)evenly spaced time series.

    '''
    n = np.size(y)
    ysim = np.empty(shape=(n, p))  # declare array

    sig = np.std(y)
    if is_evenly_spaced(t):
        g = ar1_fit_evenly(y)

        # specify model parameters (statmodel want lag0 coefficent as unity)
        ar = np.r_[1, -g]  # AR model parameter
        ma = np.r_[1, 0.0]  # MA model parameters
        sig_n = sig*np.sqrt(1-g**2)  # theoretical noise variance for the process to achieve the same variance as y

        # simulate AR(1) model for each column
        for i in np.arange(p):
            #ysim[:, i] = sm.tsa.arma_generate_sample(ar=ar, ma=ma, nsample=n, burnin=50, sigma=sig_n) # old statsmodels syntax
            #ysim[:, i] = sm.tsa.ArmaProcess(ar, ma).generate_sample(nsample=n, scale=sig_n, burnin=50) # statsmodels v0.11.1-?
            ysim[:, i] = arma_generate_sample(ar, ma, nsample=n, scale=sig_n, burnin=50) # statsmodels v0.12+
    else:
        #  tau_est = ar1_fit(y, t=t, detrend=detrend, params=params)
        tau_est = tau_estimation(y, t)
        for i in np.arange(p):
            # the output of ar1_model has unit variance,
            # multiply by sig to be consistent with the original input timeseries
            ysim[:, i] = ar1_model(t, tau_est, output_sigma=sig)

    if p == 1:
        ysim = ysim[:, 0]

    return ysim

[docs]def gen_ar1_evenly(t, g, scale=1, burnin=50):
    ''' Generate AR(1) series samples

    Parameters
    ----------

    t : array
        the time axis
    
    g : float
        lag-1 autocorrelation

    scale : float
        The standard deviation of noise.

    burnin : int
        Number of observation at the beginning of the sample to drop. Used to reduce dependence on initial values.

    Returns
    -------
    y : array
        the generated AR(1) series


    See also
    --------
    statsmodels.tsa.arima_process.arma_generate_sample: Simulate data from an ARMA. (https://www.statsmodels.org/stable/generated/statsmodels.tsa.arima_process.arma_generate_sample.html)

    '''
    ar = np.r_[1, -g]  # AR model parameter
    ma = np.r_[1, 0.0]  # MA model parameters
    y = arma_generate_sample(ar, ma, nsample=np.size(t), scale=scale, burnin=burnin)
    return y


[docs]def ar1_fit_evenly(y):
    ''' Returns the lag-1 autocorrelation from AR(1) fit.

    Parameters
    ----------
    y : array
        vector of (float) numbers as a time series

    Returns
    -------
    g : float
        lag-1 autocorrelation coefficient

    '''
    # syntax compatible with statsmodels v0.11.1
    #ar1_mod = sm.tsa.ARMA(y, (1, 0), missing='drop').fit(trend='nc', disp=0)
    # syntax compatible with statsmodels v0.12
    ar1_mod = ARIMA(y, order = (1, 0, 0), missing='drop',trend='ct').fit()
    g = ar1_mod.params[2]

    if g > 1:
        print('Warning: AR(1) fitted autocorrelation greater than 1; setting to 1-eps^{1/4}')
        eps = np.spacing(1.0)
        g = 1.0 - eps**(1/4)

    return g

[docs]def tau_estimation(y, t):
#  def tau_estimation(y, t, detrend=False, params=["default", 4, 0, 1], gaussianize=False, standardize=True):
    ''' Estimates the  temporal decay scale of an (un)evenly spaced time series.

    Parameters
    ----------

    y : array
        a time series
    t : array
        time axis of the time series

    Returns
    -------

    tau_est : float
        the estimated persistence

    References
    ----------

    Mudelsee, M. TAUEST: A Computer Program for Estimating Persistence in Unevenly Spaced Weather/Climate Time Series.
        Comput. Geosci. 28, 69–72 (2002).

    '''
    #  pd_y = preprocess(y, t, detrend=detrend, params=params, gaussianize=gaussianize, standardize=standardize)
    dt = np.diff(t)
    #  assert dt > 0, "The time point should be increasing!"

    def ar1_fun(a):
        #  return np.sum((pd_y[1:] - pd_y[:-1]*a**dt)**2)
        return np.sum((y[1:] - y[:-1]*a**dt)**2)

    a_est = optimize.minimize_scalar(ar1_fun, bounds=[0, 1], method='bounded').x
    #  a_est = optimize.minimize_scalar(ar1_fun, method='brent').x

    tau_est = -1 / np.log(a_est)

    return tau_est


[docs]def colored_noise(alpha, t, f0=None, m=None, seed=None):
    ''' Generate a colored noise timeseries

    Parameters
    ----------
    alpha : float
        exponent of the 1/f^alpha noise

    t : float
        time vector of the generated noise

    f0 : float
        fundamental frequency

    m : int
        maximum number of the waves, which determines the highest frequency of the components in the synthetic noise

    Returns
    -------

    y : array
        the generated 1/f^alpha noise

    References
    ----------

    Eq. (15) in Kirchner, J. W. Aliasing in 1/f(alpha) noise spectra: origins, consequences, and remedies.
        Phys Rev E Stat Nonlin Soft Matter Phys 71, 066110 (2005).
    '''
    n = np.size(t)  # number of time points
    y = np.zeros(n)

    if f0 is None:
        f0 = 1/n  # fundamental frequency
    if m is None:
        m = n//2

    k = np.arange(m) + 1  # wave numbers

    if seed is not None:
        np.random.seed(seed)

    theta = np.random.rand(int(m))*2*np.pi  # random phase
    for j in range(n):
        coeff = (k*f0)**(-alpha/2)
        sin_func = np.sin(2*np.pi*k*f0*t[j] + theta)
        y[j] = np.sum(coeff*sin_func)

    return y

[docs]def colored_noise_2regimes(alpha1, alpha2, f_break, t, f0=None, m=None, seed=None):
    ''' Generate a colored noise timeseries with two regimes

    Parameters
    ----------

    alpha1, alpha2 : float
        the exponent of the 1/f^alpha noise

    f_break : float
        the frequency where the scaling breaks

    t : float
        time vector of the generated noise
    f0 : float
        fundamental frequency
    m : int
        maximum number of the waves, which determines the highest frequency of the components in the synthetic noise

    Returns
    -------

    y : array
        the generated 1/f^alpha noise

    References
    ----------

     Eq. (15) in Kirchner, J. W. Aliasing in 1/f(alpha) noise spectra: origins, consequences, and remedies.
         Phys Rev E Stat Nonlin Soft Matter Phys 71, 066110 (2005).
    '''
    n = np.size(t)  # number of time points
    y = np.zeros(n)

    if f0 is None:
        f0 = 1/n  # fundamental frequency
    if m is None:
        m = n//2  # so the aliasing is limited

    k = np.arange(m) + 1  # wave numbers

    if seed is not None:
        np.random.seed(seed)

    theta = np.random.rand(int(m))*2*np.pi  # random phase

    f_vec = k*f0
    regime1= k*f0>=f_break
    regime2= k*f0<=f_break
    f_vec1 = f_vec[regime1]
    f_vec2 = f_vec[regime2]
    s = np.exp(alpha1/alpha2*np.log(f_vec1[0])) / f_vec2[-1]

    for j in range(n):
        coeff = np.ndarray((np.size(f_vec)))
        coeff[regime1] = f_vec1**(-alpha1/2)
        coeff[regime2] = (s*f_vec2)**(-alpha2/2)
        sin_func = np.sin(2*np.pi*k*f0*t[j] + theta)
        y[j] = np.sum(coeff*sin_func)

    return y