Source code for pyleoclim.utils.spectral

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Utilities for spectral analysis, including WWZ, CWT, Lomb-Scargle, MTM, and Welch.
Designed for NumPy arrays, either evenly spaced or not (method-dependent).

All spectral methods must return a dictionary containing two vectors: one for the
frequency axis and one for the power spectral density (PSD).

Additional utilities help compute an optimal frequency vector or estimate scaling exponents.
"""

import numpy as np
from scipy import signal
import nitime.algorithms as nialg
import statsmodels.api as sm
import collections
import warnings

__all__ = [
    'wwz_psd',
    'cwt_psd',
    'mtm',
    'lomb_scargle',
    'welch',
    'periodogram'
]

from .tsbase import (
    is_evenly_spaced,
    clean_ts
)

from .tsutils import preprocess

from .wavelet import (
    make_freq_vector,
    prepare_wwz,
    wwz,
    wwa2psd,
    cwt,
)

#-----------
#Wrapper
#-----------

#---------
#Main functions
#---------


def welch(ys, ts, window='hann',nperseg=None, noverlap=None, nfft=None,
           return_onesided=True, detrend = None, sg_kwargs = None,
           gaussianize=False, standardize=True,
           scaling='density', average='mean'):
    '''Estimate the power spectral density with Welch's averaged periodogram.

    Wrapper around scipy.signal.welch, see
    https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.welch.html

    Welch's method divides the data into overlapping segments, computes a
    modified (windowed) periodogram for each segment and averages the
    periodograms to lower the variance of the estimator.

    Before the SciPy call, NaNs are removed, the time axis is checked for
    even spacing and the series goes through ``preprocess`` (detrending,
    gaussianization, standardization). SciPy's own detrending is therefore
    switched off.

    Parameters
    ----------

    ys : array

        a time series

    ts : array

        time axis of the time series

    window : string or tuple

        Desired window to use, forwarded to scipy.signal.welch. Common names:
        boxcar, triang, blackman, hamming, hann (default), bartlett, flattop,
        parzen, bohman, blackmanharris, nuttall, barthann, kaiser (needs beta),
        gaussian (needs standard deviation), general_gaussian (needs power,
        width), slepian (needs width), dpss (needs normalized half-bandwidth),
        chebwin (needs attenuation), exponential (needs decay scale),
        tukey (needs taper fraction). The set actually available depends on
        the installed SciPy version; see scipy.signal.get_window.

        If the window requires no parameters, then window can be a string.
        If the window requires parameters, then window must be a tuple with the
        first argument the string name of the window, and the next arguments
        the needed parameters.
        If window is a floating point number, it is interpreted as the beta
        parameter of the kaiser window.

    nperseg : int

        Length of each segment. If None (default), half the number of points
        left after NaN removal is used (at least 1); with the default 50%
        overlap this gives three segments.

    noverlap : int

        Number of points to overlap. If None (default), noverlap=nperseg//2,
        i.e. 50% overlap.

    nfft : int

        Length of the FFT used, if a zero padded FFT is desired. If None, the
        FFT length is nperseg.

    return_onesided : bool

        If True (default), return a one-sided spectrum for real data. If False
        return a two-sided spectrum. For complex data, a two-sided spectrum is
        always returned.

    detrend : str

        If None, no detrending is applied. Available detrending methods:

            - None - no detrending will be applied (default);
            - linear - a linear least-squares fit to `ys` is subtracted;
            - constant - the mean of `ys` is subtracted
            - savitzy-golay - ys is filtered using the Savitzky-Golay filters and the resulting filtered series is subtracted from y.
            - emd - Empirical mode decomposition

    sg_kwargs : dict

        The parameters for the Savitzky-Golay filters. see pyleoclim.utils.filter.savitzy_golay for details.

    gaussianize : bool

        If True, gaussianizes the timeseries

    standardize : bool

        If True (default), standardizes the timeseries; a warning is emitted
        to signal it.

    scaling : {'density', 'spectrum'}

        Selects between computing the power spectral density ('density') where
        Pxx has units of V**2/Hz and computing the power spectrum ('spectrum')
        where Pxx has units of V**2, if x is measured in V and fs is measured
        in Hz. Defaults to 'density'.

    average : {'mean', 'median'}

        Method to use when combining periodograms. Defaults to 'mean'.

    Returns
    -------
    res_dict : dict

        the result dictionary, including

        - freq (array): the frequency vector
        - psd (array): the spectral density vector; the value at the zero
          frequency, when present as first element, is set to NaN

    Raises
    ------
    ValueError
        If `ys` and `ts` have different lengths, or if the time axis is not
        evenly spaced.

    See also
    --------
    pyleoclim.utils.spectral.periodogram : Spectral density estimation using a periodogram
    pyleoclim.utils.spectral.mtm : Spectral density estimation using the multi-taper method
    pyleoclim.utils.spectral.lomb_scargle : Lomb-Scargle periodogram
    pyleoclim.utils.spectral.wwz_psd : Spectral estimation using the Weighted Wavelet Z-transform
    pyleoclim.utils.spectral.cwt_psd : Spectral estimation using the Continuous Wavelet Transform
    pyleoclim.utils.filter.savitzy_golay : Filtering using Savitzy-Golay
    pyleoclim.utils.tsutils.detrend : detrending functionalities using 4 possible methods
    pyleoclim.utils.tsutils.gaussianize: Quantile maps a 1D array to a Gaussian distribution
    pyleoclim.utils.tsutils.standardize: Centers and normalizes a given time series.

    References
    ----------
    P. Welch, "The use of the fast Fourier transform for the estimation of power spectra:
        A method based on time averaging over short, modified periodograms",
        IEEE Trans. Audio Electroacoust. vol. 15, pp. 70-73, 1967.

    '''

    if standardize:
        warnings.warn('Standardizing the timeseries')

    ts = np.array(ts)
    ys = np.array(ys)

    if len(ts) != len(ys):
        raise ValueError('Time and value axis should be the same length')

    # remove NaNs
    ys, ts = clean_ts(ys, ts)

    # the FFT-based estimator requires a regular sampling
    if not is_evenly_spaced(ts):
        raise ValueError('For the Welch method, data should be evenly spaced')

    # Default segment length: half of the cleaned series. The previous
    # expression len(ys/2) divided the values, not the length, and so
    # silently used the full series as one single segment.
    if nperseg is None:
        nperseg = max(len(ys) // 2, 1)

    # preprocessing
    ys = preprocess(ys, ts, detrend=detrend, sg_kwargs=sg_kwargs,
               gaussianize=gaussianize, standardize=standardize)

    # sampling frequency from the median time step
    dt = np.median(np.diff(ts))
    fs = 1 / dt

    # detrend=False: detrending (if any) was already done by preprocess
    freq, psd = signal.welch(ys, fs=fs, window=window, nperseg=nperseg,
                             noverlap=noverlap, nfft=nfft,
                             return_onesided=return_onesided, scaling=scaling,
                             average=average, detrend=False, axis=-1)

    # mask the zero-frequency point
    if freq[0] == 0:
        psd[0] = np.nan

    return {
        'freq': np.asarray(freq),
        'psd': np.asarray(psd),
    }


def mtm(ys, ts, NW=None, BW=None, detrend = None, sg_kwargs=None,
           gaussianize=False, standardize=True, adaptive=False, jackknife=True,
           low_bias=True, sides='default', nfft=None):
    ''' Spectral density using the multi-taper method.

    If the NW product, or the BW and Fs in Hz are not specified by the user, a bandwidth of 4 times the fundamental frequency, corresponding to NW = 4 will be used.

    Based on the nitime package: http://nipy.org/nitime/api/generated/nitime.algorithms.spectral.html

    Before the nitime call, NaNs are removed, the time axis is checked for
    even spacing and the series goes through ``preprocess`` (detrending,
    gaussianization, standardization).

    Parameters
    ----------

    ys : array
        a time series
    ts : array
        time axis of the time series
    NW : float
        The time-bandwidth product NW governs the width (and therefore, height) of a peak, which can take the values [2, 5/2, 3, 7/2, 4]. This product controls the classical bias-variance tradeoff inherent to spectral estimation: a large product limits the variance but increases leakage out of harmonic line. In other words, small values of NW mean high spectral resolution, low bias, but high variance. Large values of the parameter mean lower resolution, higher bias, but reduced variance. There is no automated way to choose this parameter, and the default (NW=4) corresponds to a conservative choice with low variance.
        For a demonstration on the effect of this parameter, see the spectral analysis notebook in our tutorials: https://pyleoclim-util.readthedocs.io/en/master/tutorials.html.
    BW : float
        The sampling-relative bandwidth of the data tapers
    detrend : str
        If None, no detrending is applied. Available detrending methods:

            - None - no detrending will be applied (default);
            - linear - a linear least-squares fit to `ys` is subtracted;
            - constant - the mean of `ys` is subtracted
            - savitzy-golay - ys is filtered using the Savitzky-Golay filters and the resulting filtered series is subtracted from y.
            - emd - Empirical mode decomposition
    sg_kwargs : dict
        The parameters for the Savitzky-Golay filters. see pyleoclim.utils.filter.savitzy_golay for details.
    gaussianize : bool
        If True, gaussianizes the timeseries
    standardize : bool
        If True (default), standardizes the timeseries; a warning is emitted
        to signal it.
    adaptive : {True/False}
        Use an adaptive weighting routine to combine the PSD estimates of
        different tapers.
    jackknife : {True/False}
        Use the jackknife method to make an estimate of the PSD variance
        at each point. The third value returned by nitime is not part of
        this function's output.
    low_bias : {True/False}
        Rather than use 2NW tapers, only use the tapers that have better than
        90% spectral concentration within the bandwidth (still using
        a maximum of 2NW tapers)
    sides : str (optional)   [ 'default' | 'onesided' | 'twosided' ]
        This determines which sides of the spectrum to return.
        For complex-valued inputs, the default is two-sided, for real-valued
        inputs, default is one-sided.
    nfft : int
        Forwarded to nitime as ``NFFT``; see the nitime documentation of
        ``multi_taper_psd`` for its meaning and default.

    Returns
    -------

    res_dict : dict
        the result dictionary, including

        - freq (array): the frequency vector
        - psd (array): the spectral density vector; the value at the zero
          frequency, when present as first element, is set to NaN

    Raises
    ------
    ValueError
        If `ys` and `ts` have different lengths, or if the time axis is not
        evenly spaced.

    See Also
    --------
    pyleoclim.utils.spectral.periodogram : Spectral density estimation using a periodogram
    pyleoclim.utils.spectral.welch : spectral estimation using Welch's periodogram
    pyleoclim.utils.spectral.lomb_scargle : Lomb-Scargle periodogram
    pyleoclim.utils.spectral.wwz_psd : Spectral estimation using the Weighted Wavelet Z-transform
    pyleoclim.utils.spectral.cwt_psd : Spectral estimation using the Continuous Wavelet Transform
    pyleoclim.utils.filter.savitzy_golay : Filtering using Savitzy-Golay
    pyleoclim.utils.tsutils.detrend : detrending functionalities using 4 possible methods
    pyleoclim.utils.tsutils.gaussianize: Quantile maps a 1D array to a Gaussian distribution
    pyleoclim.utils.tsutils.standardize: Centers and normalizes a given time series.

    '''

    if standardize:
        warnings.warn('Standardizing the timeseries')

    ts = np.array(ts)
    ys = np.array(ys)

    if len(ts) != len(ys):
        raise ValueError('Time and value axis should be the same length')

    # remove NaNs
    ys, ts = clean_ts(ys, ts)

    # the method requires a regular sampling
    if not is_evenly_spaced(ts):
        raise ValueError('For the MTM method, data should be evenly spaced')

    # preprocessing
    ys = preprocess(ys, ts, detrend=detrend, sg_kwargs=sg_kwargs,
               gaussianize=gaussianize, standardize=standardize)

    # sampling frequency from the median time step
    dt = np.median(np.diff(ts))
    fs = 1 / dt

    # spectral analysis with nitime; the third return value is not used here
    freq, psd, _ = nialg.multi_taper_psd(ys, Fs=fs, NW=NW, BW=BW,
                                         adaptive=adaptive,
                                         jackknife=jackknife,
                                         low_bias=low_bias,
                                         sides=sides, NFFT=nfft)

    # mask the zero-frequency point
    if freq[0] == 0:
        psd[0] = np.nan

    return {
        'freq': np.asarray(freq),
        'psd': np.asarray(psd),
    }


def _lomb_scargle_edge_unstable(p, q, tol=1.e-2):
    """Return True when two neighbouring PSD values differ by nearly 100%.

    The relative difference |p - q| / max(p, q) is compared to 1; the edge is
    flagged when it lies within `tol` of 1 (for non-negative values this
    means the smaller value is below `tol` times the larger one).
    """
    larger = q if p < q else p
    return abs(1 - abs(p - q) / larger) < tol


def lomb_scargle(ys, ts, freq=None, freq_method='lomb_scargle',
                 freq_kwargs=None, n50=3, window='hann',
                 detrend = None, sg_kwargs=None,
                 gaussianize=False,
                 standardize=True,
                 average='mean'):
    """ Lomb-Scargle periodogram

    Appropriate for unevenly-spaced arrays.
    Uses the lomb-scargle implementation from scipy.signal: https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.lombscargle.html

    The series is cleaned of NaNs, preprocessed, split into 50%-overlapping
    segments, and a windowed Lomb-Scargle periodogram is computed on each
    segment over a common frequency vector; the segment periodograms are
    then averaged.

    Parameters
    ----------

    ys : array

        a time series

    ts : array

        time axis of the time series

    freq : array

        vector of frequency. If None (default), it is generated with
        `freq_method` from the time axis of the first segment. A leading
        zero frequency is removed.

    freq_method : str

        Method to generate the frequency vector if not set directly. The following options are available:

            - log
            - lomb_scargle (default)
            - welch
            - scale
            - nfft

        See utils.wavelet.make_freq_vector for details

    freq_kwargs : dict

        Arguments for the method chosen in freq_method. See specific functions in utils.wavelet for details.
        If 'dt' is not supplied, the median time step of `ts` is used.

    n50 : int

        The number of 50% overlapping segments to apply. Must be at least 1;
        with n50=1 the whole series is used as a single segment.

    window : str or tuple

        Desired window to use, forwarded to scipy.signal.get_window. Common
        names: boxcar, triang, blackman, hamming, hann (default), bartlett,
        flattop, parzen, bohman, blackmanharris, nuttall, barthann,
        kaiser (needs beta), gaussian (needs standard deviation),
        general_gaussian (needs power, width), slepian (needs width),
        dpss (needs normalized half-bandwidth), chebwin (needs attenuation),
        exponential (needs decay scale), tukey (needs taper fraction).
        The set actually available depends on the installed SciPy version.

        If the window requires no parameters, then window can be a string.
        If the window requires parameters, then window must be a tuple with the
        first argument the string name of the window, and the next arguments
        the needed parameters.
        If window is a floating point number, it is interpreted as the beta
        parameter of the kaiser window.

    detrend : str

        If None, no detrending is applied. Available detrending methods:

            - None - no detrending will be applied (default);
            - linear - a linear least-squares fit to `ys` is subtracted;
            - constant - the mean of `ys` is subtracted
            - savitzy-golay - ys is filtered using the Savitzky-Golay filters and the resulting filtered series is subtracted from y.
            - emd - Empirical mode decomposition

    sg_kwargs : dict

        The parameters for the Savitzky-Golay filters. see pyleoclim.utils.filter.savitzy_golay for details.

    gaussianize : bool

        If True, gaussianizes the timeseries

    standardize : bool

        If True (default), standardizes the timeseries; a warning is emitted
        to signal it.

    average : {'mean','median'}

        Method to use when averaging periodograms. Defaults to 'mean'.

    Returns
    -------

    res_dict : dict

        the result dictionary, including

        - freq (array): the frequency vector
        - psd (array): the spectral density vector. Points flagged as
          unstable at either end of the frequency vector are set to NaN
          (the first point, and/or the last two points).

    Raises
    ------
    ValueError
        If `ys` and `ts` have different lengths, if `n50` is not positive,
        or if `average` is neither 'mean' nor 'median'.

    See Also
    --------

    pyleoclim.utils.spectral.periodogram : Estimate power spectral density using a periodogram
    pyleoclim.utils.spectral.mtm : Returns spectral density using a multi-taper method
    pyleoclim.utils.spectral.welch : Returns power spectral density using the Welch method
    pyleoclim.utils.spectral.wwz_psd : Spectral estimation using the Weighted Wavelet Z-transform
    pyleoclim.utils.spectral.cwt_psd : Spectral estimation using the Continuous Wavelet Transform
    pyleoclim.utils.filter.savitzy_golay : Filtering using Savitzy-Golay
    pyleoclim.utils.tsutils.detrend : detrending functionalities using 4 possible methods
    pyleoclim.utils.tsutils.gaussianize: Quantile maps a 1D array to a Gaussian distribution
    pyleoclim.utils.tsutils.standardize: Centers and normalizes a given time series.

    References
    ----------
    Lomb, N. R. (1976). Least-squares frequency analysis of unequally spaced data. Astrophysics and Space Science 39, 447-462.

    Scargle, J. D. (1982). Studies in astronomical time series analysis. II. Statistical aspects of spectral analysis of unevenly spaced data. The Astrophysical Journal, 263(2), 835-853.

    """

    if standardize:
        warnings.warn('Standardizing the timeseries')

    ts = np.array(ts)
    ys = np.array(ys)

    if len(ts) != len(ys):
        raise ValueError('Time and value axis should be the same length')

    if n50 <= 0:
        raise ValueError('Number of overlapping segments should be at least 1')

    # remove NaNs
    ys, ts = clean_ts(ys, ts)

    # preprocessing
    ys = preprocess(ys, ts, detrend=detrend, sg_kwargs=sg_kwargs,
               gaussianize=gaussianize, standardize=standardize)

    # segment boundaries: consecutive entries of `index` are half a segment
    # apart, so segment k spans index[k]:index[k+2]
    n_pts = len(ts)
    nseg = int(np.floor(2 * n_pts / (n50 + 1)))
    index = np.array(np.arange(0, n_pts, nseg / 2), dtype=int)
    if len(index) == n50 + 2:
        index[-1] = n_pts
    else:
        index = np.append(index, n_pts)  # make it end at the time series

    if n50 > 1:
        n_segments = len(index) - 2
        ts_seg = [ts[index[k]:index[k + 2]] for k in range(n_segments)]
        ys_seg = [ys[index[k]:index[k + 2]] for k in range(n_segments)]
    else:
        ts_seg = [ts]
        ys_seg = [ys]

    # frequency vector shared by all segments
    if freq is None:
        freq_kwargs = {} if freq_kwargs is None else freq_kwargs.copy()
        if 'dt' not in freq_kwargs:
            freq_kwargs['dt'] = np.median(np.diff(ts))
        freq = make_freq_vector(ts_seg[0],
                                method=freq_method,
                                **freq_kwargs)
    else:
        # accept any array-like (e.g. a list) for the arithmetic below
        freq = np.asarray(freq)

    # remove zero freq
    if freq[0] == 0:
        freq = np.delete(freq, 0)

    # scipy.signal.lombscargle expects angular frequencies
    freq_angular = 2 * np.pi * freq

    psd_seg = []
    for t_seg, y_seg in zip(ts_seg, ys_seg):
        win = signal.get_window(window, len(t_seg))
        # normalisation by the window power and the mean time step
        scale = len(t_seg) * 2 * np.mean(np.diff(t_seg)) / ((win * win).sum())
        psd_seg.append(signal.lombscargle(t_seg,
                                          y_seg * win,
                                          freq_angular, precenter=True) * scale)

    # average them up
    if average == 'mean':
        psd = np.mean(psd_seg, axis=0)
    elif average == 'median':
        psd = np.median(psd_seg, axis=0)
    else:
        raise ValueError('Average should either be set to mean or median')

    # Fix possible problems at the edges: the points are kept (so that freq
    # and psd retain their length) but masked with NaN.
    if _lomb_scargle_edge_unstable(psd[0], psd[1]):
        warnings.warn("Unstability at the beginning of freq vector, setting the point to NaN")
        psd[0] = np.nan

    if _lomb_scargle_edge_unstable(psd[-1], psd[-2]):
        warnings.warn("Unstability at the end of freq vector, setting the last two points to NaN")
        psd[-1] = np.nan
        psd[-2] = np.nan

    return {
        'freq': np.asarray(freq),
        'psd': np.asarray(psd),
    }


def periodogram(ys, ts, window='hann', nfft=None,
           return_onesided=True, detrend = None, sg_kwargs=None,
           gaussianize=False, standardize=True,
           scaling='density'):
    ''' Spectral density estimation using a periodogram

    Based on the `function from scipy <https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.periodogram.html>`_.

    Before the SciPy call, NaNs are removed, the time axis is checked for
    even spacing and the series goes through ``preprocess`` (detrending,
    gaussianization, standardization). SciPy's own detrending is therefore
    switched off.

    Parameters
    ----------

    ys : array

        a time series

    ts : array

        time axis of the time series

    window : string or tuple

        Desired window to use, forwarded to scipy.signal.periodogram. Common
        names: boxcar, triang, blackman, hamming, hann (default), bartlett,
        flattop, parzen, bohman, blackmanharris, nuttall, barthann,
        kaiser (needs beta), gaussian (needs standard deviation),
        general_gaussian (needs power, width), slepian (needs width),
        dpss (needs normalized half-bandwidth), chebwin (needs attenuation),
        exponential (needs decay scale), tukey (needs taper fraction).
        The set actually available depends on the installed SciPy version;
        see scipy.signal.get_window.

        If the window requires no parameters, then window can be a string.
        If the window requires parameters, then window must be a tuple with the
        first argument the string name of the window, and the next arguments
        the needed parameters.
        If window is a floating point number, it is interpreted as the beta
        parameter of the kaiser window.

    nfft : int

        Length of the FFT used, if a zero padded FFT is desired. None
        (default) is forwarded to SciPy, which then uses the length of the
        series.

    return_onesided : bool

        If True (default), return a one-sided spectrum for real data. If False
        return a two-sided spectrum. For complex data, a two-sided spectrum is
        always returned.

    detrend : str

        If None, no detrending is applied. Available detrending methods:

            - None - no detrending will be applied (default);
            - linear - a linear least-squares fit to `ys` is subtracted;
            - constant - the mean of `ys` is subtracted
            - savitzy-golay - ys is filtered using the Savitzky-Golay filters and the resulting filtered series is subtracted from y.
            - emd - Empirical mode decomposition

    sg_kwargs : dict

        The parameters for the Savitzky-Golay filters. see pyleoclim.utils.filter.savitzy_golay for details.

    gaussianize : bool

        If True, gaussianizes the timeseries

    standardize : bool

        If True (default), standardizes the timeseries; a warning is emitted
        to signal it.

    scaling : {'density', 'spectrum'}

        Selects between computing the power spectral density ('density') where
        Pxx has units of V**2/Hz and computing the power spectrum ('spectrum')
        where Pxx has units of V**2, if x is measured in V and fs is measured
        in Hz. Defaults to 'density'.

    Returns
    -------

    res_dict : dict

        the result dictionary, including

        - freq (array): the frequency vector
        - psd (array): the spectral density vector; the value at the zero
          frequency, when present as first element, is set to NaN

    Raises
    ------
    ValueError
        If `ys` and `ts` have different lengths, or if the time axis is not
        evenly spaced.

    See Also
    --------
    pyleoclim.utils.spectral.welch : Estimate power spectral density using the welch method
    pyleoclim.utils.spectral.mtm : Returns spectral density using a multi-taper method
    pyleoclim.utils.spectral.lomb_scargle : Return the computed periodogram using lomb-scargle algorithm
    pyleoclim.utils.spectral.wwz_psd : Spectral estimation using the Weighted Wavelet Z-transform
    pyleoclim.utils.spectral.cwt_psd : Spectral estimation using the Continuous Wavelet Transform
    pyleoclim.utils.filter.savitzy_golay : Filtering using Savitzy-Golay
    pyleoclim.utils.tsutils.detrend : detrending functionalities using 4 possible methods
    pyleoclim.utils.tsutils.gaussianize: Quantile maps a 1D array to a Gaussian distribution
    pyleoclim.utils.tsutils.standardize: Centers and normalizes a given time series.

    '''

    if standardize:
        warnings.warn('Standardizing the timeseries')

    ts = np.array(ts)
    ys = np.array(ys)

    if len(ts) != len(ys):
        raise ValueError('Time and value axis should be the same length')

    # remove NaNs
    ys, ts = clean_ts(ys, ts)

    # the FFT-based estimator requires a regular sampling
    if not is_evenly_spaced(ts):
        raise ValueError('For the Periodogram method, data should be evenly spaced')

    # preprocessing
    ys = preprocess(ys, ts, detrend=detrend, sg_kwargs=sg_kwargs,
               gaussianize=gaussianize, standardize=standardize)

    # sampling frequency from the median time step
    dt = np.median(np.diff(ts))
    fs = 1 / dt

    # detrend=False: detrending (if any) was already done by preprocess
    freq, psd = signal.periodogram(ys, fs, window=window, nfft=nfft,
                                   detrend=False,
                                   return_onesided=return_onesided,
                                   scaling=scaling, axis=-1)

    # mask the zero-frequency point
    if freq[0] == 0:
        psd[0] = np.nan

    return {
        'freq': np.asarray(freq),
        'psd': np.asarray(psd),
    }


def wwz_psd(ys, ts, freq=None, freq_method='log', freq_kwargs=None,
            tau=None, c=1e-3, nproc=8,
            detrend=False, sg_kwargs=None, gaussianize=False,
            standardize=True, Neff_threshold=3, anti_alias=False, avgs=2,
            method='Kirchner_numba', wwa=None, wwz_Neffs=None, wwz_freq=None):
    ''' Spectral estimation using the Weighted Wavelet Z-transform

    The Weighted wavelet Z-transform (WWZ) is based on Morlet wavelet spectral estimation, using
    least squares minimization to suppress the energy leakage caused by data gaps.
    WWZ does not rely on interpolation or detrending, and is appropriate for unevenly-spaced datasets.
    In particular, we use the variant of Kirchner & Neal (2013), in which basis rotations mitigate the
    numerical instability that occurs in pathological cases with the original algorithm (Foster, 1996).
    The WWZ method has one adjustable parameter, a decay constant `c` that balances the time and frequency
    resolutions of the analysis. The smaller this constant is, the sharper the peaks.
    The default value is 1e-3 to obtain smooth spectra that lend themselves to better scaling exponent
    estimation, while still capturing the main periodicities.

    Note that scalogram applications use the larger value 1/(8*pi^2), justified elsewhere (Foster, 1996).

    The wavelet amplitudes are computed with pyleoclim.utils.wavelet.wwz
    unless `wwa`, `wwz_Neffs` and `wwz_freq` are all supplied, in which case
    the supplied values are converted to a PSD directly.

    Parameters
    ----------

    ys : array

        a time series, NaNs will be deleted automatically

    ts : array

        the time points, if `ys` contains any NaNs, some of the time points will be deleted accordingly

    freq : array

        vector of frequency

    freq_method : str, {'log', 'lomb_scargle', 'welch', 'scale', 'nfft'}

        Method to generate the frequency vector if not set directly. The following options are available:

        - 'log' (default)
        - 'lomb_scargle'
        - 'welch'
        - 'scale'
        - 'nfft'

        See :func:`pyleoclim.utils.wavelet.make_freq_vector` for details

    freq_kwargs : dict

        Arguments for the method chosen in freq_method. See specific functions in pyleoclim.utils.wavelet for details

    tau : array

        the evenly-spaced time vector for the analysis, namely the time shift for wavelet analysis

    c : float

        the decay constant that will determine the analytical resolution of frequency for analysis, the smaller the higher resolution;
        the default value 1e-3 is good for most of the spectral analysis cases

    nproc : int

        the number of processes for multiprocessing

    detrend : str, {None, 'linear', 'constant', 'savitzy-golay'}

        Forwarded unchanged to pyleoclim.utils.wavelet.wwz (the default of
        this function is False). Methods for detrending include:

        - None: the original time series is assumed to have no trend;
        - 'linear': a linear least-squares fit to `ys` is subtracted;
        - 'constant': the mean of `ys` is subtracted
        - 'savitzy-golay': ys is filtered using the Savitzky-Golay filters and the resulting filtered series is subtracted from y.

    sg_kwargs : dict

        The parameters for the Savitzky-Golay filters. See :func:`pyleoclim.utils.filter.savitzky_golay()` for details.

    gaussianize : bool

        If True, gaussianizes the timeseries

    standardize : bool

        If True (default), standardizes the timeseries; a warning is emitted
        to signal it.

    method : string, {'Foster', 'Kirchner', 'Kirchner_f2py', 'Kirchner_numba'}

        Available specific implementation of WWZ include:

        - 'Foster': the original WWZ method;
        - 'Kirchner': the method Kirchner adapted from Foster;
        - 'Kirchner_f2py':  the method Kirchner adapted from Foster, implemented with f2py for acceleration;
        - 'Kirchner_numba':  the method Kirchner adapted from Foster, implemented with Numba for acceleration (default);

    Neff_threshold : int

        threshold for the effective number of points

    anti_alias : bool

        If True, uses anti-aliasing

    avgs : int

        flag for whether spectrum is derived from instantaneous point measurements (avgs != 1)
        OR from measurements averaged over each sampling interval (avgs == 1)

    wwa : array

        the weighted wavelet amplitude, returned from pyleoclim.utils.wavelet.wwz

    wwz_Neffs : array

        the matrix of effective number of points in the time-scale coordinates,
        returned from pyleoclim.utils.wavelet.wwz

    wwz_freq : array

        the returned frequency vector from pyleoclim.utils.wavelet.wwz

    Returns
    -------

    res : namedtuple

        a namedtuple ``Results`` with the fields

        - psd (array): power spectral density
        - freq (array): vector of frequency

    See Also
    --------
    pyleoclim.utils.spectral.periodogram : Estimate power spectral density using a periodogram
    pyleoclim.utils.spectral.mtm : Returns spectral density using a multi-taper method
    pyleoclim.utils.spectral.lomb_scargle : Return the computed periodogram using lomb-scargle algorithm
    pyleoclim.utils.spectral.welch : Estimate power spectral density using the Welch method
    pyleoclim.utils.spectral.cwt_psd : Spectral estimation using the Continuous Wavelet Transform
    pyleoclim.utils.filter.savitzy_golay : Filtering using Savitzy-Golay
    pyleoclim.utils.tsutils.detrend : detrending functionalities using 4 possible methods
    pyleoclim.utils.tsutils.gaussianize: Quantile maps a 1D array to a Gaussian distribution
    pyleoclim.utils.tsutils.standardize: Centers and normalizes a given time series.

    References
    ----------
    - Foster, G. (1996). Wavelets for period analysis of unevenly sampled time series. The Astronomical Journal, 112(4), 1709-1729.
    - Kirchner, J. W. (2005). Aliasing in 1/f^a noise spectra: origins, consequences, and remedies. Physical Review E covering statistical, nonlinear, biological, and soft matter physics, 71, 66110.
    - Kirchner, J. W. and Neal, C. (2013). Universal fractal scaling in stream chemistry and its implications for solute transport and water quality trend detection. Proc Natl Acad Sci USA 110:12213-12218.
    '''

    if standardize:
        warnings.warn('Standardizing the timeseries')

    ys_cut, ts_cut, freq, tau = prepare_wwz(ys, ts, freq=freq,
                                            freq_method=freq_method,
                                            freq_kwargs=freq_kwargs, tau=tau)

    # Run the (expensive) transform only when the caller did not hand over a
    # complete precomputed result.
    if wwa is None or wwz_Neffs is None or wwz_freq is None:
        res_wwz = wwz(ys_cut, ts_cut, freq=freq, tau=tau, c=c, nproc=nproc,
                      detrend=detrend, sg_kwargs=sg_kwargs,
                      gaussianize=gaussianize, standardize=standardize,
                      method=method)
        wwa = res_wwz.amplitude
        wwz_Neffs = res_wwz.Neffs
        wwz_freq = res_wwz.freq

    psd = wwa2psd(wwa, ts_cut, wwz_Neffs, freq=wwz_freq,
                  Neff_threshold=Neff_threshold, anti_alias=anti_alias,
                  avgs=avgs)

    # NOTE(review): psd is computed on `wwz_freq` while the returned vector is
    # `freq` from prepare_wwz; presumably identical when wwz is run here, but
    # they may differ for caller-supplied `wwz_freq` - confirm.
    Results = collections.namedtuple('Results', ['psd', 'freq'])
    return Results(psd=psd, freq=freq)

def cwt_psd(ys, ts, freq=None, freq_method='log', freq_kwargs=None, scale=None,
            detrend=False, sg_kwargs=None, gaussianize=False, standardize=True, pad=False,
            mother='MORLET', param=None, cwt_res=None):
    ''' Spectral estimation using the continuous wavelet transform

    Uses the Torrence and Compo [1998] continuous wavelet transform implementation.
    The PSD is obtained by summing the squared wavelet amplitude over the time
    axis and dividing by the number of time points.

    Parameters
    ----------

    ys : numpy.array

        the time series.

    ts : numpy.array

        the time axis.

    freq : numpy.array, optional

        The frequency vector. The default is None, which will prompt the use of one of the underlying functions

    freq_method : string, optional

        The method by which to obtain the frequency vector. The default is 'log'.
        Options are 'log' (default), 'nfft', 'lomb_scargle', 'welch', and 'scale'

    freq_kwargs : dict, optional

        Optional parameters for the choice of the frequency vector. See make_freq_vector and additional methods for details. The default is None.

    scale : numpy.array

        Optional scale vector in place of a frequency vector. Default is None. If scale is not None, frequency method and attached arguments will be ignored.

    detrend : bool, string, {'linear', 'constant', 'savitzy-golay', 'emd'}

        Whether to detrend and with which option. The default is False.

    sg_kwargs : dict, optional

        Additional parameters for the savitzy-golay method. The default is None,
        which is passed on to the wavelet transform as an empty dictionary.

    gaussianize : bool, optional

        Whether to gaussianize. The default is False.

    standardize : bool, optional

        Whether to standardize. The default is True.

    pad : bool, optional

        Whether or not to pad the timeseries with zeroes to get N up to the next higher power of 2.
        This prevents wraparound from the end of the time series to the beginning, and also speeds up the FFT's used to do the wavelet transform.
        This will not eliminate all edge effects. The default is False.

    mother : string, optional

        the mother wavelet function. The default is 'MORLET'. Options are: 'MORLET', 'PAUL', or 'DOG'

    param : float, optional

        the mother wavelet parameter. The default is None since it varies for each mother

            - For 'MORLET' this is k0 (wavenumber), default is 6.
            - For 'PAUL' this is m (order), default is 4.
            - For 'DOG' this is m (m-th derivative), default is 2.

    cwt_res : namedtuple, optional

        Results from pyleoclim.utils.wavelet.cwt. When provided, the wavelet
        transform is not recomputed; the attributes `amplitude`, `time`, `freq`,
        `scale`, `mother` and `param` are read from it and all other arguments
        are left unused.

    Returns
    -------
    res : namedtuple
        Named tuple with the fields:
            - psd: the power density function
            - freq: frequency vector
            - scale: the scale vector
            - mother: the mother wavelet
            - param : the wavelet parameter

    See also
    --------

    pyleoclim.utils.wavelet.make_freq_vector : make the frequency vector with various methods
    pyleoclim.utils.wavelet.cwt: Torrence and Compo implementation of the continuous wavelet transform
    pyleoclim.utils.spectral.periodogram : Spectral estimation using Blackman-Tukey's periodogram
    pyleoclim.utils.spectral.mtm : Spectral estimation using the multi-taper method
    pyleoclim.utils.spectral.lomb_scargle : Spectral estimation using the lomb-scargle periodogram
    pyleoclim.utils.spectral.welch : Spectral estimation using Welch's periodogram
    pyleoclim.utils.spectral.wwz_psd : Spectral estimation using the Weighted Wavelet Z-transform
    pyleoclim.utils.tsutils.detrend : detrending functionalities using 4 possible methods
    pyleoclim.utils.tsutils.gaussianize: Quantile maps a 1D array to a Gaussian distribution
    pyleoclim.utils.tsutils.standardize: Centers and normalizes a given time series.

    References
    ----------

    Torrence, C. and G. P. Compo, 1998: A Practical Guide to Wavelet Analysis. Bull. Amer. Meteor. Soc., 79, 61-78.
    Python routines available at http://paos.colorado.edu/research/wavelets/

    '''

    # NOTE: the warning is issued whenever `standardize` is set, including when
    # a precomputed `cwt_res` is supplied and no preprocessing happens here.
    if standardize:
        warnings.warn('Standardizing the timeseries')

    # Fresh dict per call: avoids sharing one mutable default between calls.
    if sg_kwargs is None:
        sg_kwargs = {}

    if cwt_res is None:
        # No precomputed transform: compute the wavelet transform now.
        cwt_res = cwt(ys, ts, freq=freq, freq_method=freq_method, freq_kwargs=freq_kwargs,
                      scale=scale, detrend=detrend, sg_kwargs=sg_kwargs, gaussianize=gaussianize,
                      standardize=standardize, pad=pad, mother=mother, param=param)
        n = len(ts)
    else:
        # Use the time axis stored with the transform for the normalization.
        n = len(cwt_res.time)

    # Sum of squared amplitude over axis 1 of the transposed array, divided by
    # the number of time points.
    psd = np.sum(cwt_res.amplitude.T**2, axis=1) / n

    Results = collections.namedtuple('Results', ['psd', 'freq', 'scale', 'mother', 'param'])
    res = Results(psd=psd, freq=cwt_res.freq, scale=cwt_res.scale,
                  mother=cwt_res.mother, param=cwt_res.param)

    return res

def beta_estimation(psd, freq, fmin=None, fmax=None, logf_binning_step='max', verbose=False):
    r''' Estimate the scaling exponent of a power spectral density.

    Models the spectrum as :math:`S(f) \propto 1/f^{\beta}`. For instance:

    - :math:`\beta = 0` corresponds to white noise
    - :math:`\beta = 1` corresponds to pink noise
    - :math:`\beta = 2` corresponds to red noise (Brownian motion)

    The log-PSD is first averaged over bins of equal width in log-frequency
    between fmin and fmax. beta is then minus the slope of an ordinary
    least-squares fit of log10(psd_binned) against log10(f_binned).

    Parameters
    ----------

    psd : array

        the power spectral density

    freq : array

        the frequency vector. A leading zero frequency (and the matching PSD
        value) is dropped before the analysis.

    fmin : float

        the min of frequency range for beta estimation. None or 0 selects the
        smallest frequency in freq.

    fmax : float

        the max of frequency range for beta estimation. None selects the
        largest frequency in freq.

    logf_binning_step : str, {'max', 'first'}

        how the width of the log-frequency bins is chosen:

        - 'max' (default): the largest spacing between consecutive values of log(freq)
        - 'first': the spacing between the first two values of log(freq) at or above fmin

    verbose : bool

         if True, will print out debug information

    Returns
    -------

    res : namedtuple

        a namedtuple with the fields listed below. Every field is NaN when
        freq is empty, when [fmin, fmax] is not contained in the range of freq,
        or when fewer than two frequencies are selected.

    beta : float

        the estimated slope

    f_binned : array

        binned frequency vector

    psd_binned : array

        binned power spectral density

    Y_reg : array

        prediction based on linear regression

    std_err : float

        standard error of the estimated slope

    Raises
    ------

    ValueError

        if logf_binning_step is neither 'max' nor 'first'

    '''
    Results = collections.namedtuple('Results', ['beta', 'f_binned', 'psd_binned', 'Y_reg', 'std_err'])
    nan_result = Results(beta=np.nan, f_binned=np.nan, psd_binned=np.nan, Y_reg=np.nan, std_err=np.nan)

    psd = np.asarray(psd)
    freq = np.asarray(freq)

    # drop the PSD at frequency zero
    if freq.size > 0 and freq[0] == 0:
        psd = psd[1:]
        freq = freq[1:]

    # nothing to fit
    if freq.size == 0:
        return nan_result

    if fmin is None or fmin == 0:
        fmin = np.min(freq)

    if fmax is None:
        fmax = np.max(freq)

    # the requested range must lie within the available frequencies
    if np.max(freq) < fmax or np.min(freq) > fmin:
        if verbose:
            print(fmin, fmax)
            print(np.min(freq), np.max(freq))
            print('WRONG')
        return nan_result

    # frequency binning start
    fminindx = np.where(freq >= fmin)[0][0]
    fmaxindx = np.where(freq <= fmax)[0][-1]

    if fminindx >= fmaxindx:
        return nan_result

    logf = np.log(freq)
    if logf_binning_step == 'max':
        logf_step = np.max(np.diff(logf))
    elif logf_binning_step == 'first':
        logf_step = logf[fminindx+1] - logf[fminindx]
    else:
        raise ValueError('the option for logf_binning_step is unknown')

    logf_start = logf[fminindx]
    logf_end = logf[fmaxindx]
    logf_binedges = np.arange(logf_start, logf_end+logf_step, logf_step)

    n_intervals = np.size(logf_binedges)-1
    logpsd_binned = np.empty(n_intervals)
    logf_binned = np.empty(n_intervals)

    logpsd = np.log(psd)

    # NOTE: bins are half-open (lb, ub], so the sample located exactly at the
    # first bin edge (the frequency at fminindx) does not fall in any bin.
    for i in range(n_intervals):
        lb = logf_binedges[i]
        ub = logf_binedges[i+1]
        in_bin = (logf > lb) & (logf <= ub)

        # an empty bin is NaN; it is dropped from the regression below
        logpsd_binned[i] = np.nanmean(logpsd[in_bin]) if in_bin.any() else np.nan
        logf_binned[i] = (ub + lb) / 2

    f_binned = np.exp(logf_binned)
    psd_binned = np.exp(logpsd_binned)
    # frequency binning end

    # linear regression below
    Y = np.log10(psd_binned)
    X = np.log10(f_binned)
    X_ex = sm.add_constant(X)

    # note below: 'drop' is used for missing, so NaNs will be removed, and we need to put it back in the end
    model = sm.OLS(Y, X_ex, missing='drop')
    results = model.fit()

    if np.size(results.params) < 2:
        # no slope could be fitted
        beta = np.nan
        Y_reg = np.nan
        std_err = np.nan
    else:
        beta = -results.params[1]  # the slope we want
        Y_reg_raw = 10**model.predict(results.params)  # prediction based on linear regression
        # put NaN back where psd_binned is NaN so Y_reg aligns with f_binned
        Y_reg = np.full(np.shape(psd_binned), np.nan)
        Y_reg[~np.isnan(psd_binned)] = Y_reg_raw

        std_err = results.bse[1]

    res = Results(beta=beta, f_binned=f_binned, psd_binned=psd_binned, Y_reg=Y_reg, std_err=std_err)

    return res

def beta2Hurst(beta):
    r''' Translates spectral slope to Hurst exponent

    Computes H = (beta - 1) / 2.

    Parameters
    ----------

    beta : float
        the estimated slope of a power spectral density :math:`S(f) \propto 1/f^{\beta}`

    Returns
    -------

    H : float
        Hurst index, should be in (0, 1)

    See also
    --------
    pyleoclim.utils.spectral.beta_estimation: Estimate the scaling exponent of a power spectral density.

    '''
    H = (beta-1)/2

    return H

def psd_ar(var_noise, freq, ar_params, f_sampling):
    ''' Theoretical power spectral density (PSD) of an autoregressive model

    For an AR(p) process with coefficients ar_params[0], ..., ar_params[p-1]
    the returned spectrum is

        var_noise / |1 - sum_k ar_params[k-1] * exp(-2*pi*i*k*freq/f_sampling)|**2

    where the sum runs over the lags k = 1, ..., p.

    Parameters
    ----------

    var_noise : float

        the variance of the noise of the AR process

    freq : array

        vector of frequency

    ar_params : array

        autoregressive coefficients, not including zero-lag

    f_sampling : float

        sampling frequency

    Returns
    -------

    psd : array
        power spectral density, one value per entry of freq

    '''
    # Column vector of coefficients: each coefficient must scale its own lag
    # (row), not be broadcast along the frequency axis.
    coeffs = np.asarray(ar_params).reshape(-1, 1)
    freq_row = np.asarray(freq).reshape(1, -1)

    # lags 1..p as a column, so lags * freq_row has shape (p, n_freq)
    lags = np.arange(1, coeffs.shape[0] + 1).reshape(-1, 1)
    lag_terms = np.exp(-1j * 2 * np.pi * lags * freq_row / f_sampling)

    # sum over the lags for each frequency
    psd = var_noise / np.absolute(1 - np.sum(coeffs * lag_terms, axis=0))**2

    return psd



def psd_fBM(freq, ts, H):
    ''' Theoretical power spectral density of a fractional Brownian motion

    With omega = 2*pi*freq and T the length of the time axis (max(ts) - min(ts)),
    the returned spectrum is

        (1 - 2**(1 - 2*H) * sin(2*omega*T) / (2*omega*T)) / |omega|**(1 + 2*H)

    Parameters
    ----------

    freq : array

        vector of frequency. The expression is undefined (NaN) at freq = 0.

    ts : array

        the time axis of the time series

    H : float

        Hurst exponent, should be in (0, 1)

    Returns
    -------

    psd : array
        power spectral density

    References
    ----------

    Flandrin, P. On the spectrum of fractional Brownian motions.
    IEEE Transactions on Information Theory 35, 197–199 (1989).

    '''
    # angular frequency
    omega = 2 * np.pi * np.asarray(freq, dtype=float)
    # length of the observation window
    T = np.max(ts) - np.min(ts)

    # evaluated for all frequencies at once instead of looping element by element
    arg = 2 * omega * T
    psd = (1 - 2**(1 - 2*H) * np.sin(arg) / arg) / np.abs(omega)**(1 + 2*H)

    return psd