#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Utilities for spectral analysis, including WWZ, CWT, Lomb-Scargle, MTM, and Welch.
Designed for NumPy arrays, either evenly spaced or not (method-dependent).
All spectral methods must return a dictionary containing one vector for the
frequency axis and the power spectral density (PSD).
Additional utilities help compute an optimal frequency vector or estimate scaling exponents.
"""
import numpy as np
from scipy import signal
import nitime.algorithms as nialg
import statsmodels.api as sm
import collections
import warnings
__all__ = [
'wwz_psd',
'cwt_psd',
'mtm',
'lomb_scargle',
'welch',
'periodogram'
]
from .tsbase import (
is_evenly_spaced,
clean_ts
)
from .tsutils import preprocess
from .wavelet import (
make_freq_vector,
prepare_wwz,
wwz,
wwa2psd,
cwt,
)
#-----------
#Wrapper
#-----------
#---------
#Main functions
#---------
[docs]def welch(ys, ts, window='hann',nperseg=None, noverlap=None, nfft=None,
return_onesided=True, detrend = None, sg_kwargs = None,
gaussianize=False, standardize=True,
scaling='density', average='mean'):
'''Estimate power spectral density using Welch's periodogram
Wrapper for the function implemented in scipy.signal.welch
See https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.welch.html for details.
Welch's method is an approach for spectral density estimation. It computes
an estimate of the power spectral density by dividing the data into overlapping
segments, computing a modified periodogram for each segment and averaging
the periodograms to lower the estimator's variance.
Parameters
----------
ys : array
a time series
ts : array
time axis of the time series
window : string or tuple
Desired window to use. Possible values:
- boxcar
- triang
- blackman
- hamming
- hann (default)
- bartlett
- flattop
- parzen
- bohman
- blackmanharris
- nuttail
- barthann
- kaiser (needs beta)
- gaussian (needs standard deviation)
- general_gaussian (needs power, width)
- slepian (needs width)
- dpss (needs normalized half-bandwidth)
- chebwin (needs attenuation)
- exponential (needs decay scale)
- tukey (needs taper fraction)
If the window requires no parameters, then window can be a string.
If the window requires parameters, then window must be a tuple with the first argument the string name of the window, and the next arguments the needed parameters.
If window is a floating point number, it is interpreted as the beta parameter of the kaiser window.
nperseg : int
Length of each segment. If none, nperseg=len(ys)/2. Default to None This will give three segments with 50% overlap
noverlap : int
Number of points to overlap. If None, noverlap=nperseg//2. Defaults to None, represents 50% overlap
nfft: int
Length of the FFT used, if a zero padded FFT is desired. If None, the FFT length is nperseg
return_onesided : bool
If True, return a one-sided spectrum for real data. If False return a two-sided spectrum. Defaults to True, but for complex data, a two-sided spectrum is always returned.
detrend : str
If None, no detrending is applied. Available detrending methods:
- None - no detrending will be applied (default);
- linear - a linear least-squares fit to `ys` is subtracted;
- constant - the mean of `ys` is subtracted
- savitzy-golay - ys is filtered using the Savitzky-Golay filters and the resulting filtered series is subtracted from y.
- emd - Empirical mode decomposition
sg_kwargs : dict
The parameters for the Savitzky-Golay filters. see pyleoclim.utils.filter.savitzy_golay for details.
gaussianize : bool
If True, gaussianizes the timeseries
standardize : bool
If True, standardizes the timeseries
scaling : {"density,"spectrum}
Selects between computing the power spectral density (‘density’) where Pxx has units of V**2/Hz and computing the power spectrum (‘spectrum’) where Pxx has units of V**2, if x is measured in V and fs is measured in Hz. Defaults to ‘density'
average : {'mean','median'}
Method to use when combining periodograms. Defaults to ‘mean’.
Returns
-------
res_dict : dict
the result dictionary, including
- freq (array): the frequency vector
- psd (array): the spectral density vector
See also
--------
pyleoclim.utils.spectral.periodogram : Spectral density estimation using a Blackman-Tukey periodogram
pyleoclim.utils.spectral.mtm : Spectral density estimation using the multi-taper method
pyleoclim.utils.spectral.lomb_scargle : Lomb-scargle priodogram
pyleoclim.utils.spectral.wwz_psd : Spectral estimation using the Weighted Wavelet Z-transform
pyleoclim.utils.spectral.cwt_psd : Spectral estimation using the Continuous Wavelet Transform
pyleoclim.utils.filter.savitzy_golay : Filtering using Savitzy-Golay
pyleoclim.utils.tsutils.detrend : detrending functionalities using 4 possible methods
pyleoclim.utils.tsutils.gaussianize: Quantile maps a 1D array to a Gaussian distribution
pyleoclim.utils.tsutils.standardize: Centers and normalizes a given time series.
References
----------
P. Welch, “The use of the fast Fourier transform for the estimation of power spectra:
A method based on time averaging over short, modified periodograms”,
IEEE Trans. Audio Electroacoust. vol. 15, pp. 70-73, 1967.
'''
if standardize == True:
warnings.warn('Standardizing the timeseries')
ts = np.array(ts)
ys = np.array(ys)
if len(ts) != len(ys):
raise ValueError('Time and value axis should be the same length')
if nperseg == None:
nperseg = len(ys/2)
# remove NaNs
ys, ts = clean_ts(ys,ts)
# check for evenly-spaced
check = is_evenly_spaced(ts)
if check == False:
raise ValueError('For the Welch method, data should be evenly spaced')
# preprocessing
ys = preprocess(ys, ts, detrend=detrend, sg_kwargs=sg_kwargs,
gaussianize=gaussianize, standardize=standardize)
# calculate sampling frequency fs
dt = np.median(np.diff(ts))
fs = 1 / dt
# spectral analysis with scipy welch
freq, psd = signal.welch(ys, fs=fs, window=window,nperseg=nperseg,noverlap=noverlap,
nfft=nfft, return_onesided=return_onesided, scaling=scaling,
average=average, detrend = False, axis=-1)
# fix zero frequency point
if freq[0] == 0:
psd[0] = np.nan
# output result
res_dict = {
'freq': np.asarray(freq),
'psd' : np.asarray(psd),
}
return res_dict
[docs]def mtm(ys, ts, NW=None, BW=None, detrend = None, sg_kwargs=None,
gaussianize=False, standardize=True, adaptive=False, jackknife=True,
low_bias=True, sides='default', nfft=None):
''' Spectral density using the multi-taper method.
If the NW product, or the BW and Fs in Hz are not specified by the user, a bandwidth of 4 times the fundamental frequency, corresponding to NW = 4 will be used.
Based on the nitime package: http://nipy.org/nitime/api/generated/nitime.algorithms.spectral.html
Parameters
----------
ys : array
a time series
ts : array
time axis of the time series
NW : float
The time-bandwidth product NW governs the width (and therefore, height) of a peak, which can take the values [2, 5/2, 3, 7/2, 4]. This product controls the classical bias-variance tradeoff inherent to spectral estimation: a large product limits the variance but increases leakage out of harmonic line. In other words, small values of NW mean high spectral resolution, low bias, but high variance. Large values of the parameter mean lower resolution, higher bias, but reduced variance. There is no automated way to choose this parameter, and the default (NW=4) corresponds to a conservative choice with low variance.
For a demonstration on the effect of this parameter, see the spectral analysis notebook in our tutorials: https://pyleoclim-util.readthedocs.io/en/master/tutorials.html.
BW : float
The sampling-relative bandwidth of the data tapers
detrend : str
If None, no detrending is applied. Available detrending methods:
- None - no detrending will be applied (default);
- linear - a linear least-squares fit to `ys` is subtracted;
- constant - the mean of `ys` is subtracted
- savitzy-golay - ys is filtered using the Savitzky-Golay filters and the resulting filtered series is subtracted from y.
- emd - Empirical mode decomposition
sg_kwargs : dict
The parameters for the Savitzky-Golay filters. see pyleoclim.utils.filter.savitzy_golay for details.
gaussianize : bool
If True, gaussianizes the timeseries
standardize : bool
If True, standardizes the timeseries
adaptive : {True/False}
Use an adaptive weighting routine to combine the PSD estimates of
different tapers.
jackknife : {True/False}
Use the jackknife method to make an estimate of the PSD variance
at each point.
low_bias : {True/False}
Rather than use 2NW tapers, only use the tapers that have better than
90% spectral concentration within the bandwidth (still using
a maximum of 2NW tapers)
sides : str (optional) [ 'default' | 'onesided' | 'twosided' ]
This determines which sides of the spectrum to return.
For complex-valued inputs, the default is two-sided, for real-valued
inputs, default is one-sided Indicates whether to return a one-sided
or two-sided
Returns
-------
res_dict : dict
the result dictionary, including
- freq (array): the frequency vector
- psd (array): the spectral density vector
See Also
--------
pyleoclim.utils.spectral.periodogram : Spectral density estimation using a Blackman-Tukey periodogram
pyleoclim.utils.spectral.welch : spectral estimation using Welch's periodogram
pyleoclim.utils.spectral.lomb_scargle : Lomb-scargle priodogram
pyleoclim.utils.spectral.wwz_psd : Spectral estimation using the Weighted Wavelet Z-transform
pyleoclim.utils.spectral.cwt_psd : Spectral estimation using the Continuous Wavelet Transform
pyleoclim.utils.filter.savitzy_golay : Filtering using Savitzy-Golay
pyleoclim.utils.tsutils.detrend : detrending functionalities using 4 possible methods
pyleoclim.utils.tsutils.gaussianize: Quantile maps a 1D array to a Gaussian distribution
pyleoclim.utils.tsutils.standardize: Centers and normalizes a given time series.
'''
if standardize == True:
warnings.warn('Standardizing the timeseries')
# preprocessing
ts = np.array(ts)
ys = np.array(ys)
if len(ts) != len(ys):
raise ValueError('Time and value axis should be the same length')
# remove NaNs
ys, ts = clean_ts(ys,ts)
# check for evenly-spaced
check = is_evenly_spaced(ts)
if check == False:
raise ValueError('For the MTM method, data should be evenly spaced')
# preprocessing
ys = preprocess(ys, ts, detrend=detrend, sg_kwargs=sg_kwargs,
gaussianize=gaussianize, standardize=standardize)
# calculate sampling frequency fs
dt = np.median(np.diff(ts))
fs = 1 / dt
# spectral analysis
freq, psd, nu = nialg.multi_taper_psd(ys, Fs=fs, NW=NW, BW=BW,adaptive=adaptive,
jackknife=jackknife, low_bias=low_bias,
sides=sides,NFFT=nfft) # call nitime func
# fix the zero frequency point
if freq[0] == 0:
psd[0] = np.nan
# output result
res_dict = {
'freq': np.asarray(freq),
'psd': np.asarray(psd),
}
return res_dict
[docs]def lomb_scargle(ys, ts, freq=None, freq_method='lomb_scargle',
freq_kwargs=None, n50=3, window='hann',
detrend = None, sg_kwargs=None,
gaussianize=False,
standardize=True,
average='mean'):
""" Lomb-scargle priodogram
Appropriate for unevenly-spaced arrays.
Uses the lomb-scargle implementation from scipy.signal: https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.lombscargle.html
Parameters
----------
ys : array
a time series
ts : array
time axis of the time series
freq : str or array
vector of frequency.
If string, uses the following method:
freq_method : str
Method to generate the frequency vector if not set directly. The following options are avialable:
- log
- lomb_scargle (default)
- welch
- scale
- nfft
See utils.wavelet.make_freq_vector for details
freq_kwargs : dict
Arguments for the method chosen in freq_method. See specific functions in utils.wavelet for details
By default, uses dt=median(ts), ofac=4 and hifac=1 for Lomb-Scargle
n50: int
The number of 50% overlapping segment to apply
window : str or tuple
Desired window to use. Possible values:
- boxcar
- triang
- blackman
- hamming
- hann (default)
- bartlett
- flattop
- parzen
- bohman
- blackmanharris
- nuttail
- barthann
- kaiser (needs beta)
- gaussian (needs standard deviation)
- general_gaussian (needs power, width)
- slepian (needs width)
- dpss (needs normalized half-bandwidth)
- chebwin (needs attenuation)
- exponential (needs decay scale)
- tukey (needs taper fraction)
If the window requires no parameters, then window can be a string.
If the window requires parameters, then window must be a tuple with the first argument the string name of the window, and the next arguments the needed parameters.
If window is a floating point number, it is interpreted as the beta parameter of the kaiser window.
detrend : str
If None, no detrending is applied. Available detrending methods:
- None - no detrending will be applied (default);
- linear - a linear least-squares fit to `ys` is subtracted;
- constant - the mean of `ys` is subtracted
- savitzy-golay - ys is filtered using the Savitzky-Golay filters and the resulting filtered series is subtracted from y.
- emd - Empirical mode decomposition
sg_kwargs : dict
The parameters for the Savitzky-Golay filters. see pyleoclim.utils.filter.savitzy_golay for details.
gaussianize : bool
If True, gaussianizes the timeseries
standardize : bool
If True, standardizes the timeseriesprep_args : dict
average : {'mean','median'}
Method to use when averaging periodograms. Defaults to ‘mean’.
Returns
-------
res_dict : dict
the result dictionary, including
- freq (array): the frequency vector
- psd (array): the spectral density vector
See Also
--------
pyleoclim.utils.spectral.periodogram : Estimate power spectral density using a periodogram
pyleoclim.utils.spectral.mtm : Retuns spectral density using a multi-taper method
pyleoclim.utils.spectral.welch : Returns power spectral density using the Welch method
pyleoclim.utils.spectral.wwz_psd : Spectral estimation using the Weighted Wavelet Z-transform
pyleoclim.utils.spectral.cwt_psd : Spectral estimation using the Continuous Wavelet Transform
pyleoclim.utils.filter.savitzy_golay : Filtering using Savitzy-Golay
pyleoclim.utils.tsutils.detrend : detrending functionalities using 4 possible methods
pyleoclim.utils.tsutils.gaussianize: Quantile maps a 1D array to a Gaussian distribution
pyleoclim.utils.tsutils.standardize: Centers and normalizes a given time series.
References
----------
Lomb, N. R. (1976). Least-squares frequency analysis of unequally spaced data. Astrophysics and Space Science 39, 447-462.
Scargle, J. D. (1982). Studies in astronomical time series analysis. II. Statistical aspects of spectral analyis of unvenly spaced data. The Astrophysical Journal, 263(2), 835-853.
Scargle, J. D. (1982). Studies in astronomical time series analysis. II. Statistical aspects of spectral analyis of unvenly spaced data. The Astrophysical Journal, 263(2), 835-853.
"""
if standardize == True:
warnings.warn('Standardizing the timeseries')
ts = np.array(ts)
ys = np.array(ys)
if len(ts) != len(ys):
raise ValueError('Time and value axis should be the same length')
if n50<=0:
raise ValueError('Number of overlapping segments should be greater than 1')
# remove NaNs
ys, ts = clean_ts(ys,ts)
# preprocessing
ys = preprocess(ys, ts, detrend=detrend, sg_kwargs=sg_kwargs,
gaussianize=gaussianize, standardize=standardize)
# divide into segments
nseg=int(np.floor(2*len(ts)/(n50+1)))
index=np.array(np.arange(0,len(ts),nseg/2),dtype=int)
if len(index) == n50+2:
index[-1] = len(ts)
else:
index=np.append(index,len(ts)) #make it ends at the time series
ts_seg=[]
ys_seg=[]
if n50>1:
for idx,i in enumerate(np.arange(0,len(index)-2,1)):
ts_seg.append(ts[index[idx]:index[idx+2]])
ys_seg.append(ys[index[idx]:index[idx+2]])
else:
ts_seg.append(ts)
ys_seg.append(ys)
if freq is None:
freq_kwargs = {} if freq_kwargs is None else freq_kwargs.copy()
if 'dt' not in freq_kwargs.keys():
dt = np.median(np.diff(ts))
freq_kwargs.update({'dt':dt})
freq = make_freq_vector(ts_seg[0],
method=freq_method,
**freq_kwargs)
#remove zero freq
if freq[0]==0:
freq=np.delete(freq,0)
freq_angular = 2 * np.pi * freq
psd_seg=[]
for idx,item in enumerate(ys_seg):
# calculate the frequency vector if needed
win=signal.get_window(window,len(ts_seg[idx]))
scale = len(ts_seg[idx])*2*np.mean(np.diff(ts_seg[idx]))/((win*win).sum())
psd_seg.append(signal.lombscargle(ts_seg[idx],
item*win,
freq_angular,precenter=True)*scale)
# average them up
if average=='mean':
psd=np.mean(psd_seg,axis=0)
elif average=='median':
psd=np.median(psd_seg,axis=0)
else:
raise ValueError('Average should either be set to mean or median')
# Fix possible problems at the edge
if psd[0]<psd[1]:
if abs(1-abs(psd[1]-psd[0])/psd[1])<1.e-2:
# warnings.warn("Unstability at the beginning of freq vector, removing point")
# psd=psd[1:]
# freq=freq[1:]
warnings.warn("Unstability at the beginning of freq vector, setting the point to NaN")
psd[0] = np.nan
else:
if abs(1-abs(psd[0]-psd[1])/psd[0])<1.e-2:
# warnings.warn("Unstability at the beginning of freq vector, removing point")
# psd=psd[1:]
# freq=freq[1:]
warnings.warn("Unstability at the beginning of freq vector, setting the point to NaN")
psd[0] = np.nan
if psd[-1]>psd[-2]:
if abs(1-abs(psd[-1]-psd[-2])/psd[-1])<1.e-2:
warnings.warn("Unstability at the end of freq vector, removing point")
# psd=psd[0:-2]
# freq=freq[0:-2]
psd[-1] = np.nan
psd[-2] = np.nan
else:
if abs(1-abs(psd[-2]-psd[-1])/psd[-2])<1.e-2:
# warnings.warn("Unstability at the end of freq vector, removing point")
# psd=psd[0:-2]
# freq=freq[0:-2]
warnings.warn("Unstability at the end of freq vector, setting the point point to NaN")
psd[-1] = np.nan
psd[-2] = np.nan
# output result
res_dict = {
'freq': np.asarray(freq),
'psd': np.asarray(psd),
}
return res_dict
[docs]def periodogram(ys, ts, window='hann', nfft=None,
return_onesided=True, detrend = None, sg_kwargs=None,
gaussianize=False, standardize=True,
scaling='density'):
''' Spectral density estimation using a Blackman-Tukey periodogram
Based on the `function from scipy <https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.periodogram.html>`_.
Parameters
----------
ys : array
a time series
ts : array
time axis of the time series
window : string or tuple
Desired window to use. Possible values:
- boxcar (default)
- triang
- blackman
- hamming
- hann
- bartlett
- flattop
- parzen
- bohman
- blackmanharris
- nuttail
- barthann
- kaiser (needs beta)
- gaussian (needs standard deviation)
- general_gaussian (needs power, width)
- slepian (needs width)
- dpss (needs normalized half-bandwidth)
- chebwin (needs attenuation)
- exponential (needs decay scale)
- tukey (needs taper fraction)
If the window requires no parameters, then window can be a string.
If the window requires parameters, then window must be a tuple with the first argument the string name of the window, and the next arguments the needed parameters.
If window is a floating point number, it is interpreted as the beta parameter of the kaiser window.
nfft: int
Length of the FFT used, if a zero padded FFT is desired. If None, the FFT length is nperseg
return_onesided : bool
If True, return a one-sided spectrum for real data. If False return a two-sided spectrum. Defaults to True, but for complex data, a two-sided spectrum is always returned.
detrend : str
If None, no detrending is applied. Available detrending methods:
- None - no detrending will be applied (default);
- linear - a linear least-squares fit to `ys` is subtracted;
- constant - the mean of `ys` is subtracted
- savitzy-golay - ys is filtered using the Savitzky-Golay filters and the resulting filtered series is subtracted from y.
- emd - Empirical mode decomposition
sg_kwargs : dict
The parameters for the Savitzky-Golay filters. see pyleoclim.utils.filter.savitzy_golay for details.
gaussianize : bool
If True, gaussianizes the timeseries
standardize : bool
If True, standardizes the timeseries
scaling : {"density,"spectrum}
Selects between computing the power spectral density (‘density’) where Pxx has units of V**2/Hz and computing the power spectrum (‘spectrum’) where Pxx has units of V**2, if x is measured in V and fs is measured in Hz. Defaults to ‘density'
Returns
-------
res_dict : dict
the result dictionary, including
- freq (array): the frequency vector
- psd (array): the spectral density vector
See Also
--------
pyleoclim.utils.spectral.welch : Estimate power spectral density using the welch method
pyleoclim.utils.spectral.mtm : Retuns spectral density using a multi-taper method
pyleoclim.utils.spectral.lomb_scargle : Return the computed periodogram using lomb-scargle algorithm
pyleoclim.utils.spectral.wwz_psd : Spectral estimation using the Weighted Wavelet Z-transform
pyleoclim.utils.spectral.cwt_psd : Spectral estimation using the Continuous Wavelet Transform
pyleoclim.utils.filter.savitzy_golay : Filtering using Savitzy-Golay
pyleoclim.utils.tsutils.detrend : detrending functionalities using 4 possible methods
pyleoclim.utils.tsutils.gaussianize: Quantile maps a 1D array to a Gaussian distribution
pyleoclim.utils.tsutils.standardize: Centers and normalizes a given time series.
'''
if standardize == True:
warnings.warn('Standardizing the timeseries')
ts = np.array(ts)
ys = np.array(ys)
if len(ts) != len(ys):
raise ValueError('Time and value axis should be the same length')
# remove NaNs
ys, ts = clean_ts(ys,ts)
# check for evenly-spaced
check = is_evenly_spaced(ts)
if check == False:
raise ValueError('For the Periodogram method, data should be evenly spaced')
# preprocessing
ys = preprocess(ys, ts, detrend=detrend, sg_kwargs=sg_kwargs,
gaussianize=gaussianize, standardize=standardize)
# calculate sampling frequency fs
dt = np.median(np.diff(ts))
fs = 1 / dt
# spectral analysis
freq, psd = signal.periodogram(ys, fs, window=window, nfft=nfft,
detrend=False, return_onesided=return_onesided,
scaling=scaling, axis=-1)
# fix the zero frequency point
if freq[0] == 0:
psd[0] = np.nan
# output result
res_dict = {
'freq': np.asarray(freq),
'psd': np.asarray(psd),
}
return res_dict
[docs]def wwz_psd(ys, ts, freq=None, freq_method='log', freq_kwargs=None,
tau=None, c=1e-3, nproc=8,
detrend=False, sg_kwargs=None, gaussianize=False,
standardize=True, Neff_threshold=3, anti_alias=False, avgs=2,
method='Kirchner_numba', wwa=None, wwz_Neffs=None, wwz_freq=None):
''' Spectral estimation using the Weighted Wavelet Z-transform
The Weighted wavelet Z-transform (WWZ) is based on Morlet wavelet spectral estimation, using
least squares minimization to suppress the energy leakage caused by data gaps.
WWZ does not rely on interpolation or detrending, and is appropriate for unevenly-spaced datasets.
In particular, we use the variant of Kirchner & Neal (2013), in which basis rotations mitigate the
numerical instability that occurs in pathological cases with the original algorithm (Foster, 1996).
The WWZ method has one adjustable parameter, a decay constant `c` that balances the time and frequency
resolutions of the analysis. The smaller this constant is, the sharper the peaks.
The default value is 1e-3 to obtain smooth spectra that lend themselves to better scaling exponent
estimation, while still capturing the main periodicities.
Note that scalogram applications use the larger value (8π2)−1, justified elsewhere (Foster, 1996).
Parameters
----------
ys : array
a time series, NaNs will be deleted automatically
ts : array
the time points, if `ys` contains any NaNs, some of the time points will be deleted accordingly
freq : array
vector of frequency
freq_method : str, {'log', 'lomb_scargle', 'welch', 'scale', 'nfft'}
Method to generate the frequency vector if not set directly. The following options are avialable:
- 'log' (default)
- 'lomb_scargle'
- 'welch'
- 'scale'
- 'nfft'
See :func:`pyleoclim.utils.wavelet.make_freq_vector` for details
freq_kwargs : dict
Arguments for the method chosen in freq_method. See specific functions in pyleoclim.utils.wavelet for details
tau : array
the evenly-spaced time vector for the analysis, namely the time shift for wavelet analysis
c : float
the decay constant that will determine the analytical resolution of frequency for analysis, the smaller the higher resolution;
the default value 1e-3 is good for most of the spectral analysis cases
nproc : int
the number of processes for multiprocessing
detrend : str, {None, 'linear', 'constant', 'savitzy-golay'}
Methods for detrending include:
- None: the original time series is assumed to have no trend;
- 'linear': a linear least-squares fit to `ys` is subtracted;
- 'constant': the mean of `ys` is subtracted
- 'savitzy-golay': ys is filtered using the Savitzky-Golay filters and the resulting filtered series is subtracted from y.
sg_kwargs : dict
The parameters for the Savitzky-Golay filters. See :func:`pyleoclim.utils.filter.savitzky_golay()` for details.
gaussianize : bool
If True, gaussianizes the timeseries
standardize : bool
If True, standardizes the timeseries
method : string, {'Foster', 'Kirchner', 'Kirchner_f2py', 'Kirchner_numba'}
Available specific implementation of WWZ include:
- 'Foster': the original WWZ method;
- 'Kirchner': the method Kirchner adapted from Foster;
- 'Kirchner_f2py': the method Kirchner adapted from Foster, implemented with f2py for acceleration;
- 'Kirchner_numba': the method Kirchner adapted from Foster, implemented with Numba for acceleration (default);
Neff_threshold : int
threshold for the effective number of points
anti_alias : bool
If True, uses anti-aliasing
avgs : int
flag for whether spectrum is derived from instantaneous point measurements (avgs<>1)
OR from measurements averaged over each sampling interval (avgs==1)
wwa : array
the weighted wavelet amplitude, returned from pyleoclim.utils.wavelet.wwz
wwz_Neffs : array
the matrix of effective number of points in the time-scale coordinates,
returned from pyleoclim.utils.wavelet.wwz
wwz_freq : array
the returned frequency vector from pyleoclim.utils.wavelet.wwz
Returns
-------
res : namedtuple
a namedtuple that includes below items
psd : array
power spectral density
freq : array
vector of frequency
See Also
--------
pyleoclim.utils.spectral.periodogram : Estimate power spectral density using a periodogram
pyleoclim.utils.spectral.mtm : Retuns spectral density using a multi-taper method
pyleoclim.utils.spectral.lomb_scargle : Return the computed periodogram using lomb-scargle algorithm
pyleoclim.utils.spectral.welch : Estimate power spectral density using the Welch method
pyleoclim.utils.spectral.cwt_psd : Spectral estimation using the Continuous Wavelet Transform
pyleoclim.utils.filter.savitzy_golay : Filtering using Savitzy-Golay
pyleoclim.utils.tsutils.detrend : detrending functionalities using 4 possible methods
pyleoclim.utils.tsutils.gaussianize: Quantile maps a 1D array to a Gaussian distribution
pyleoclim.utils.tsutils.standardize: Centers and normalizes a given time series.
References
----------
- Foster, G. (1996). Wavelets for period analysis of unevenly sampled time series. The Astronomical Journal, 112(4), 1709-1729.
- Kirchner, J. W. (2005). Aliasin in 1/f^a noise spectra: origins, consequences, and remedies. Physical Review E covering statistical, nonlinear, biological, and soft matter physics, 71, 66110.
- Kirchner, J. W. and Neal, C. (2013). Universal fractal scaling in stream chemistry and its impli-cations for solute transport and water quality trend detection. Proc Natl Acad Sci USA 110:12213–12218.
'''
if standardize == True:
warnings.warn('Standardizing the timeseries')
ys_cut, ts_cut, freq, tau = prepare_wwz(ys, ts, freq=freq,
freq_method=freq_method,
freq_kwargs=freq_kwargs,tau=tau)
# get wwa but AR1_q is not needed here so set nMC=0
# wwa, _, _, coi, freq, _, Neffs, _ = wwz(ys_cut, ts_cut, freq=freq, tau=tau, c=c, nproc=nproc, nMC=0,
if wwa is None or wwz_Neffs is None or wwz_freq is None:
res_wwz = wwz(ys_cut, ts_cut, freq=freq, tau=tau, c=c, nproc=nproc,
detrend=detrend, sg_kwargs=sg_kwargs,
gaussianize=gaussianize, standardize=standardize, method=method)
wwa = res_wwz.amplitude
wwz_Neffs = res_wwz.Neffs
wwz_freq = res_wwz.freq
psd = wwa2psd(wwa, ts_cut, wwz_Neffs, freq=wwz_freq, Neff_threshold=Neff_threshold, anti_alias=anti_alias, avgs=avgs)
Results = collections.namedtuple('Results', ['psd', 'freq'])
res = Results(psd=psd, freq=freq)
return res
[docs]def cwt_psd(ys, ts, freq=None, freq_method='log', freq_kwargs=None,scale = None,
detrend=False,sg_kwargs={}, gaussianize=False, standardize =True, pad=False,
mother='MORLET',param=None, cwt_res=None):
''' Spectral estimation using the continuous wavelet transform
Uses the Torrence and Compo [1998] continuous wavelet transform implementation
Parameters
----------
ys : numpy.array
the time series.
ts : numpy.array
the time axis.
freq : numpy.array, optional
The frequency vector. The default is None, which will prompt the use of one the underlying functions
freq_method : string, optional
The method by which to obtain the frequency vector. The default is 'log'.
Options are 'log' (default), 'nfft', 'lomb_scargle', 'welch', and 'scale'
freq_kwargs : dict, optional
Optional parameters for the choice of the frequency vector. See make_freq_vector and additional methods for details. The default is {}.
scale : numpy.array
Optional scale vector in place of a frequency vector. Default is None. If scale is not None, frequency method and attached arguments will be ignored.
detrend : bool, string, {'linear', 'constant', 'savitzy-golay', 'emd'}
Whether to detrend and with which option. The default is False.
sg_kwargs : dict, optional
Additional parameters for the savitzy-golay method. The default is {}.
gaussianize : bool, optional
Whether to gaussianize. The default is False.
standardize : bool, optional
Whether to standardize. The default is True.
pad : bool, optional
Whether or not to pad the timeseries. with zeroes to get N up to the next higher power of 2.
This prevents wraparound from the end of the time series to the beginning, and also speeds up the FFT's used to do the wavelet transform.
This will not eliminate all edge effects. The default is False.
mother : string, optional
the mother wavelet function. The default is 'MORLET'. Options are: 'MORLET', 'PAUL', or 'DOG'
param : flaot, option
the mother wavelet parameter. The default is None since it varies for each mother
- For 'MORLET' this is k0 (wavenumber), default is 6.
- For 'PAUL' this is m (order), default is 4.
- For 'DOG' this is m (m-th derivative), default is 2.
cwt_res : dict
Results from pyleoclim.utils.wavelet.cwt
Returns
-------
res : dict
Dictionary containing:
- psd: the power density function
- freq: frequency vector
- scale: the scale vector
- mother: the mother wavelet
- param : the wavelet parameter
See also
--------
pyleoclim.utils.wavelet.make_freq_vector : make the frequency vector with various methods
pyleoclim.utils.wavelet.cwt: Torrence and Compo implementation of the continuous wavelet transform
pyleoclim.utils.spectral.periodogram : Spectral estimation using Blackman-Tukey's periodogram
pyleoclim.utils.spectral.mtm : Spectral estimation using the multi-taper method
pyleoclim.utils.spectral.lomb_scargle : Spectral estimation using the lomb-scargle periodogram
pyleoclim.utils.spectral.welch : Spectral estimation using Welch's periodogram
pyleoclim.utils.spectral.wwz_psd : Spectral estimation using the Weighted Wavelet Z-transform
pyleoclim.utils.tsutils.detrend : detrending functionalities using 4 possible methods
pyleoclim.utils.tsutils.gaussianize: Quantile maps a 1D array to a Gaussian distribution
pyleoclim.utils.tsutils.standardize: Centers and normalizes a given time series.
References
----------
Torrence, C. and G. P. Compo, 1998: A Practical Guide to Wavelet Analysis. Bull. Amer. Meteor. Soc., 79, 61-78.
Python routines available at http://paos.colorado.edu/research/wavelets/
'''
if standardize == True:
warnings.warn('Standardizing the timeseries')
#get the wavelet:
if cwt_res is None:
cwt_res = cwt(ys,ts,freq=freq, freq_method=freq_method, freq_kwargs=freq_kwargs,
scale = scale, detrend=detrend,sg_kwargs=sg_kwargs, gaussianize=gaussianize,
standardize = standardize, pad=pad, mother=mother, param=param)
n= len(ts)
else:
n=len(cwt_res.time)
psd = np.sum(cwt_res.amplitude.T**2,axis=1)/n
Results = collections.namedtuple('Results', ['psd', 'freq','scale','mother','param'])
res = Results(psd=psd, freq=cwt_res.freq, scale=cwt_res.scale, mother=cwt_res.mother,param=cwt_res.param)
return res
[docs]def beta_estimation(psd, freq, fmin=None, fmax=None, logf_binning_step='max', verbose=False):
''' Estimate the scaling exponent of a power spectral density.
Models the spectrum as :math: `S(f) \propto 1/f^{\beta}`. For instance:
- :math: `\beta = 0` corresponds to white noise
- :math: `\beta = 1` corresponds to pink noise
- :math: `\beta = 2` corresponds to red noise (Brownian motion)
Parameters
----------
psd : array
the power spectral density
freq : array
the frequency vector
fmin : float
the min of frequency range for beta estimation
fmax : float
the max of frequency range for beta estimation
verbose : bool
if True, will print out debug information
Returns
-------
beta : float
the estimated slope
f_binned : array
binned frequency vector
psd_binned : array
binned power spectral density
Y_reg : array
prediction based on linear regression
'''
# drop the PSD at frequency zero
if freq[0] == 0:
psd = psd[1:]
freq = freq[1:]
if fmin is None or fmin == 0:
fmin = np.min(freq)
if fmax is None:
fmax = np.max(freq)
Results = collections.namedtuple('Results', ['beta', 'f_binned', 'psd_binned', 'Y_reg', 'std_err'])
if np.max(freq) < fmax or np.min(freq) > fmin:
if verbose:
print(fmin, fmax)
print(np.min(freq), np.max(freq))
print('WRONG')
res = Results(beta=np.nan, f_binned=np.nan, psd_binned=np.nan, Y_reg=np.nan, std_err=np.nan)
return res
# frequency binning start
fminindx = np.where(freq >= fmin)[0][0]
fmaxindx = np.where(freq <= fmax)[0][-1]
if fminindx >= fmaxindx:
res = Results(beta=np.nan, f_binned=np.nan, psd_binned=np.nan, Y_reg=np.nan, std_err=np.nan)
return res
logf = np.log(freq)
if logf_binning_step == 'max':
logf_step = np.max(np.diff(logf))
elif logf_binning_step == 'first':
logf_step = logf[fminindx+1] - logf[fminindx]
else:
raise ValueError('the option for logf_binning_step is unknown')
logf_start = logf[fminindx]
logf_end = logf[fmaxindx]
logf_binedges = np.arange(logf_start, logf_end+logf_step, logf_step)
n_intervals = np.size(logf_binedges)-1
logpsd_binned = np.empty(n_intervals)
logf_binned = np.empty(n_intervals)
logpsd = np.log(psd)
for i in range(n_intervals):
lb = logf_binedges[i]
ub = logf_binedges[i+1]
q = np.where((logf > lb) & (logf <= ub))
logpsd_binned[i] = np.nanmean(logpsd[q])
logf_binned[i] = (ub + lb) / 2
f_binned = np.exp(logf_binned)
psd_binned = np.exp(logpsd_binned)
# frequency binning end
# linear regression below
Y = np.log10(psd_binned)
X = np.log10(f_binned)
X_ex = sm.add_constant(X)
# note below: 'drop' is used for missing, so NaNs will be removed, and we need to put it back in the end
model = sm.OLS(Y, X_ex, missing='drop')
results = model.fit()
if np.size(results.params) < 2:
beta = np.nan
Y_reg = np.nan
std_err = np.nan
else:
beta = -results.params[1] # the slope we want
Y_reg_raw = 10**model.predict(results.params) # prediction based on linear regression
# handeling potential NaNs in psd_binned
Y_reg = []
i = 0
for psd in psd_binned:
if np.isnan(psd):
Y_reg.append(np.nan)
else:
Y_reg.append(Y_reg_raw[i])
i += 1
Y_reg = np.array(Y_reg)
std_err = results.bse[1]
res = Results(beta=beta, f_binned=f_binned, psd_binned=psd_binned, Y_reg=Y_reg, std_err=std_err)
return res
[docs]def beta2Hurst(beta):
''' Translates spectral slope to Hurst exponent
Parameters
----------
beta : float
the estimated slope of a power spectral density :math: `S(f) \propto 1/f^{\beta}`
Returns
-------
H : float
Hurst index, should be in (0, 1)
See also
--------
pyleoclim.utils.spectral.beta_estimation: Estimate the scaling exponent of a power spectral density.
'''
H = (beta-1)/2
return H
[docs]def psd_ar(var_noise, freq, ar_params, f_sampling):
''' Theoretical power spectral density (PSD) of an autoregressive model
Parameters
----------
var_noise : float
the variance of the noise of the AR process
freq : array
vector of frequency
ar_params : array
autoregressive coefficients, not including zero-lag
f_sampling : float
sampling frequency
Returns
-------
psd : array
power spectral density
'''
p = np.size(ar_params)
tmp = np.ndarray(shape=(p, np.size(freq)), dtype=complex)
for k in range(p):
tmp[k, :] = np.exp(-1j*2*np.pi*(k+1)*freq/f_sampling)
psd = var_noise / np.absolute(1-np.sum(ar_params*tmp, axis=0))**2
return psd
[docs]def psd_fBM(freq, ts, H):
''' Theoretical power spectral density of a fractional Brownian motion
Parameters
----------
freq : array
vector of frequency
ts : array
the time axis of the time series
H : float
Hurst exponent, should be in (0, 1)
Returns
-------
psd : array
power spectral density
References
----------
Flandrin, P. On the spectrum of fractional Brownian motions.
IEEE Transactions on Information Theory 35, 197–199 (1989).
'''
nf = np.size(freq)
psd = np.ndarray(shape=(nf))
T = np.max(ts) - np.min(ts)
omega = 2 * np.pi * freq
for k in range(nf):
tmp = 2 * omega[k] * T
psd[k] = (1 - 2**(1 - 2*H)*np.sin(tmp)/tmp) / np.abs(omega[k])**(1 + 2*H)
return psd