"""
The EnsembleSeries class is a child of MultipleSeries, designed for ensemble applications (e.g. draws from a posterior distribution of ages, model ensembles with randomized initial conditions, or some other stochastic ensemble).
In addition to a MultipleSeries object, an EnsembleSeries object has the following properties:
- All series members are assumed to share the same units and other metadata.
- The class enables ensemble-oriented methods for computation (e.g., quantiles) and visualization (e.g., envelope plot).
"""
from ..utils import plotting, lipdutils
from ..utils import correlation as corrutils
from ..core.series import Series
from ..core.correns import CorrEns
from ..core.multipleseries import MultipleSeries
import warnings
warnings.filterwarnings("ignore")
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from copy import deepcopy
from matplotlib.ticker import FormatStrFormatter
import matplotlib.transforms as transforms
import matplotlib as mpl
from tqdm import tqdm
[docs]class EnsembleSeries(MultipleSeries):
''' EnsembleSeries object
The EnsembleSeries object is a child of the MultipleSeries object, that is, a special case of MultipleSeries, aiming for ensembles of similar series.
Ensembles usually arise from age modeling or Bayesian calibrations. All members of an EnsembleSeries object are assumed to share identical labels and units.
All methods available for MultipleSeries are available for EnsembleSeries. Some functions were modified for the special case of ensembles.
The class enables ensemble-oriented methods for computation (e.g., quantiles)
and visualization (e.g., envelope plot) that are unavailable to other classes.
'''
def __init__(self, series_list, label=None):
for i, ts in enumerate(series_list):
if ts.label is None:
series_list[i].label = f"#{i}" # assign labels if missing
self.series_list = series_list
self.label = label
[docs] @classmethod
def from_AgeEnsembleArray(self, series, age_array, value_depth = None, age_depth = None, extrapolate=True,verbose=True):
'''Function to create an EnsembleSeries object using an age ensemble
Function assumes that the input series and the age array share the same units.
If depth vectors are passed, these are also assumed to share the same units
Parameters
----------
series : pyleoclim.core.series.Series
A Series object with the values to be mapped
age_array : np.array
An array of ages to map the values to
value_depth : vector
An array of depths corresponding to the series values
age_depth : vector
An array of depths corresponding to the age array
extrapolate : bool
Whether to extrapolate the age array to the value depth. Default is True
verbose : bool
Whether to print warnings. Default is True
Returns
-------
EnsembleSeries : pyleoclim.core.ensembleseries.EnsembleSeries
The ensemble created using the time axes from age_array and the values from series.
Examples
--------
.. jupyter-execute::
#Create an ensemble of 100 series with random time axes of length 1000
length = 1000
age_array = np.array([pyleo.utils.tsmodel.random_time_axis(length) for i in range(100)]).T
#Create a random series
value = np.random.randn(length)
time = pyleo.utils.tsmodel.random_time_axis(length)
series = pyleo.Series(time=time,value=value, verbose=False)
#Create an ensemble using these objects
#Note that the time axis of the series object and the number of rows in the age array must match when depth is not passed
ens = pyleo.EnsembleSeries.from_AgeEnsembleArray(series = series,age_array=age_array,verbose=False)
.. jupyter-execute::
#If we have depth vectors for our series and age array, we can pass them to the function
age_length = 1000
age_array = np.array([pyleo.utils.tsmodel.random_time_axis(age_length) for i in range(100)]).T
age_depth = np.arange(age_length)
value_length = 800
value = np.random.randn(value_length)
time = pyleo.utils.tsmodel.random_time_axis(value_length)
series = pyleo.Series(time=time, value=value, verbose=False)
value_depth = np.arange(value_length)
#Note that the length of the depth vectors must match the length of the corresponding object (number of values or number of rows in age array)
ens = pyleo.EnsembleSeries.from_AgeEnsembleArray(series = series,age_array=age_array, value_depth=value_depth, age_depth=age_depth,verbose=False)
'''
if not isinstance(series, Series):
raise ValueError('series must be a Series object')
if verbose:
if hasattr(series,'lat') or hasattr(series,'lon'):
warnings.warn('Passed series object looks like a geoseries object, did you mean to use EnsembleGeoSeries.from_AgeEnsembleArray?')
#squeeze paleoValues into a vector
values = np.squeeze(np.array(series.value))
if age_depth is None:
if value_depth is None:
if len(values) != age_array.shape[0]:
raise ValueError("Age array and series need to have the same length when age_depth is not passed.")
else:
mapped_age = age_array
pass
else:
raise ValueError('Age_depth not found. Please pass both a value depth array and age depth array if value and age are not already aligned. Otherwise, pass neither.')
else:
#Check that both arrays were passed
if value_depth is None:
raise ValueError('Value_depth not found. Please pass both a value depth array and age depth array if value and age are not already aligned. Otherwise, pass neither.')
#Make sure that numpy arrays were given and try to coerce them into vectors if possible
age_depth=np.squeeze(np.array(age_depth))
value_depth = np.squeeze(np.array(value_depth))
#Check that arrays are vectors for np.interp
if age_depth.ndim > 1:
raise ValueError('chronDepth has more than one dimension, please pass it as a vector')
if value_depth.ndim > 1:
raise ValueError('paleoDepth has more than one dimension, please pass it as a vector')
#Check that the shape of the depth arrays matches up with the age array and value vector (separately)
if len(age_depth)!=age_array.shape[0]:
raise ValueError("Age depth and age array need to have the same length")
if len(value_depth)!=len(values):
raise ValueError("Paleo depth and series time need to have the same length")
#Interpolate the age array to the value depth
mapped_age = lipdutils.mapAgeEnsembleToPaleoData(
ensembleValues=age_array,
depthEnsemble=age_depth,
depthMapping=value_depth,
extrapolate=extrapolate
)
series_list = []
#check that mapped_age and the original time vector are similar, and that the object is not a geoseries object
if verbose:
if (np.mean(mapped_age[-1,:]) > 10*series.time[-1]) or (np.mean(mapped_age[-1,:]) < 0.1*series.time[-1]):
warnings.warn('The mapped age array is significantly different from the original time vector. You may want to check that the units are appropriate.')
elif (np.mean(mapped_age[0,:]) > 10*series.time[0]) or (np.mean(mapped_age[0,:]) < 0.1*series.time[0]):
warnings.warn('The mapped age array is significantly different from the original time vector. You may want to check that the units are appropriate.')
for s in mapped_age.T:
series_tmp = series.copy()
series_tmp.time = s
series_list.append(series_tmp)
return EnsembleSeries(series_list)
[docs] @classmethod
def from_PaleoEnsembleArray(self, series, paleo_array, paleo_depth = None, age_depth = None, extrapolate=True,verbose=True):
'''Function to create an EnsembleSeries object using a paleo ensemble
Function assumes that the input series and the age array share the same units.
If depth vectors are passed, these are also assumed to share the same units
Parameters
----------
series : pyleoclim.core.series.Series
A Series object with the values to be mapped
paleo_array : np.array
An array of paleo values to map the values to
paleo_depth : vector
An array of depths corresponding to the paleo values
age_depth : vector
An array of depths corresponding to the time axis of the series object
extrapolate : bool
Whether to extrapolate the age array to the value depth. Default is True
verbose : bool
Whether to print warnings. Default is True
Returns
-------
EnsembleSeries : pyleoclim.core.ensembleseries.EnsembleSeries
The ensemble created using the time axes from age_array and the values from series.
Examples
--------
.. jupyter-execute::
#Create an ensemble of 100 series with random time axes of length 1000
length = 1000
paleo_array = np.array([np.random.normal(0, 1, 1000) for _ in range(100)]).T
#Create a random series
value = np.random.normal(0,1,length)
time = pyleo.utils.tsmodel.random_time_axis(length)
series = pyleo.Series(time=time, value=value, verbose=False)
#Create an ensemble using these objects
#Note that the time axis of the series object and the number of rows in the age array must match when depth is not passed
ens = pyleo.EnsembleSeries.from_PaleoEnsembleArray(series = series,paleo_array=paleo_array,verbose=False)
.. jupyter-execute::
#In this example we'll explicitly pass the depth vectors
paleo_length = 1000
paleo_array = np.array([np.random.normal(0, 1, 1000) for _ in range(100)]).T
paleo_depth = np.arange(paleo_length)
age_length = 800
value = np.random.normal(0,1,age_length)
time = pyleo.utils.tsmodel.random_time_axis(age_length)
series = pyleo.Series(time=time, value=value, verbose=False)
age_depth = np.arange(age_length)
ens = pyleo.EnsembleSeries.from_PaleoEnsembleArray(series = series,paleo_array=paleo_array,paleo_depth=paleo_depth,age_depth=age_depth,verbose=False)
'''
if not isinstance(series, Series):
raise ValueError('series must be a Series object')
if verbose:
if hasattr(series,'lat') or hasattr(series,'lon'):
warnings.warn('Passed series object looks like a geoseries object, did you mean to use EnsembleGeoSeries.from_AgeEnsembleArray?')
#squeeze paleoValues into a vector
time = np.squeeze(np.array(series.time))
if paleo_depth is None:
if age_depth is None:
if len(time) != paleo_array.shape[0]:
raise ValueError("Paleo array and series need to have the same length when paleo_depth is not passed.")
else:
mapped_paleo = paleo_array
pass
else:
raise ValueError('Paleo_depth not found. Please pass both a paleo depth array and age depth array if the paleo ensemble and age are not already aligned. Otherwise, pass neither.')
else:
#Check that both arrays were passed
if age_depth is None:
raise ValueError('Value_depth not found. Please pass both a paleo depth array and age depth array if the paleo ensemble and age are not already aligned. Otherwise, pass neither.')
#Make sure that numpy arrays were given and try to coerce them into vectors if possible
age_depth=np.squeeze(np.array(age_depth))
paleo_depth = np.squeeze(np.array(paleo_depth))
#Check that arrays are vectors for np.interp
if age_depth.ndim > 1:
raise ValueError('age_depth has more than one dimension, please pass it as a vector')
if paleo_depth.ndim > 1:
raise ValueError('paleo_depth has more than one dimension, please pass it as a vector')
#Check that the shape of the depth arrays matches up with the age array and value vector (separately)
if len(paleo_depth)!=paleo_array.shape[0]:
raise ValueError("Paleo depth and paleo array need to have the same length")
if len(age_depth)!=len(time):
raise ValueError("Age depth and series time need to have the same length")
#Interpolate the age array to the value depth
mapped_paleo = lipdutils.mapAgeEnsembleToPaleoData(
ensembleValues=paleo_array,
depthEnsemble=paleo_depth,
depthMapping=age_depth,
extrapolate=extrapolate
)
series_list = []
#check that mapped_age and the original time vector are similar, and that the object is not a geoseries object
if verbose:
if (np.mean(mapped_paleo[-1,:]) > 10*series.value[-1]) or (np.mean(mapped_paleo[-1,:]) < 0.1*series.value[-1]):
warnings.warn('The mapped value array is significantly different from the original value vector. You may want to check that the units are appropriate.')
elif (np.mean(mapped_paleo[0,:]) > 10*series.value[0]) or (np.mean(mapped_paleo[0,:]) < 0.1*series.value[0]):
warnings.warn('The mapped value array is significantly different from the original value vector. You may want to check that the units are appropriate.')
for s in mapped_paleo.T:
series_tmp = series.copy()
series_tmp.value = s
series_list.append(series_tmp)
return EnsembleSeries(series_list)
[docs] def make_labels(self):
'''Initialization of labels
Returns
-------
time_header : str
Label for the time axis
value_header : str
Label for the value axis
'''
ts_list = self.series_list
if ts_list[0].time_name is not None:
time_name_str = ts_list[0].time_name
else:
time_name_str = 'time'
if ts_list[0].value_name is not None:
value_name_str = ts_list[0].value_name
else:
value_name_str = 'value'
if ts_list[0].value_unit is not None:
value_header = f'{value_name_str} [{ts_list[0].value_unit}]'
else:
value_header = f'{value_name_str}'
if ts_list[0].time_unit is not None:
time_header = f'{time_name_str} [{ts_list[0].time_unit}]'
else:
time_header = f'{time_name_str}'
return time_header, value_header
[docs] def slice(self, timespan):
''' Selects a limited time span from the object
Parameters
----------
timespan : tuple or list
The list of time points for slicing, whose length must be even.
When there are n time points, the output Series includes n/2 segments.
For example, if timespan = [a, b], then the sliced output includes one segment [a, b];
if timespan = [a, b, c, d], then the sliced output includes segment [a, b] and segment [c, d].
Returns
-------
new : EnsembleSeries
The sliced EnsembleSeries object.
Examples
--------
Select part of an object
.. jupyter-execute::
nn = 20 # number of noise realizations
nt = 200
series_list = []
time, signal = pyleo.utils.gen_ts(model='colored_noise',nt=nt,alpha=2.0)
ts = pyleo.Series(time=time, value = signal, verbose=False).standardize()
noise = np.random.randn(nt,nn)
for idx in range(nn): # noise
ts = pyleo.Series(time=time, value=ts.value+5*noise[:,idx], verbose=False)
series_list.append(ts)
ts_ens = pyleo.EnsembleSeries(series_list)
fig, ax = ts_ens.plot_envelope(curve_lw=1.5)
fig, ax = ts_ens.slice([100, 199]).plot_envelope(curve_lw=1.5)
'''
new = self.copy()
for idx, ts in enumerate(self.series_list):
tsc = ts.slice(timespan)
new.series_list[idx] = tsc
return new
[docs] def quantiles(self, qs=[0.05, 0.5, 0.95], axis = 'value'):
'''Calculate quantiles of an EnsembleSeries object. If axis is 'value', the calculation requires for the time axis to be the same. You can use the common_time method to do so. In essence, it transforms the time uncertainty into a y-axis uncertainty. If axis is 'time', the values should be the same for all members of the emsemble.
Reuses [scipy.stats.mstats.mquantiles](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.mstats.mquantiles.html) function.
Parameters
----------
qs : list, optional
List of quantiles to consider for the calculation. The default is [0.05, 0.5, 0.95].
axis : ['time', 'value']
Whether to calculate the quantiles over the values or time. Default is 'value'.
Returns
-------
ens_qs : EnsembleSeries
EnsembleSeries object containing empirical quantiles of original
See also
--------
pyleoclim.core.multipleseries.MultipleSeries.common_time : A method to align axes
Examples
--------
.. jupyter-execute::
nn = 30 # number of noise realizations
nt = 500
series_list = []
t,v = pyleo.utils.gen_ts(model='colored_noise',nt=nt,alpha=1.0)
signal = pyleo.Series(t,v)
for idx in range(nn): # noise
noise = np.random.randn(nt,nn)*100
ts = pyleo.Series(time=signal.time, value=signal.value+noise[:,idx], verbose=False)
series_list.append(ts)
ts_ens = pyleo.EnsembleSeries(series_list)
ens_qs = ts_ens.quantiles()
To calculate in the time dimension:
.. jupyter-execute::
nn = 30 #number of age models
time = np.arange(1,20000,100) #create a time vector
std_dev = 20 # Noise to be considered
t,v = pyleo.utils.gen_ts(model='colored_noise',nt=len(time),alpha=1.0)
series_list = []
for i in range(nn):
noise = np.random.normal(0,std_dev,len(time))
ts=pyleo.Series(time=np.sort(time+noise),value=v,verbose=False)
series_list.append(ts)
time_ens = pyleo.EnsembleSeries(series_list)
ens_qs = time_ens.quantiles(axis='time')
'''
if axis == 'value':
time = np.copy(self.series_list[0].time)
vals = []
for ts in self.series_list:
if not np.array_equal(ts.time, time):
raise ValueError('Time axis not consistent across the ensemble!')
vals.append(ts.value)
vals = np.array(vals)
# ens_qs = mquantiles(vals, qs, axis=0)
ens_qs = np.nanquantile(vals, qs, axis=0)
ts_list = []
for i, quant in enumerate(ens_qs):
ts = Series(time=time, value=quant, label=f'{qs[i]*100:g}%', verbose=False)
ts_list.append(ts)
elif axis == 'time':
value = np.copy(self.series_list[0].value)
vals = []
for ts in self.series_list:
if not np.array_equal(ts.value, value):
raise ValueError('Value axis not consistent across the ensemble!')
vals.append(ts.time)
vals = np.array(vals)
# ens_qs = mquantiles(vals, qs, axis=0)
ens_qs = np.nanquantile(vals, qs, axis=0)
ts_list = []
for i, quant in enumerate(ens_qs):
ts = Series(time=quant, value=value, label=f'{qs[i]*100:g}%', verbose=False)
ts_list.append(ts)
else:
raise ValueError("Axis should be either 'value' or 'time'")
ens_qs = EnsembleSeries(series_list=ts_list)
return ens_qs
[docs] def correlation(self, target=None, timespan=None, alpha=0.05, method = 'ttest', statistic = 'pearsonr',number=1000,
settings=None, fdr_kwargs=None, common_time_kwargs=None, mute_pbar=False, seed=None):
''' Calculate the correlation between an EnsembleSeries object to a target.
If the target is not specified, then the 1st member of the ensemble will be the target
Note that the FDR approach is applied by default to determine the significance of the p-values (more information in See Also below).
Parameters
----------
target : Series or EnsembleSeries
A pyleoclim Series object or EnsembleSeries object.
When the target is also an EnsembleSeries object, then the calculation of correlation is performed in a one-to-one sense,
and the ourput list of correlation values and p-values will be the size of the series_list of the self object.
That is, if the self object contains n Series, and the target contains n+m Series,
then only the first n Series from the object will be used for the calculation;
otherwise, if the target contains only n-m Series, then the first m Series in the target will be used twice in sequence.
timespan : tuple
The time interval over which to perform the calculation
alpha : float
The significance level (0.05 by default)
method : str, {'ttest','built-in','ar1sim','phaseran','CN'}
method for significance testing. Default is 'ttest' to lower computational cost, but this is not always the best choice
statistic : str
The name of the statistic used to measure the association, to be chosen from a subset of
https://docs.scipy.org/doc/scipy/reference/stats.html#association-correlation-tests
Currently supported: ['pearsonr','spearmanr','pointbiserialr','kendalltau','weightedtau']
The default is 'pearsonr'.
settings : dict
Parameters for the correlation function (per scipy)
number : int
the number of simulations (default: 1000)
fdr_kwargs : dict
Parameters for the FDR function
common_time_kwargs : dict
Parameters for the method MultipleSeries.common_time()
mute_pbar : bool; {True,False}
If True, the progressbar will be muted. Default is False.
seed : float or int
random seed for isopersistent and isospectral methods
Returns
-------
corr_ens : CorrEns
The resulting object, see pyleoclim.CorrEns
See also
--------
pyleoclim.core.series.Series.correlation : pairwise correlations
pyleoclim.utils.correlation.fdr : False Discovery Rate
pyleoclim.core.correns.CorrEns : The correlation ensemble object
pyleoclim.core.surrogateseries.SurrogateSeries : surrogate series
Examples
--------
.. jupyter-execute::
nn = 50 # number of noise realizations
nt = 100
series_list = []
time, signal = pyleo.utils.gen_ts(model='colored_noise',nt=nt,alpha=2.0)
ts = pyleo.Series(time=time, value = signal, verbose=False).standardize()
noise = np.random.randn(nt,nn)
for idx in range(nn): # noise
ts = pyleo.Series(time=time, value=ts.value+5*noise[:,idx], verbose=False)
series_list.append(ts)
ts_ens = pyleo.EnsembleSeries(series_list)
# to set an arbitrary random seed to fix the result
corr_res = ts_ens.correlation(ts, seed=2333)
print(corr_res)
# to change the statistic:
corr_res = ts_ens.correlation(ts, statistic='kendalltau', method='phaseran', number=20)
print(corr_res)
The `print` function tabulates the output, and conveys the p-value according
to the correlation test applied ("ttest", by default). To plot the result:
.. jupyter-execute::
corr_res.plot()
'''
if target is None:
target = self.series_list[0]
r_list = []
p_list = []
signif_list = []
print("Looping over "+ str(len(self.series_list)) +" Series in the ensemble")
for idx, ts1 in tqdm(enumerate(self.series_list), total=len(self.series_list), disable=mute_pbar):
if hasattr(target, 'series_list'):
nEns = np.size(target.series_list)
if idx < nEns:
ts2 = target.series_list[idx].copy()
#value2 = target.series_list[idx].value
#time2 = target.series_list[idx].time
else:
#value2 = target.series_list[idx-nEns].value
#time2 = target.series_list[idx-nEns].time
ts2 = target.series_list[idx-nEns].copy()
else:
ts2 = target.copy()
#value2 = target.value
#time2 = target.time
#ts2 = target.series_list[idx].copy()
#ts2.time = time2; ts2.value = value2
#ts2 = Series(time=time2, value=value2, verbose=idx==0, auto_time_params=False)
corr_res = ts1.correlation(ts2, timespan=timespan, method=method,number=number,
statistic=statistic,
settings=settings, mute_pbar=True,
common_time_kwargs=common_time_kwargs, seed=seed)
r_list.append(corr_res.r)
signif_list.append(corr_res.signif)
p_list.append(corr_res.p)
r_list = np.array(r_list)
p_list = np.array(p_list)
signif_fdr_list = []
fdr_kwargs = {} if fdr_kwargs is None else fdr_kwargs.copy()
args = {}
args.update(fdr_kwargs)
for i in range(np.size(signif_list)):
signif_fdr_list.append(False)
fdr_res = corrutils.fdr(p_list, **fdr_kwargs)
if fdr_res is not None:
for i in fdr_res:
signif_fdr_list[i] = True
corr_ens = CorrEns(r_list, p_list, signif_list, signif_fdr_list, alpha)
return corr_ens
[docs] def plot_traces(self, figsize=[10, 4], xlabel=None, ylabel=None, title=None, num_traces=10, seed=None,
xlim=None, ylim=None, linestyle='-', savefig_settings=None, ax=None, legend=True,
color=sns.xkcd_rgb['pale red'], lw=0.5, alpha=0.3, lgd_kwargs=None):
'''Plot EnsembleSeries as a subset of traces.
Parameters
----------
figsize : list, optional
The figure size. The default is [10, 4].
xlabel : str, optional
x-axis label. The default is None.
ylabel : str, optional
y-axis label. The default is None.
title : str, optional
Plot title. The default is None.
xlim : list, optional
x-axis limits. The default is None.
ylim : list, optional
y-axis limits. The default is None.
color : str, optional
Color of the traces. The default is sns.xkcd_rgb['pale red'].
alpha : float, optional
Transparency of the lines representing the multiple members. The default is 0.3.
linestyle : {'-', '--', '-.', ':', '', (offset, on-off-seq), ...}
Set the linestyle of the line
lw : float, optional
Width of the lines representing the multiple members. The default is 0.5.
num_traces : int, optional
Number of traces to plot. The default is 10.
savefig_settings : dict, optional
the dictionary of arguments for plt.savefig(); some notes below:
- "path" must be specified; it can be any existed or non-existed path,
with or without a suffix; if the suffix is not given in "path", it will follow "format"
- "format" can be one of {"pdf", "eps", "png", "ps"} The default is None.
ax : matplotlib.ax, optional
Matplotlib axis on which to return the plot. The default is None.
legend : bool; {True,False}, optional
Whether to plot the legend. The default is True.
lgd_kwargs : dict, optional
Parameters for the legend. The default is None.
seed : int, optional
Set the seed for the random number generator. Useful for reproducibility. The default is None.
Returns
-------
fig : matplotlib.figure
the figure object from matplotlib
See [matplotlib.pyplot.figure](https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.figure.html) for details.
ax : matplotlib.axis
the axis object from matplotlib
See [matplotlib.axes](https://matplotlib.org/api/axes_api.html) for details.
See also
--------
pyleoclim.utils.plotting.savefig : Saving figure in Pyleoclim
Examples
--------
.. jupyter-execute::
nn = 30 # number of noise realizations
nt = 500
series_list = []
t,v = pyleo.utils.gen_ts(model='colored_noise',nt=nt,alpha=1.0)
signal = pyleo.Series(time=t,value=v, verbose=False)
for idx in range(nn): # noise
noise = np.random.randn(nt,nn)*100
ts = pyleo.Series(time=signal.time, value=signal.value+noise[:,idx], verbose=False)
series_list.append(ts)
ts_ens = pyleo.EnsembleSeries(series_list)
fig, ax = ts_ens.plot_traces(alpha=0.2,num_traces=8)
'''
savefig_settings = {} if savefig_settings is None else savefig_settings.copy()
lgd_kwargs = {} if lgd_kwargs is None else lgd_kwargs.copy()
num_traces = min(num_traces, len(self.series_list)) # restrict to the smaller of the two
# generate default axis labels
time_label, value_label = self.make_labels()
if xlabel is None:
xlabel = time_label
if ylabel is None:
ylabel = value_label
if ax is None:
fig, ax = plt.subplots(figsize=figsize)
if num_traces > 0:
if seed is not None:
np.random.seed(seed)
nts = np.size(self.series_list)
random_draw_idx = np.random.choice(nts, num_traces, replace=False)
for idx in random_draw_idx:
self.series_list[idx].plot(xlabel=xlabel, ylabel=ylabel, zorder=99, linewidth=lw,
xlim=xlim, ylim=ylim, ax=ax, color=color, alpha=alpha,linestyle=linestyle, label='_ignore')
l1, = ax.plot(np.nan, np.nan, color=color, label=f'example members (n={num_traces})',linestyle='-')
if title is not None:
ax.set_title(title)
else:
if self.label is not None:
ax.set_title(self.label)
if legend==True:
lgd_args = {'frameon': False}
lgd_args.update(lgd_kwargs)
ax.legend(**lgd_args)
elif legend==False:
ax.legend().remove()
else:
raise ValueError('legend should be set to either True or False')
if 'fig' in locals():
if 'path' in savefig_settings:
plotting.savefig(fig, settings=savefig_settings)
return fig, ax
else:
return ax
[docs] def plot_envelope(self, figsize=[10, 4], qs=[0.025, 0.25, 0.5, 0.75, 0.975],
xlabel=None, ylabel=None, title=None,
xlim=None, ylim=None, savefig_settings=None, ax=None, plot_legend=True,
curve_clr=sns.xkcd_rgb['pale red'], curve_lw=2, shade_clr=sns.xkcd_rgb['pale red'], shade_alpha=0.2,
inner_shade_label='IQR', outer_shade_label='95% CI', lgd_kwargs=None):
''' Plot EnsembleSeries as an envelope.
Parameters
----------
figsize : list, optional
The figure size. The default is [10, 4].
qs : list, optional
The significance levels to consider. The default is [0.025, 0.25, 0.5, 0.75, 0.975] (median, interquartile range, and central 95% region)
xlabel : str, optional
x-axis label. The default is None.
ylabel : str, optional
y-axis label. The default is None.
title : str, optional
Plot title. The default is None.
xlim : list, optional
x-axis limits. The default is None.
ylim : list, optional
y-axis limits. The default is None.
savefig_settings : dict, optional
the dictionary of arguments for plt.savefig(); some notes below:
- "path" must be specified; it can be any existed or non-existed path,
with or without a suffix; if the suffix is not given in "path", it will follow "format"
- "format" can be one of {"pdf", "eps", "png", "ps"} The default is None.
ax : matplotlib.ax, optional
Matplotlib axis on which to return the plot. The default is None.
plot_legend : bool; {True,False}, optional
Wether to plot the legend. The default is True.
curve_clr : str, optional
Color of the main line (median). The default is sns.xkcd_rgb['pale red'].
curve_lw : str, optional
Width of the main line (median). The default is 2.
shade_clr : str, optional
Color of the shaded envelope. The default is sns.xkcd_rgb['pale red'].
shade_alpha : float, optional
Transparency on the envelope. The default is 0.2.
inner_shade_label : str, optional
Label for the envelope. The default is 'IQR'.
outer_shade_label : str, optional
Label for the envelope. The default is '95\% CI'.
lgd_kwargs : dict, optional
Parameters for the legend. The default is None.
Returns
-------
fig : matplotlib.figure
the figure object from matplotlib
See [matplotlib.pyplot.figure](https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.figure.html) for details.
ax : matplotlib.axis
the axis object from matplotlib
See [matplotlib.axes](https://matplotlib.org/api/axes_api.html) for details.
See also
--------
pyleoclim.utils.plotting.savefig : Saving figure in Pyleoclim
Examples
--------
.. jupyter-execute::
nn = 30 # number of noise realizations
nt = 500
series_list = []
t,v = pyleo.utils.gen_ts(model='colored_noise',nt=nt,alpha=1.0)
signal = pyleo.Series(time=t,value=v, verbose=False)
for idx in range(nn): # noise
noise = np.random.randn(nt,nn)*100
ts = pyleo.Series(time=signal.time, value=signal.value+noise[:,idx], verbose=False)
series_list.append(ts)
ts_ens = pyleo.EnsembleSeries(series_list)
fig, ax = ts_ens.plot_envelope(curve_lw=1.5)
'''
savefig_settings = {} if savefig_settings is None else savefig_settings.copy()
lgd_kwargs = {} if lgd_kwargs is None else lgd_kwargs.copy()
# generate default axis labels
time_label, value_label = self.make_labels()
if xlabel is None:
xlabel = time_label
if ylabel is None:
ylabel = value_label
if ax is None:
fig, ax = plt.subplots(figsize=figsize)
ts_qs = self.quantiles(qs=qs)
if inner_shade_label is None:
inner_shade_label = f'{ts_qs.series_list[1].label}-{ts_qs.series_list[-2].label}'
if outer_shade_label is None:
outer_shade_label = f'{ts_qs.series_list[0].label}-{ts_qs.series_list[-1].label}'
time = ts_qs.series_list[0].time
# plot outer envelope
ax.fill_between(
time, ts_qs.series_list[0].value, ts_qs.series_list[4].value,
color=shade_clr, alpha=shade_alpha, edgecolor=shade_clr, label=outer_shade_label
)
# plot inner envelope on top
ax.fill_between(
time, ts_qs.series_list[1].value, ts_qs.series_list[3].value,
color=shade_clr, alpha=2*shade_alpha, edgecolor=shade_clr, label=inner_shade_label
)
# plot the median
ts_qs.series_list[2].plot(xlabel=xlabel, ylabel=ylabel, linewidth=curve_lw, color=curve_clr,
xlim=xlim, ylim=ylim, ax=ax, zorder=100, label = 'median'
)
if title is not None:
ax.set_title(title)
if plot_legend:
lgd_args = {'frameon': False}
lgd_args.update(lgd_kwargs)
ax.legend(**lgd_args)
else:
ax.legend().set_visible(False)
if 'fig' in locals():
if 'path' in savefig_settings:
plotting.savefig(fig, settings=savefig_settings)
return fig, ax
else:
return ax
[docs] def stackplot(self, figsize=[5, 15], savefig_settings=None, xlim=None, fill_between_alpha=0.2, colors=None, cmap='tab10', norm=None,
spine_lw=1.5, grid_lw=0.5, font_scale=0.8, label_x_loc=-0.15, v_shift_factor=3/4, linewidth=1.5):
''' Stack plot of multiple series
Note that the plotting style is uniquely designed for this one and cannot be properly reset with `pyleoclim.set_style()`.
Parameters
----------
figsize : list
Size of the figure.
colors : list
Colors for plotting.
If None, the plotting will cycle the 'tab10' colormap;
if only one color is specified, then all curves will be plotted with that single color;
if a list of colors are specified, then the plotting will cycle that color list.
cmap : str
The colormap to use when "colors" is None.
norm : matplotlib.colors.Normalize like
The nomorlization for the colormap.
If None, a linear normalization will be used.
savefig_settings : dictionary
the dictionary of arguments for plt.savefig(); some notes below:
- "path" must be specified; it can be any existed or non-existed path,
with or without a suffix; if the suffix is not given in "path", it will follow "format"
- "format" can be one of {"pdf", "eps", "png", "ps"} The default is None.
xlim : list
The x-axis limit.
fill_between_alpha : float
The transparency for the fill_between shades.
spine_lw : float
The linewidth for the spines of the axes.
grid_lw : float
The linewidth for the gridlines.
linewidth : float
The linewidth for the curves.
font_scale : float
The scale for the font sizes. Default is 0.8.
label_x_loc : float
The x location for the label of each curve.
v_shift_factor : float
The factor for the vertical shift of each axis.
The default value 3/4 means the top of the next axis will be located at 3/4 of the height of the previous one.
Returns
-------
fig : matplotlib.figure
the figure object from matplotlib
See [matplotlib.pyplot.figure](https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.figure.html) for details.
ax : matplotlib.axis
the axis object from matplotlib
See [matplotlib.axes](https://matplotlib.org/api/axes_api.html) for details.
See also
--------
pyleoclim.utils.plotting.savefig : Saving figure in Pyleoclim
Examples
--------
.. jupyter-execute::
nn = 10 # number of noise realizations
nt = 200
series_list = []
t, v = pyleo.utils.gen_ts(model='colored_noise',nt=nt,alpha=1.0)
signal, _, _ = pyleo.utils.standardize(v)
noise = np.random.randn(nt,nn)
for idx in range(nn): # noise
ts = pyleo.Series(time=t, value=signal+noise[:,idx], label='trace #'+str(idx+1), verbose=False)
series_list.append(ts)
ts_ens = pyleo.EnsembleSeries(series_list)
fig, ax = ts_ens.stackplot()
'''
current_style = deepcopy(mpl.rcParams)
plotting.set_style('journal', font_scale=font_scale)
savefig_settings = {} if savefig_settings is None else savefig_settings.copy()
n_ts = len(self.series_list)
fig = plt.figure(figsize=figsize)
if xlim is None:
time_min = np.inf
time_max = -np.inf
for ts in self.series_list:
if np.min(ts.time) <= time_min:
time_min = np.min(ts.time)
if np.max(ts.time) >= time_max:
time_max = np.max(ts.time)
xlim = [time_min, time_max]
ax = {}
left = 0
width = 1
height = 1/n_ts
bottom = 1
for idx, ts in enumerate(self.series_list):
if colors is None:
cmap_obj = plt.get_cmap(cmap)
if hasattr(cmap_obj, 'colors'):
nc = len(cmap_obj.colors)
else:
nc = len(self.series_list)
if norm is None:
norm = mpl.colors.Normalize(vmin=0, vmax=nc-1)
clr = cmap_obj(norm(idx%nc))
elif type(colors) is str:
clr = colors
elif type(colors) is list:
nc = len(colors)
clr = colors[idx%nc]
else:
raise TypeError('"colors" should be a list of, or one, Python supported color code (a string of hex code or a tuple of rgba values)')
bottom -= height*v_shift_factor
ax[idx] = fig.add_axes([left, bottom, width, height])
ax[idx].plot(ts.time, ts.value, color=clr, lw=linewidth)
ax[idx].patch.set_alpha(0)
ax[idx].set_xlim(xlim)
time_label, value_label = ts.make_labels()
ax[idx].set_ylabel(value_label, weight='bold')
mu = np.mean(ts.value)
std = np.std(ts.value)
ylim = [mu-4*std, mu+4*std]
ax[idx].fill_between(ts.time, ts.value, y2=mu, alpha=fill_between_alpha, color=clr)
trans = transforms.blended_transform_factory(ax[idx].transAxes, ax[idx].transData)
if ts.label is not None:
ax[idx].text(label_x_loc, mu, ts.label, horizontalalignment='right', transform=trans, color=clr, weight='bold')
ax[idx].set_ylim(ylim)
ax[idx].set_yticks(ylim)
ax[idx].yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
ax[idx].grid(False)
if idx % 2 == 0:
ax[idx].spines['left'].set_visible(True)
ax[idx].spines['left'].set_linewidth(spine_lw)
ax[idx].spines['left'].set_color(clr)
ax[idx].spines['right'].set_visible(False)
ax[idx].yaxis.set_label_position('left')
ax[idx].yaxis.tick_left()
else:
ax[idx].spines['left'].set_visible(False)
ax[idx].spines['right'].set_visible(True)
ax[idx].spines['right'].set_linewidth(spine_lw)
ax[idx].spines['right'].set_color(clr)
ax[idx].yaxis.set_label_position('right')
ax[idx].yaxis.tick_right()
ax[idx].yaxis.label.set_color(clr)
ax[idx].tick_params(axis='y', colors=clr)
ax[idx].spines['top'].set_visible(False)
ax[idx].spines['bottom'].set_visible(False)
ax[idx].tick_params(axis='x', which='both', length=0)
ax[idx].set_xlabel('')
ax[idx].set_xticklabels([])
xt = ax[idx].get_xticks()[1:-1]
for x in xt:
ax[idx].axvline(x=x, color='lightgray', linewidth=grid_lw, ls='-', zorder=-1)
ax[idx].axhline(y=mu, color='lightgray', linewidth=grid_lw, ls='-', zorder=-1)
bottom -= height*(1-v_shift_factor)
ax[n_ts] = fig.add_axes([left, bottom, width, height])
ax[n_ts].set_xlabel(time_label)
ax[n_ts].spines['left'].set_visible(False)
ax[n_ts].spines['right'].set_visible(False)
ax[n_ts].spines['bottom'].set_visible(True)
ax[n_ts].spines['bottom'].set_linewidth(spine_lw)
ax[n_ts].set_yticks([])
ax[n_ts].patch.set_alpha(0)
ax[n_ts].set_xlim(xlim)
ax[n_ts].grid(False)
ax[n_ts].tick_params(axis='x', which='both', length=3.5)
xt = ax[n_ts].get_xticks()[1:-1]
for x in xt:
ax[n_ts].axvline(x=x, color='lightgray', linewidth=grid_lw, ls='-', zorder=-1)
if 'fig' in locals():
if 'path' in savefig_settings:
plotting.savefig(fig, settings=savefig_settings)
mpl.rcParams.update(current_style)
return fig, ax
else:
# reset the plotting style
mpl.rcParams.update(current_style)
return ax
[docs] def histplot(self, figsize=[10, 4], title=None, savefig_settings=None,
ax=None, ylabel='KDE', vertical=False, edgecolor='w', **plot_kwargs):
""" Plots the distribution of the timeseries across ensembles
Reuses the seaborn [histplot](https://seaborn.pydata.org/generated/seaborn.histplot.html) function.
Parameters
----------
figsize : list, optional
The size of the figure. The default is [10, 4].
title : str, optional
Title for the figure. The default is None.
savefig_settings : dict, optional
the dictionary of arguments for plt.savefig(); some notes below:
- "path" must be specified; it can be any existed or non-existed path,
with or without a suffix; if the suffix is not given in "path", it will follow "format"
- "format" can be one of {"pdf", "eps", "png", "ps"}.
The default is None.
ax : matplotlib.axis, optional
A matplotlib axis. The default is None.
ylabel : str, optional
Label for the count axis. The default is 'KDE'.
vertical : bool; {True,False}, optional
Whether to flip the plot vertically. The default is False.
edgecolor : matplotlib.color, optional
The color of the edges of the bar. The default is 'w'.
plot_kwargs : dict
Plotting arguments for seaborn histplot: https://seaborn.pydata.org/generated/seaborn.histplot.html.
See also
--------
pyleoclim.utils.plotting.savefig : Saving figure in Pyleoclim
Examples
--------
.. jupyter-execute::
nn = 30 # number of noise realizations
nt = 500
series_list = []
time, signal = pyleo.utils.gen_ts(model='colored_noise',nt=nt,alpha=1.0)
ts = pyleo.Series(time=time, value = signal, verbose=False).standardize()
noise = np.random.randn(nt,nn)
for idx in range(nn): # noise
ts = pyleo.Series(time=time, value=signal+noise[:,idx], verbose=False)
series_list.append(ts)
ts_ens = pyleo.EnsembleSeries(series_list)
fig, ax = ts_ens.histplot()
"""
savefig_settings = {} if savefig_settings is None else savefig_settings.copy()
if ax is None:
fig, ax = plt.subplots(figsize=figsize)
#make the data into a dataframe so we can flip the figure
time_label, value_label = self.make_labels()
#append all the values together for the plot
val = self.series_list[0].value
for i in range(1,len(self.series_list)):
val=np.append(val,self.series_list[i].value)
if vertical == True:
data=pd.DataFrame({'value':val})
ax = sns.histplot(data=data, y="value", ax=ax, kde=True, edgecolor=edgecolor, **plot_kwargs)
ax.set_ylabel(value_label)
ax.set_xlabel(ylabel)
else:
ax = sns.histplot(val, ax=ax, kde=True, edgecolor=edgecolor, **plot_kwargs)
ax.set_xlabel(value_label)
ax.set_ylabel(ylabel)
if title is not None:
ax.set_title(title)
if 'fig' in locals():
if 'path' in savefig_settings:
plotting.savefig(fig, settings=savefig_settings)
return fig, ax
else:
return ax
[docs] def to_dataframe(self, axis = 'value'):
'''
Export the ensemble as a Pandas DataFrame, with members of the ensemble as columns. The columns are labeled according to the label in the individual series or numbered if 'label' is None.
Parameters
----------
axis : str, ['time', 'value']
Whether the return the ensemble from value or time. each The default is 'value'.
Raises
------
ValueError
Axis should be either 'time' or 'value'
Returns
-------
df : pandas.DataFrame
A Pandas DataFrame containing members of the ensemble as columns.
.. jupyter-execute::
nn = 30 #number of age models
time = np.arange(1,20000,100) #create a time vector
std_dev = 20 # Noise to be considered
t,v = pyleo.utils.gen_ts(model='colored_noise',nt=len(time),alpha=1.0)
series_list = []
for i in range(nn):
noise = np.random.normal(0,std_dev,len(time))
ts=pyleo.Series(time=np.sort(time+noise),value=v,verbose=False)
series_list.append(ts)
time_ens = pyleo.EnsembleSeries(series_list)
ens_qs = time_ens.quantiles(axis='time')
df=ens_qs.to_dataframe(axis='time')
'''
df_dict = {}
idx = 0
if axis == 'value':
for ts in self.series_list:
if ts.label is None: # should no longer be necessary now that it's done in __init__
df_dict[idx]=ts.value
else:
df_dict[ts.label]=ts.value
idx+=1
elif axis == 'time':
for ts in self.series_list:
if ts.label is None: # should no longer be necessary now that it's done in __init__
df_dict[idx]=ts.time
else:
df_dict[ts.label]=ts.time
idx+=1
else:
raise ValueError('Axis should be either "time" or "value"')
df = pd.DataFrame(df_dict)
return df
[docs] def to_array(self, axis='value', labels=True):
'''
Returns an ensemble as a numpy array with an optional list for labels. Each column in the array corresponds to an ensemble member.
Parameters
----------
axis : str, ['time', 'value'], optional
Whether the return the ensemble from value or time. The default is 'value'.
labels : bool, [True,False], optional
Whether to retrun a separate list with the timseries labels. The default is True.
Raises
------
ValueError
Axis should be either 'time' or 'value'
Returns
-------
vals: numpy.array
An array where each column corresponds to an ensemble member
headers: list
A list of corresponding labels for each columm
Examples
--------
.. jupyter-execute::
nn = 30 #number of age models
time = np.arange(1,20000,100) #create a time vector
std_dev = 20 # Noise to be considered
t,v = pyleo.utils.gen_ts(model='colored_noise',nt=len(time),alpha=1.0)
series_list = []
for i in range(nn):
noise = np.random.normal(0,std_dev,len(time))
ts=pyleo.Series(time=np.sort(time+noise),value=v,verbose=False)
series_list.append(ts)
time_ens = pyleo.EnsembleSeries(series_list)
ens_qs = time_ens.quantiles(axis='time')
vals,headers=ens_qs.to_array(axis='time')
'''
vals=np.empty((len(self.series_list[0].value),len(self.series_list)))
headers=[]
if axis == 'value':
for i, ts in enumerate(self.series_list):
headers.append(ts.label)
vals[:,i]=ts.value
elif axis == 'time':
for i, ts in enumerate(self.series_list):
headers.append(ts.label)
vals[:,i]=ts.time
else:
raise ValueError('Axis should be either "time" or "value"')
if labels == True:
return vals, headers
else:
return vals