Source code for pyleoclim.core.ensembleseries

"""
The EnsembleSeries class is a child of MultipleSeries, designed for ensemble applications (e.g. draws from a posterior distribution of ages, model ensembles with randomized initial conditions, or some other stochastic ensemble).
In addition to a MultipleSeries object, an EnsembleSeries object has the following properties:
- All series members are assumed to share the same units and other metadata.
- The class enables ensemble-oriented methods for computation (e.g., quantiles) and visualization (e.g., envelope plot).    

"""

from ..utils import plotting, lipdutils
from ..utils import correlation as corrutils
from ..core.series import Series
from ..core.correns import CorrEns
from ..core.multipleseries import MultipleSeries

import warnings
warnings.filterwarnings("ignore")

import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from copy import deepcopy

from matplotlib.ticker import FormatStrFormatter
import matplotlib.transforms as transforms
import matplotlib as mpl
from tqdm import tqdm

[docs]class EnsembleSeries(MultipleSeries): ''' EnsembleSeries object The EnsembleSeries object is a child of the MultipleSeries object, that is, a special case of MultipleSeries, aiming for ensembles of similar series. Ensembles usually arise from age modeling or Bayesian calibrations. All members of an EnsembleSeries object are assumed to share identical labels and units. All methods available for MultipleSeries are available for EnsembleSeries. Some functions were modified for the special case of ensembles. The class enables ensemble-oriented methods for computation (e.g., quantiles) and visualization (e.g., envelope plot) that are unavailable to other classes. ''' def __init__(self, series_list, label=None): for i, ts in enumerate(series_list): if ts.label is None: series_list[i].label = f"#{i}" # assign labels if missing self.series_list = series_list self.label = label
[docs] @classmethod def from_AgeEnsembleArray(self, series, age_array, value_depth = None, age_depth = None, extrapolate=True,verbose=True): '''Function to create an EnsembleSeries object using an age ensemble Function assumes that the input series and the age array share the same units. If depth vectors are passed, these are also assumed to share the same units Parameters ---------- series : pyleoclim.core.series.Series A Series object with the values to be mapped age_array : np.array An array of ages to map the values to value_depth : vector An array of depths corresponding to the series values age_depth : vector An array of depths corresponding to the age array extrapolate : bool Whether to extrapolate the age array to the value depth. Default is True verbose : bool Whether to print warnings. Default is True Returns ------- EnsembleSeries : pyleoclim.core.ensembleseries.EnsembleSeries The ensemble created using the time axes from age_array and the values from series. Examples -------- .. jupyter-execute:: #Create an ensemble of 100 series with random time axes of length 1000 length = 1000 age_array = np.array([pyleo.utils.tsmodel.random_time_axis(length) for i in range(100)]).T #Create a random series value = np.random.randn(length) time = pyleo.utils.tsmodel.random_time_axis(length) series = pyleo.Series(time=time,value=value, verbose=False) #Create an ensemble using these objects #Note that the time axis of the series object and the number of rows in the age array must match when depth is not passed ens = pyleo.EnsembleSeries.from_AgeEnsembleArray(series = series,age_array=age_array,verbose=False) .. jupyter-execute:: #If we have depth vectors for our series and age array, we can pass them to the function age_length = 1000 age_array = np.array([pyleo.utils.tsmodel.random_time_axis(age_length) for i in range(100)]).T age_depth = np.arange(age_length) value_length = 800 value = np.random.randn(value_length) time = pyleo.utils.tsmodel.random_time_axis(value_length) series = pyleo.Series(time=time, value=value, verbose=False) value_depth = np.arange(value_length) #Note that the length of the depth vectors must match the length of the corresponding object (number of values or number of rows in age array) ens = pyleo.EnsembleSeries.from_AgeEnsembleArray(series = series,age_array=age_array, value_depth=value_depth, age_depth=age_depth,verbose=False) ''' if not isinstance(series, Series): raise ValueError('series must be a Series object') if verbose: if hasattr(series,'lat') or hasattr(series,'lon'): warnings.warn('Passed series object looks like a geoseries object, did you mean to use EnsembleGeoSeries.from_AgeEnsembleArray?') #squeeze paleoValues into a vector values = np.squeeze(np.array(series.value)) if age_depth is None: if value_depth is None: if len(values) != age_array.shape[0]: raise ValueError("Age array and series need to have the same length when age_depth is not passed.") else: mapped_age = age_array pass else: raise ValueError('Age_depth not found. Please pass both a value depth array and age depth array if value and age are not already aligned. Otherwise, pass neither.') else: #Check that both arrays were passed if value_depth is None: raise ValueError('Value_depth not found. Please pass both a value depth array and age depth array if value and age are not already aligned. Otherwise, pass neither.') #Make sure that numpy arrays were given and try to coerce them into vectors if possible age_depth=np.squeeze(np.array(age_depth)) value_depth = np.squeeze(np.array(value_depth)) #Check that arrays are vectors for np.interp if age_depth.ndim > 1: raise ValueError('chronDepth has more than one dimension, please pass it as a vector') if value_depth.ndim > 1: raise ValueError('paleoDepth has more than one dimension, please pass it as a vector') #Check that the shape of the depth arrays matches up with the age array and value vector (separately) if len(age_depth)!=age_array.shape[0]: raise ValueError("Age depth and age array need to have the same length") if len(value_depth)!=len(values): raise ValueError("Paleo depth and series time need to have the same length") #Interpolate the age array to the value depth mapped_age = lipdutils.mapAgeEnsembleToPaleoData( ensembleValues=age_array, depthEnsemble=age_depth, depthMapping=value_depth, extrapolate=extrapolate ) series_list = [] #check that mapped_age and the original time vector are similar, and that the object is not a geoseries object if verbose: if (np.mean(mapped_age[-1,:]) > 10*series.time[-1]) or (np.mean(mapped_age[-1,:]) < 0.1*series.time[-1]): warnings.warn('The mapped age array is significantly different from the original time vector. You may want to check that the units are appropriate.') elif (np.mean(mapped_age[0,:]) > 10*series.time[0]) or (np.mean(mapped_age[0,:]) < 0.1*series.time[0]): warnings.warn('The mapped age array is significantly different from the original time vector. You may want to check that the units are appropriate.') for s in mapped_age.T: series_tmp = series.copy() series_tmp.time = s series_list.append(series_tmp) return EnsembleSeries(series_list)
[docs] @classmethod def from_PaleoEnsembleArray(self, series, paleo_array, paleo_depth = None, age_depth = None, extrapolate=True,verbose=True): '''Function to create an EnsembleSeries object using a paleo ensemble Function assumes that the input series and the age array share the same units. If depth vectors are passed, these are also assumed to share the same units Parameters ---------- series : pyleoclim.core.series.Series A Series object with the values to be mapped paleo_array : np.array An array of paleo values to map the values to paleo_depth : vector An array of depths corresponding to the paleo values age_depth : vector An array of depths corresponding to the time axis of the series object extrapolate : bool Whether to extrapolate the age array to the value depth. Default is True verbose : bool Whether to print warnings. Default is True Returns ------- EnsembleSeries : pyleoclim.core.ensembleseries.EnsembleSeries The ensemble created using the time axes from age_array and the values from series. Examples -------- .. jupyter-execute:: #Create an ensemble of 100 series with random time axes of length 1000 length = 1000 paleo_array = np.array([np.random.normal(0, 1, 1000) for _ in range(100)]).T #Create a random series value = np.random.normal(0,1,length) time = pyleo.utils.tsmodel.random_time_axis(length) series = pyleo.Series(time=time, value=value, verbose=False) #Create an ensemble using these objects #Note that the time axis of the series object and the number of rows in the age array must match when depth is not passed ens = pyleo.EnsembleSeries.from_PaleoEnsembleArray(series = series,paleo_array=paleo_array,verbose=False) .. jupyter-execute:: #In this example we'll explicitly pass the depth vectors paleo_length = 1000 paleo_array = np.array([np.random.normal(0, 1, 1000) for _ in range(100)]).T paleo_depth = np.arange(paleo_length) age_length = 800 value = np.random.normal(0,1,age_length) time = pyleo.utils.tsmodel.random_time_axis(age_length) series = pyleo.Series(time=time, value=value, verbose=False) age_depth = np.arange(age_length) ens = pyleo.EnsembleSeries.from_PaleoEnsembleArray(series = series,paleo_array=paleo_array,paleo_depth=paleo_depth,age_depth=age_depth,verbose=False) ''' if not isinstance(series, Series): raise ValueError('series must be a Series object') if verbose: if hasattr(series,'lat') or hasattr(series,'lon'): warnings.warn('Passed series object looks like a geoseries object, did you mean to use EnsembleGeoSeries.from_AgeEnsembleArray?') #squeeze paleoValues into a vector time = np.squeeze(np.array(series.time)) if paleo_depth is None: if age_depth is None: if len(time) != paleo_array.shape[0]: raise ValueError("Paleo array and series need to have the same length when paleo_depth is not passed.") else: mapped_paleo = paleo_array pass else: raise ValueError('Paleo_depth not found. Please pass both a paleo depth array and age depth array if the paleo ensemble and age are not already aligned. Otherwise, pass neither.') else: #Check that both arrays were passed if age_depth is None: raise ValueError('Value_depth not found. Please pass both a paleo depth array and age depth array if the paleo ensemble and age are not already aligned. Otherwise, pass neither.') #Make sure that numpy arrays were given and try to coerce them into vectors if possible age_depth=np.squeeze(np.array(age_depth)) paleo_depth = np.squeeze(np.array(paleo_depth)) #Check that arrays are vectors for np.interp if age_depth.ndim > 1: raise ValueError('age_depth has more than one dimension, please pass it as a vector') if paleo_depth.ndim > 1: raise ValueError('paleo_depth has more than one dimension, please pass it as a vector') #Check that the shape of the depth arrays matches up with the age array and value vector (separately) if len(paleo_depth)!=paleo_array.shape[0]: raise ValueError("Paleo depth and paleo array need to have the same length") if len(age_depth)!=len(time): raise ValueError("Age depth and series time need to have the same length") #Interpolate the age array to the value depth mapped_paleo = lipdutils.mapAgeEnsembleToPaleoData( ensembleValues=paleo_array, depthEnsemble=paleo_depth, depthMapping=age_depth, extrapolate=extrapolate ) series_list = [] #check that mapped_age and the original time vector are similar, and that the object is not a geoseries object if verbose: if (np.mean(mapped_paleo[-1,:]) > 10*series.value[-1]) or (np.mean(mapped_paleo[-1,:]) < 0.1*series.value[-1]): warnings.warn('The mapped value array is significantly different from the original value vector. You may want to check that the units are appropriate.') elif (np.mean(mapped_paleo[0,:]) > 10*series.value[0]) or (np.mean(mapped_paleo[0,:]) < 0.1*series.value[0]): warnings.warn('The mapped value array is significantly different from the original value vector. You may want to check that the units are appropriate.') for s in mapped_paleo.T: series_tmp = series.copy() series_tmp.value = s series_list.append(series_tmp) return EnsembleSeries(series_list)
[docs] def make_labels(self): '''Initialization of labels Returns ------- time_header : str Label for the time axis value_header : str Label for the value axis ''' ts_list = self.series_list if ts_list[0].time_name is not None: time_name_str = ts_list[0].time_name else: time_name_str = 'time' if ts_list[0].value_name is not None: value_name_str = ts_list[0].value_name else: value_name_str = 'value' if ts_list[0].value_unit is not None: value_header = f'{value_name_str} [{ts_list[0].value_unit}]' else: value_header = f'{value_name_str}' if ts_list[0].time_unit is not None: time_header = f'{time_name_str} [{ts_list[0].time_unit}]' else: time_header = f'{time_name_str}' return time_header, value_header
[docs] def slice(self, timespan): ''' Selects a limited time span from the object Parameters ---------- timespan : tuple or list The list of time points for slicing, whose length must be even. When there are n time points, the output Series includes n/2 segments. For example, if timespan = [a, b], then the sliced output includes one segment [a, b]; if timespan = [a, b, c, d], then the sliced output includes segment [a, b] and segment [c, d]. Returns ------- new : EnsembleSeries The sliced EnsembleSeries object. Examples -------- Select part of an object .. jupyter-execute:: nn = 20 # number of noise realizations nt = 200 series_list = [] time, signal = pyleo.utils.gen_ts(model='colored_noise',nt=nt,alpha=2.0) ts = pyleo.Series(time=time, value = signal, verbose=False).standardize() noise = np.random.randn(nt,nn) for idx in range(nn): # noise ts = pyleo.Series(time=time, value=ts.value+5*noise[:,idx], verbose=False) series_list.append(ts) ts_ens = pyleo.EnsembleSeries(series_list) fig, ax = ts_ens.plot_envelope(curve_lw=1.5) fig, ax = ts_ens.slice([100, 199]).plot_envelope(curve_lw=1.5) ''' new = self.copy() for idx, ts in enumerate(self.series_list): tsc = ts.slice(timespan) new.series_list[idx] = tsc return new
[docs] def quantiles(self, qs=[0.05, 0.5, 0.95], axis = 'value'): '''Calculate quantiles of an EnsembleSeries object. If axis is 'value', the calculation requires for the time axis to be the same. You can use the common_time method to do so. In essence, it transforms the time uncertainty into a y-axis uncertainty. If axis is 'time', the values should be the same for all members of the emsemble. Reuses [scipy.stats.mstats.mquantiles](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.mstats.mquantiles.html) function. Parameters ---------- qs : list, optional List of quantiles to consider for the calculation. The default is [0.05, 0.5, 0.95]. axis : ['time', 'value'] Whether to calculate the quantiles over the values or time. Default is 'value'. Returns ------- ens_qs : EnsembleSeries EnsembleSeries object containing empirical quantiles of original See also -------- pyleoclim.core.multipleseries.MultipleSeries.common_time : A method to align axes Examples -------- .. jupyter-execute:: nn = 30 # number of noise realizations nt = 500 series_list = [] t,v = pyleo.utils.gen_ts(model='colored_noise',nt=nt,alpha=1.0) signal = pyleo.Series(t,v) for idx in range(nn): # noise noise = np.random.randn(nt,nn)*100 ts = pyleo.Series(time=signal.time, value=signal.value+noise[:,idx], verbose=False) series_list.append(ts) ts_ens = pyleo.EnsembleSeries(series_list) ens_qs = ts_ens.quantiles() To calculate in the time dimension: .. jupyter-execute:: nn = 30 #number of age models time = np.arange(1,20000,100) #create a time vector std_dev = 20 # Noise to be considered t,v = pyleo.utils.gen_ts(model='colored_noise',nt=len(time),alpha=1.0) series_list = [] for i in range(nn): noise = np.random.normal(0,std_dev,len(time)) ts=pyleo.Series(time=np.sort(time+noise),value=v,verbose=False) series_list.append(ts) time_ens = pyleo.EnsembleSeries(series_list) ens_qs = time_ens.quantiles(axis='time') ''' if axis == 'value': time = np.copy(self.series_list[0].time) vals = [] for ts in self.series_list: if not np.array_equal(ts.time, time): raise ValueError('Time axis not consistent across the ensemble!') vals.append(ts.value) vals = np.array(vals) # ens_qs = mquantiles(vals, qs, axis=0) ens_qs = np.nanquantile(vals, qs, axis=0) ts_list = [] for i, quant in enumerate(ens_qs): ts = Series(time=time, value=quant, label=f'{qs[i]*100:g}%', verbose=False) ts_list.append(ts) elif axis == 'time': value = np.copy(self.series_list[0].value) vals = [] for ts in self.series_list: if not np.array_equal(ts.value, value): raise ValueError('Value axis not consistent across the ensemble!') vals.append(ts.time) vals = np.array(vals) # ens_qs = mquantiles(vals, qs, axis=0) ens_qs = np.nanquantile(vals, qs, axis=0) ts_list = [] for i, quant in enumerate(ens_qs): ts = Series(time=quant, value=value, label=f'{qs[i]*100:g}%', verbose=False) ts_list.append(ts) else: raise ValueError("Axis should be either 'value' or 'time'") ens_qs = EnsembleSeries(series_list=ts_list) return ens_qs
[docs] def correlation(self, target=None, timespan=None, alpha=0.05, method = 'ttest', statistic = 'pearsonr',number=1000, settings=None, fdr_kwargs=None, common_time_kwargs=None, mute_pbar=False, seed=None): ''' Calculate the correlation between an EnsembleSeries object to a target. If the target is not specified, then the 1st member of the ensemble will be the target Note that the FDR approach is applied by default to determine the significance of the p-values (more information in See Also below). Parameters ---------- target : Series or EnsembleSeries A pyleoclim Series object or EnsembleSeries object. When the target is also an EnsembleSeries object, then the calculation of correlation is performed in a one-to-one sense, and the ourput list of correlation values and p-values will be the size of the series_list of the self object. That is, if the self object contains n Series, and the target contains n+m Series, then only the first n Series from the object will be used for the calculation; otherwise, if the target contains only n-m Series, then the first m Series in the target will be used twice in sequence. timespan : tuple The time interval over which to perform the calculation alpha : float The significance level (0.05 by default) method : str, {'ttest','built-in','ar1sim','phaseran','CN'} method for significance testing. Default is 'ttest' to lower computational cost, but this is not always the best choice statistic : str The name of the statistic used to measure the association, to be chosen from a subset of https://docs.scipy.org/doc/scipy/reference/stats.html#association-correlation-tests Currently supported: ['pearsonr','spearmanr','pointbiserialr','kendalltau','weightedtau'] The default is 'pearsonr'. settings : dict Parameters for the correlation function (per scipy) number : int the number of simulations (default: 1000) fdr_kwargs : dict Parameters for the FDR function common_time_kwargs : dict Parameters for the method MultipleSeries.common_time() mute_pbar : bool; {True,False} If True, the progressbar will be muted. Default is False. seed : float or int random seed for isopersistent and isospectral methods Returns ------- corr_ens : CorrEns The resulting object, see pyleoclim.CorrEns See also -------- pyleoclim.core.series.Series.correlation : pairwise correlations pyleoclim.utils.correlation.fdr : False Discovery Rate pyleoclim.core.correns.CorrEns : The correlation ensemble object pyleoclim.core.surrogateseries.SurrogateSeries : surrogate series Examples -------- .. jupyter-execute:: nn = 50 # number of noise realizations nt = 100 series_list = [] time, signal = pyleo.utils.gen_ts(model='colored_noise',nt=nt,alpha=2.0) ts = pyleo.Series(time=time, value = signal, verbose=False).standardize() noise = np.random.randn(nt,nn) for idx in range(nn): # noise ts = pyleo.Series(time=time, value=ts.value+5*noise[:,idx], verbose=False) series_list.append(ts) ts_ens = pyleo.EnsembleSeries(series_list) # to set an arbitrary random seed to fix the result corr_res = ts_ens.correlation(ts, seed=2333) print(corr_res) # to change the statistic: corr_res = ts_ens.correlation(ts, statistic='kendalltau', method='phaseran', number=20) print(corr_res) The `print` function tabulates the output, and conveys the p-value according to the correlation test applied ("ttest", by default). To plot the result: .. jupyter-execute:: corr_res.plot() ''' if target is None: target = self.series_list[0] r_list = [] p_list = [] signif_list = [] print("Looping over "+ str(len(self.series_list)) +" Series in the ensemble") for idx, ts1 in tqdm(enumerate(self.series_list), total=len(self.series_list), disable=mute_pbar): if hasattr(target, 'series_list'): nEns = np.size(target.series_list) if idx < nEns: ts2 = target.series_list[idx].copy() #value2 = target.series_list[idx].value #time2 = target.series_list[idx].time else: #value2 = target.series_list[idx-nEns].value #time2 = target.series_list[idx-nEns].time ts2 = target.series_list[idx-nEns].copy() else: ts2 = target.copy() #value2 = target.value #time2 = target.time #ts2 = target.series_list[idx].copy() #ts2.time = time2; ts2.value = value2 #ts2 = Series(time=time2, value=value2, verbose=idx==0, auto_time_params=False) corr_res = ts1.correlation(ts2, timespan=timespan, method=method,number=number, statistic=statistic, settings=settings, mute_pbar=True, common_time_kwargs=common_time_kwargs, seed=seed) r_list.append(corr_res.r) signif_list.append(corr_res.signif) p_list.append(corr_res.p) r_list = np.array(r_list) p_list = np.array(p_list) signif_fdr_list = [] fdr_kwargs = {} if fdr_kwargs is None else fdr_kwargs.copy() args = {} args.update(fdr_kwargs) for i in range(np.size(signif_list)): signif_fdr_list.append(False) fdr_res = corrutils.fdr(p_list, **fdr_kwargs) if fdr_res is not None: for i in fdr_res: signif_fdr_list[i] = True corr_ens = CorrEns(r_list, p_list, signif_list, signif_fdr_list, alpha) return corr_ens
[docs] def plot_traces(self, figsize=[10, 4], xlabel=None, ylabel=None, title=None, num_traces=10, seed=None, xlim=None, ylim=None, linestyle='-', savefig_settings=None, ax=None, legend=True, color=sns.xkcd_rgb['pale red'], lw=0.5, alpha=0.3, lgd_kwargs=None): '''Plot EnsembleSeries as a subset of traces. Parameters ---------- figsize : list, optional The figure size. The default is [10, 4]. xlabel : str, optional x-axis label. The default is None. ylabel : str, optional y-axis label. The default is None. title : str, optional Plot title. The default is None. xlim : list, optional x-axis limits. The default is None. ylim : list, optional y-axis limits. The default is None. color : str, optional Color of the traces. The default is sns.xkcd_rgb['pale red']. alpha : float, optional Transparency of the lines representing the multiple members. The default is 0.3. linestyle : {'-', '--', '-.', ':', '', (offset, on-off-seq), ...} Set the linestyle of the line lw : float, optional Width of the lines representing the multiple members. The default is 0.5. num_traces : int, optional Number of traces to plot. The default is 10. savefig_settings : dict, optional the dictionary of arguments for plt.savefig(); some notes below: - "path" must be specified; it can be any existed or non-existed path, with or without a suffix; if the suffix is not given in "path", it will follow "format" - "format" can be one of {"pdf", "eps", "png", "ps"} The default is None. ax : matplotlib.ax, optional Matplotlib axis on which to return the plot. The default is None. legend : bool; {True,False}, optional Whether to plot the legend. The default is True. lgd_kwargs : dict, optional Parameters for the legend. The default is None. seed : int, optional Set the seed for the random number generator. Useful for reproducibility. The default is None. Returns ------- fig : matplotlib.figure the figure object from matplotlib See [matplotlib.pyplot.figure](https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.figure.html) for details. ax : matplotlib.axis the axis object from matplotlib See [matplotlib.axes](https://matplotlib.org/api/axes_api.html) for details. See also -------- pyleoclim.utils.plotting.savefig : Saving figure in Pyleoclim Examples -------- .. jupyter-execute:: nn = 30 # number of noise realizations nt = 500 series_list = [] t,v = pyleo.utils.gen_ts(model='colored_noise',nt=nt,alpha=1.0) signal = pyleo.Series(time=t,value=v, verbose=False) for idx in range(nn): # noise noise = np.random.randn(nt,nn)*100 ts = pyleo.Series(time=signal.time, value=signal.value+noise[:,idx], verbose=False) series_list.append(ts) ts_ens = pyleo.EnsembleSeries(series_list) fig, ax = ts_ens.plot_traces(alpha=0.2,num_traces=8) ''' savefig_settings = {} if savefig_settings is None else savefig_settings.copy() lgd_kwargs = {} if lgd_kwargs is None else lgd_kwargs.copy() num_traces = min(num_traces, len(self.series_list)) # restrict to the smaller of the two # generate default axis labels time_label, value_label = self.make_labels() if xlabel is None: xlabel = time_label if ylabel is None: ylabel = value_label if ax is None: fig, ax = plt.subplots(figsize=figsize) if num_traces > 0: if seed is not None: np.random.seed(seed) nts = np.size(self.series_list) random_draw_idx = np.random.choice(nts, num_traces, replace=False) for idx in random_draw_idx: self.series_list[idx].plot(xlabel=xlabel, ylabel=ylabel, zorder=99, linewidth=lw, xlim=xlim, ylim=ylim, ax=ax, color=color, alpha=alpha,linestyle=linestyle, label='_ignore') l1, = ax.plot(np.nan, np.nan, color=color, label=f'example members (n={num_traces})',linestyle='-') if title is not None: ax.set_title(title) else: if self.label is not None: ax.set_title(self.label) if legend==True: lgd_args = {'frameon': False} lgd_args.update(lgd_kwargs) ax.legend(**lgd_args) elif legend==False: ax.legend().remove() else: raise ValueError('legend should be set to either True or False') if 'fig' in locals(): if 'path' in savefig_settings: plotting.savefig(fig, settings=savefig_settings) return fig, ax else: return ax
[docs] def plot_envelope(self, figsize=[10, 4], qs=[0.025, 0.25, 0.5, 0.75, 0.975], xlabel=None, ylabel=None, title=None, xlim=None, ylim=None, savefig_settings=None, ax=None, plot_legend=True, curve_clr=sns.xkcd_rgb['pale red'], curve_lw=2, shade_clr=sns.xkcd_rgb['pale red'], shade_alpha=0.2, inner_shade_label='IQR', outer_shade_label='95% CI', lgd_kwargs=None): ''' Plot EnsembleSeries as an envelope. Parameters ---------- figsize : list, optional The figure size. The default is [10, 4]. qs : list, optional The significance levels to consider. The default is [0.025, 0.25, 0.5, 0.75, 0.975] (median, interquartile range, and central 95% region) xlabel : str, optional x-axis label. The default is None. ylabel : str, optional y-axis label. The default is None. title : str, optional Plot title. The default is None. xlim : list, optional x-axis limits. The default is None. ylim : list, optional y-axis limits. The default is None. savefig_settings : dict, optional the dictionary of arguments for plt.savefig(); some notes below: - "path" must be specified; it can be any existed or non-existed path, with or without a suffix; if the suffix is not given in "path", it will follow "format" - "format" can be one of {"pdf", "eps", "png", "ps"} The default is None. ax : matplotlib.ax, optional Matplotlib axis on which to return the plot. The default is None. plot_legend : bool; {True,False}, optional Wether to plot the legend. The default is True. curve_clr : str, optional Color of the main line (median). The default is sns.xkcd_rgb['pale red']. curve_lw : str, optional Width of the main line (median). The default is 2. shade_clr : str, optional Color of the shaded envelope. The default is sns.xkcd_rgb['pale red']. shade_alpha : float, optional Transparency on the envelope. The default is 0.2. inner_shade_label : str, optional Label for the envelope. The default is 'IQR'. outer_shade_label : str, optional Label for the envelope. The default is '95\% CI'. lgd_kwargs : dict, optional Parameters for the legend. The default is None. Returns ------- fig : matplotlib.figure the figure object from matplotlib See [matplotlib.pyplot.figure](https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.figure.html) for details. ax : matplotlib.axis the axis object from matplotlib See [matplotlib.axes](https://matplotlib.org/api/axes_api.html) for details. See also -------- pyleoclim.utils.plotting.savefig : Saving figure in Pyleoclim Examples -------- .. jupyter-execute:: nn = 30 # number of noise realizations nt = 500 series_list = [] t,v = pyleo.utils.gen_ts(model='colored_noise',nt=nt,alpha=1.0) signal = pyleo.Series(time=t,value=v, verbose=False) for idx in range(nn): # noise noise = np.random.randn(nt,nn)*100 ts = pyleo.Series(time=signal.time, value=signal.value+noise[:,idx], verbose=False) series_list.append(ts) ts_ens = pyleo.EnsembleSeries(series_list) fig, ax = ts_ens.plot_envelope(curve_lw=1.5) ''' savefig_settings = {} if savefig_settings is None else savefig_settings.copy() lgd_kwargs = {} if lgd_kwargs is None else lgd_kwargs.copy() # generate default axis labels time_label, value_label = self.make_labels() if xlabel is None: xlabel = time_label if ylabel is None: ylabel = value_label if ax is None: fig, ax = plt.subplots(figsize=figsize) ts_qs = self.quantiles(qs=qs) if inner_shade_label is None: inner_shade_label = f'{ts_qs.series_list[1].label}-{ts_qs.series_list[-2].label}' if outer_shade_label is None: outer_shade_label = f'{ts_qs.series_list[0].label}-{ts_qs.series_list[-1].label}' time = ts_qs.series_list[0].time # plot outer envelope ax.fill_between( time, ts_qs.series_list[0].value, ts_qs.series_list[4].value, color=shade_clr, alpha=shade_alpha, edgecolor=shade_clr, label=outer_shade_label ) # plot inner envelope on top ax.fill_between( time, ts_qs.series_list[1].value, ts_qs.series_list[3].value, color=shade_clr, alpha=2*shade_alpha, edgecolor=shade_clr, label=inner_shade_label ) # plot the median ts_qs.series_list[2].plot(xlabel=xlabel, ylabel=ylabel, linewidth=curve_lw, color=curve_clr, xlim=xlim, ylim=ylim, ax=ax, zorder=100, label = 'median' ) if title is not None: ax.set_title(title) if plot_legend: lgd_args = {'frameon': False} lgd_args.update(lgd_kwargs) ax.legend(**lgd_args) else: ax.legend().set_visible(False) if 'fig' in locals(): if 'path' in savefig_settings: plotting.savefig(fig, settings=savefig_settings) return fig, ax else: return ax
[docs] def stackplot(self, figsize=[5, 15], savefig_settings=None, xlim=None, fill_between_alpha=0.2, colors=None, cmap='tab10', norm=None, spine_lw=1.5, grid_lw=0.5, font_scale=0.8, label_x_loc=-0.15, v_shift_factor=3/4, linewidth=1.5): ''' Stack plot of multiple series Note that the plotting style is uniquely designed for this one and cannot be properly reset with `pyleoclim.set_style()`. Parameters ---------- figsize : list Size of the figure. colors : list Colors for plotting. If None, the plotting will cycle the 'tab10' colormap; if only one color is specified, then all curves will be plotted with that single color; if a list of colors are specified, then the plotting will cycle that color list. cmap : str The colormap to use when "colors" is None. norm : matplotlib.colors.Normalize like The nomorlization for the colormap. If None, a linear normalization will be used. savefig_settings : dictionary the dictionary of arguments for plt.savefig(); some notes below: - "path" must be specified; it can be any existed or non-existed path, with or without a suffix; if the suffix is not given in "path", it will follow "format" - "format" can be one of {"pdf", "eps", "png", "ps"} The default is None. xlim : list The x-axis limit. fill_between_alpha : float The transparency for the fill_between shades. spine_lw : float The linewidth for the spines of the axes. grid_lw : float The linewidth for the gridlines. linewidth : float The linewidth for the curves. font_scale : float The scale for the font sizes. Default is 0.8. label_x_loc : float The x location for the label of each curve. v_shift_factor : float The factor for the vertical shift of each axis. The default value 3/4 means the top of the next axis will be located at 3/4 of the height of the previous one. Returns ------- fig : matplotlib.figure the figure object from matplotlib See [matplotlib.pyplot.figure](https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.figure.html) for details. ax : matplotlib.axis the axis object from matplotlib See [matplotlib.axes](https://matplotlib.org/api/axes_api.html) for details. See also -------- pyleoclim.utils.plotting.savefig : Saving figure in Pyleoclim Examples -------- .. jupyter-execute:: nn = 10 # number of noise realizations nt = 200 series_list = [] t, v = pyleo.utils.gen_ts(model='colored_noise',nt=nt,alpha=1.0) signal, _, _ = pyleo.utils.standardize(v) noise = np.random.randn(nt,nn) for idx in range(nn): # noise ts = pyleo.Series(time=t, value=signal+noise[:,idx], label='trace #'+str(idx+1), verbose=False) series_list.append(ts) ts_ens = pyleo.EnsembleSeries(series_list) fig, ax = ts_ens.stackplot() ''' current_style = deepcopy(mpl.rcParams) plotting.set_style('journal', font_scale=font_scale) savefig_settings = {} if savefig_settings is None else savefig_settings.copy() n_ts = len(self.series_list) fig = plt.figure(figsize=figsize) if xlim is None: time_min = np.inf time_max = -np.inf for ts in self.series_list: if np.min(ts.time) <= time_min: time_min = np.min(ts.time) if np.max(ts.time) >= time_max: time_max = np.max(ts.time) xlim = [time_min, time_max] ax = {} left = 0 width = 1 height = 1/n_ts bottom = 1 for idx, ts in enumerate(self.series_list): if colors is None: cmap_obj = plt.get_cmap(cmap) if hasattr(cmap_obj, 'colors'): nc = len(cmap_obj.colors) else: nc = len(self.series_list) if norm is None: norm = mpl.colors.Normalize(vmin=0, vmax=nc-1) clr = cmap_obj(norm(idx%nc)) elif type(colors) is str: clr = colors elif type(colors) is list: nc = len(colors) clr = colors[idx%nc] else: raise TypeError('"colors" should be a list of, or one, Python supported color code (a string of hex code or a tuple of rgba values)') bottom -= height*v_shift_factor ax[idx] = fig.add_axes([left, bottom, width, height]) ax[idx].plot(ts.time, ts.value, color=clr, lw=linewidth) ax[idx].patch.set_alpha(0) ax[idx].set_xlim(xlim) time_label, value_label = ts.make_labels() ax[idx].set_ylabel(value_label, weight='bold') mu = np.mean(ts.value) std = np.std(ts.value) ylim = [mu-4*std, mu+4*std] ax[idx].fill_between(ts.time, ts.value, y2=mu, alpha=fill_between_alpha, color=clr) trans = transforms.blended_transform_factory(ax[idx].transAxes, ax[idx].transData) if ts.label is not None: ax[idx].text(label_x_loc, mu, ts.label, horizontalalignment='right', transform=trans, color=clr, weight='bold') ax[idx].set_ylim(ylim) ax[idx].set_yticks(ylim) ax[idx].yaxis.set_major_formatter(FormatStrFormatter('%.1f')) ax[idx].grid(False) if idx % 2 == 0: ax[idx].spines['left'].set_visible(True) ax[idx].spines['left'].set_linewidth(spine_lw) ax[idx].spines['left'].set_color(clr) ax[idx].spines['right'].set_visible(False) ax[idx].yaxis.set_label_position('left') ax[idx].yaxis.tick_left() else: ax[idx].spines['left'].set_visible(False) ax[idx].spines['right'].set_visible(True) ax[idx].spines['right'].set_linewidth(spine_lw) ax[idx].spines['right'].set_color(clr) ax[idx].yaxis.set_label_position('right') ax[idx].yaxis.tick_right() ax[idx].yaxis.label.set_color(clr) ax[idx].tick_params(axis='y', colors=clr) ax[idx].spines['top'].set_visible(False) ax[idx].spines['bottom'].set_visible(False) ax[idx].tick_params(axis='x', which='both', length=0) ax[idx].set_xlabel('') ax[idx].set_xticklabels([]) xt = ax[idx].get_xticks()[1:-1] for x in xt: ax[idx].axvline(x=x, color='lightgray', linewidth=grid_lw, ls='-', zorder=-1) ax[idx].axhline(y=mu, color='lightgray', linewidth=grid_lw, ls='-', zorder=-1) bottom -= height*(1-v_shift_factor) ax[n_ts] = fig.add_axes([left, bottom, width, height]) ax[n_ts].set_xlabel(time_label) ax[n_ts].spines['left'].set_visible(False) ax[n_ts].spines['right'].set_visible(False) ax[n_ts].spines['bottom'].set_visible(True) ax[n_ts].spines['bottom'].set_linewidth(spine_lw) ax[n_ts].set_yticks([]) ax[n_ts].patch.set_alpha(0) ax[n_ts].set_xlim(xlim) ax[n_ts].grid(False) ax[n_ts].tick_params(axis='x', which='both', length=3.5) xt = ax[n_ts].get_xticks()[1:-1] for x in xt: ax[n_ts].axvline(x=x, color='lightgray', linewidth=grid_lw, ls='-', zorder=-1) if 'fig' in locals(): if 'path' in savefig_settings: plotting.savefig(fig, settings=savefig_settings) mpl.rcParams.update(current_style) return fig, ax else: # reset the plotting style mpl.rcParams.update(current_style) return ax
[docs] def histplot(self, figsize=[10, 4], title=None, savefig_settings=None, ax=None, ylabel='KDE', vertical=False, edgecolor='w', **plot_kwargs): """ Plots the distribution of the timeseries across ensembles Reuses the seaborn [histplot](https://seaborn.pydata.org/generated/seaborn.histplot.html) function. Parameters ---------- figsize : list, optional The size of the figure. The default is [10, 4]. title : str, optional Title for the figure. The default is None. savefig_settings : dict, optional the dictionary of arguments for plt.savefig(); some notes below: - "path" must be specified; it can be any existed or non-existed path, with or without a suffix; if the suffix is not given in "path", it will follow "format" - "format" can be one of {"pdf", "eps", "png", "ps"}. The default is None. ax : matplotlib.axis, optional A matplotlib axis. The default is None. ylabel : str, optional Label for the count axis. The default is 'KDE'. vertical : bool; {True,False}, optional Whether to flip the plot vertically. The default is False. edgecolor : matplotlib.color, optional The color of the edges of the bar. The default is 'w'. plot_kwargs : dict Plotting arguments for seaborn histplot: https://seaborn.pydata.org/generated/seaborn.histplot.html. See also -------- pyleoclim.utils.plotting.savefig : Saving figure in Pyleoclim Examples -------- .. jupyter-execute:: nn = 30 # number of noise realizations nt = 500 series_list = [] time, signal = pyleo.utils.gen_ts(model='colored_noise',nt=nt,alpha=1.0) ts = pyleo.Series(time=time, value = signal, verbose=False).standardize() noise = np.random.randn(nt,nn) for idx in range(nn): # noise ts = pyleo.Series(time=time, value=signal+noise[:,idx], verbose=False) series_list.append(ts) ts_ens = pyleo.EnsembleSeries(series_list) fig, ax = ts_ens.histplot() """ savefig_settings = {} if savefig_settings is None else savefig_settings.copy() if ax is None: fig, ax = plt.subplots(figsize=figsize) #make the data into a dataframe so we can flip the figure time_label, value_label = self.make_labels() #append all the values together for the plot val = self.series_list[0].value for i in range(1,len(self.series_list)): val=np.append(val,self.series_list[i].value) if vertical == True: data=pd.DataFrame({'value':val}) ax = sns.histplot(data=data, y="value", ax=ax, kde=True, edgecolor=edgecolor, **plot_kwargs) ax.set_ylabel(value_label) ax.set_xlabel(ylabel) else: ax = sns.histplot(val, ax=ax, kde=True, edgecolor=edgecolor, **plot_kwargs) ax.set_xlabel(value_label) ax.set_ylabel(ylabel) if title is not None: ax.set_title(title) if 'fig' in locals(): if 'path' in savefig_settings: plotting.savefig(fig, settings=savefig_settings) return fig, ax else: return ax
[docs] def to_dataframe(self, axis = 'value'): ''' Export the ensemble as a Pandas DataFrame, with members of the ensemble as columns. The columns are labeled according to the label in the individual series or numbered if 'label' is None. Parameters ---------- axis : str, ['time', 'value'] Whether the return the ensemble from value or time. each The default is 'value'. Raises ------ ValueError Axis should be either 'time' or 'value' Returns ------- df : pandas.DataFrame A Pandas DataFrame containing members of the ensemble as columns. .. jupyter-execute:: nn = 30 #number of age models time = np.arange(1,20000,100) #create a time vector std_dev = 20 # Noise to be considered t,v = pyleo.utils.gen_ts(model='colored_noise',nt=len(time),alpha=1.0) series_list = [] for i in range(nn): noise = np.random.normal(0,std_dev,len(time)) ts=pyleo.Series(time=np.sort(time+noise),value=v,verbose=False) series_list.append(ts) time_ens = pyleo.EnsembleSeries(series_list) ens_qs = time_ens.quantiles(axis='time') df=ens_qs.to_dataframe(axis='time') ''' df_dict = {} idx = 0 if axis == 'value': for ts in self.series_list: if ts.label is None: # should no longer be necessary now that it's done in __init__ df_dict[idx]=ts.value else: df_dict[ts.label]=ts.value idx+=1 elif axis == 'time': for ts in self.series_list: if ts.label is None: # should no longer be necessary now that it's done in __init__ df_dict[idx]=ts.time else: df_dict[ts.label]=ts.time idx+=1 else: raise ValueError('Axis should be either "time" or "value"') df = pd.DataFrame(df_dict) return df
[docs] def to_array(self, axis='value', labels=True): ''' Returns an ensemble as a numpy array with an optional list for labels. Each column in the array corresponds to an ensemble member. Parameters ---------- axis : str, ['time', 'value'], optional Whether the return the ensemble from value or time. The default is 'value'. labels : bool, [True,False], optional Whether to retrun a separate list with the timseries labels. The default is True. Raises ------ ValueError Axis should be either 'time' or 'value' Returns ------- vals: numpy.array An array where each column corresponds to an ensemble member headers: list A list of corresponding labels for each columm Examples -------- .. jupyter-execute:: nn = 30 #number of age models time = np.arange(1,20000,100) #create a time vector std_dev = 20 # Noise to be considered t,v = pyleo.utils.gen_ts(model='colored_noise',nt=len(time),alpha=1.0) series_list = [] for i in range(nn): noise = np.random.normal(0,std_dev,len(time)) ts=pyleo.Series(time=np.sort(time+noise),value=v,verbose=False) series_list.append(ts) time_ens = pyleo.EnsembleSeries(series_list) ens_qs = time_ens.quantiles(axis='time') vals,headers=ens_qs.to_array(axis='time') ''' vals=np.empty((len(self.series_list[0].value),len(self.series_list))) headers=[] if axis == 'value': for i, ts in enumerate(self.series_list): headers.append(ts.label) vals[:,i]=ts.value elif axis == 'time': for i, ts in enumerate(self.series_list): headers.append(ts.label) vals[:,i]=ts.time else: raise ValueError('Axis should be either "time" or "value"') if labels == True: return vals, headers else: return vals