Source code for pyleoclim.core.ensmultivardecomp

"""
A MultipleMultivariateDecomp object is a collection (more precisely, a 
list) of MultivariateDecomp objects. This class currently exists primarily for the storage of MC-PCA products
"""

import numpy as np
import seaborn as sns
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.gridspec import GridSpec
from matplotlib import cm

from .series import Series
from .ensembleseries import EnsembleSeries
from ..utils import mapping, plotting

[docs]class EnsMultivarDecomp(): def __init__(self, pca_list,label=None): self.pca_list = pca_list self.label = label
[docs] def modeplot(self,index=0,flip=False,plot_envelope_kwargs = None, psd_envelope_kwargs = None, figsize=[8, 8], fig=None, savefig_settings=None,gs=None, title=None, title_kwargs=None, spec_method='mtm', cmap='coolwarm', marker=None, scatter_kwargs=None, map_kwargs=None, gridspec_kwargs=None,quantiles=[.25,.5,.75]): '''Plot relevant information about the specific mode Parameters ---------- index : int The 1-based index of the mode to visualize. Default is 1, corresponding to the first mode. flip : bool Whether or not to flip the PC plot_envelope_kwargs : dict Dictionary of key word arguments for plot envelope plot_envelope_kwargs : dict Dictionary of key word arguments for psd envelope figsize : list, optional The figure size. The default is [8, 8]. savefig_settings : dict the dictionary of arguments for plt.savefig(); some notes below: - "path" must be specified; it can be any existed or non-existed path, with or without a suffix; if the suffix is not given in "path", it will follow "format" - "format" can be one of {"pdf", "eps", "png", "ps"} title : str, optional text for figure title title_kwargs : dict the keyword arguments for ax.set_title() gs : matplotlib.gridspec object, optional Requires at least two rows and two columns. - top row, left: timeseries of principle component - top row, right: PSD - bottom row: spaghetti plot or map See [matplotlib.gridspec.GridSpec](https://matplotlib.org/stable/tutorials/intermediate/gridspec.html) for details. gridspec_kwargs : dict, optional Dictionary with custom gridspec values. - wspace changes space between columns (default: wspace=0.05) - hspace changes space between rows (default: hspace=0.03) - width_ratios: relative width of each column (default: width_ratios=[5,1,3] where middle column serves as a spacer) - height_ratios: relative height of each row (default: height_ratios=[2,1,5] where middle row serves as a spacer) spec_method: str, optional The name of the spectral method to be applied on the PC. Default: MTM Note that the data are evenly-spaced, so any spectral method that assumes even spacing is applicable here: 'mtm', 'welch', 'periodogram' 'wwz' is relevant if scaling exponents need to be estimated, but ill-advised otherwise, as it is very slow. cmap: str, optional if 'hue' is specified, will be used for map scatter plot values. colormap name for the loadings (https://matplotlib.org/stable/tutorials/colors/colormaps.html) map_kwargs : dict, optional Optional arguments for map configuration - projection: str; Optional value for map projection. Default 'auto'. - proj_default: bool - lakes, land, ocean, rivers, borders, coastline, background: bool or dict; - lgd_kwargs: dict; Optional values for how the map legend is configured - gridspec_kwargs: dict; Optional values for adjusting the arrangement of the colorbar, map and legend in the map subplot - legend: bool; Whether to draw a legend on the figure. Default is True - colorbar: bool; Whether to draw a colorbar on the figure if the data associated with hue are numeric. Default is True The default is None. scatter_kwargs : dict, optional Optional arguments configuring how data are plotted on a map. See description of scatter_kwargs in pyleoclim.utils.mapping.scatter_map hue : str, optional (only applicable if using scatter map) Variable associated with color coding for points plotted on map. May correspond to a continuous or categorical variable. The default is 'EOF'. marker : string, optional (only applicable if using scatter map) Grouping variable that will produce points with different markers. Can have a numeric dtype but will always be treated as categorical. The default is 'archiveType'. quantiles : list,array Quantiles to use for plotting EOFs Returns ------- fig : matplotlib.figure The figure ax : dict dictionary of matplotlib ax See also -------- pyleoclim.core.MultipleSeries.pca : Principal Component Analysis pyleoclim.core.MultipleGeoSeries.pca : Principal Component Analysis pyleoclim.utils.tsutils.eff_sample_size : Effective sample size pyleoclim.utils.mapping.scatter_map : mapping Examples -------- .. jupyter-execute:: n = 100 # number of series soi = pyleo.utils.load_dataset('SOI') soi_time_axes = [pyleo.utils.random_time_axis(n=len(soi.time)) for _ in range(n)] soi_ens = pyleo.EnsembleGeoSeries([pyleo.GeoSeries(time=time, value=soi.value,lat=-5,lon=-85,auto_time_params=True,verbose=False) for time in soi_time_axes]) nino3 = pyleo.utils.load_dataset('NINO3') nino3_time_axes = [pyleo.utils.random_time_axis(n=len(nino3.time)) for _ in range(n)] nino3_ens = pyleo.EnsembleGeoSeries([pyleo.GeoSeries(time=time, value=nino3.value,lat=-5,lon=-85,auto_time_params=True,verbose=False) for time in nino3_time_axes]) mul_ens = pyleo.MulEnsGeoSeries([nino3_ens,soi_ens]) mcpca = mul_ens.mcpca(nsim=10,seed=42) mcpca.modeplot()''' plot_envelope_kwargs = {} if plot_envelope_kwargs is None else plot_envelope_kwargs.copy() psd_envelope_kwargs = {} if psd_envelope_kwargs is None else psd_envelope_kwargs.copy() savefig_settings = {} if savefig_settings is None else savefig_settings.copy() if fig ==None: fig = plt.figure(figsize=figsize) if gs == None: gridspec_kwargs = {} if type(gridspec_kwargs) != dict else gridspec_kwargs gridspec_defaults = dict(wspace=0.05, hspace=0.03, width_ratios=[5,1,3], height_ratios=[2,1,5]) gridspec_defaults.update(gridspec_kwargs) gs = GridSpec(len(gridspec_defaults['height_ratios']), len(gridspec_defaults['width_ratios']), **gridspec_defaults) gs.update(left=0, right=1.1) ax = {} # plot the PC ax['pc'] = fig.add_subplot(gs[0, 0]) label = rf'$PC_{index + 1}$' t_list = [] pc_list = [] eof_array = np.empty(shape=(len(self.pca_list[0].eigvecs[:,index]),len(self.pca_list))) pctvar_array = np.empty(shape=len(self.pca_list)) for idx,pca in enumerate(self.pca_list): if flip: PC = -pca.pcs[:, index] EOF = -pca.eigvecs[:, index] t = pca.orig.series_list[0].time else: PC = pca.pcs[:, index] EOF = pca.eigvecs[:, index] t = pca.orig.series_list[0].time t_list.append(t) pc_list.append(PC) eof_array[:,idx] = EOF pctvar_array[idx]= pca.pctvar[index] eof_quantiles = np.quantile(a=eof_array,q=quantiles,axis=1) time_unit = self.pca_list[0].orig.series_list[0].time_unit pca_ens = EnsembleSeries([Series(time=t_list[idx],value=pc_list[idx],time_unit=time_unit,verbose=False) for idx in range(len(self.pca_list))]) if 'plot_legend' not in plot_envelope_kwargs.keys(): plot_envelope_kwargs['plot_legend'] = False pca_ens.common_time().plot_envelope(ax=ax['pc'],**plot_envelope_kwargs) ax['pc'].set_ylabel(label) # plot its PSD ax['psd'] = fig.add_subplot(gs[0, -1]) if 'plot_legend' not in psd_envelope_kwargs.keys(): psd_envelope_kwargs['plot_legend'] = False if 'members_plot_num' not in psd_envelope_kwargs.keys(): psd_envelope_kwargs['members_plot_num'] = 0 psd = pca_ens.common_time().spectral(method=spec_method) _ = psd.plot_envelope(ax=ax['psd'],**psd_envelope_kwargs) # plot spatial pattern or spaghetti map_kwargs = {} if map_kwargs is None else map_kwargs.copy() projection = map_kwargs.pop('projection', 'auto') proj_default = map_kwargs.pop('proj_default', True) lakes = map_kwargs.pop('lakes', False) land = map_kwargs.pop('land', False) ocean = map_kwargs.pop('ocean', False) rivers = map_kwargs.pop('rivers', False) borders = map_kwargs.pop('borders', True) coastline = map_kwargs.pop('coastline', True) background = map_kwargs.pop('background', True) extent = map_kwargs.pop('extent', 'global') map_gridspec_kwargs = map_kwargs.pop('gridspec_kwargs', {}) lgd_kwargs = map_kwargs.pop('lgd_kwargs', {}) if 'edgecolor' in map_kwargs.keys(): scatter_kwargs.update({'edgecolor': map_kwargs['edgecolor']}) legend = map_kwargs.pop('legend', True) colorbar = map_kwargs.pop('colorbar', True) # This makes a bare bones dataframe from a MultipleGeoSeries object df_list = [] for idx,q in enumerate(quantiles): eof_q = eof_quantiles[idx] df_tmp = mapping.make_df(self.pca_list[0].orig, hue='quantile', marker=marker, size='EOF') # additional columns are added manually df_tmp['EOF'] = np.abs(eof_q) df_tmp['quantile'] = q*100 df_tmp['quantile'][eof_q<0] *= -1 df_list.append(df_tmp) df = pd.concat(df_list) df = df.sort_values(['lat','EOF'],ascending=False)#Make sure large sizes plot first if legend == True: map_gridspec_kwargs['width_ratios'] = map_gridspec_kwargs['width_ratios'] if 'width_ratios' in map_gridspec_kwargs.keys() else [.7,.1, 12, 4] ax_norm = mpl.colors.Normalize(vmin=min(df['quantile'].to_numpy()), vmax=max(df['quantile'].to_numpy()), clip=False) ax_cmap = plt.get_cmap(name=cmap) ax_sm = cm.ScalarMappable(norm=ax_norm, cmap=ax_cmap) _, ax['map'] = mapping.scatter_map(df, hue='quantile', size='EOF', marker=marker, projection=projection, proj_default=proj_default, background=background, borders=borders, coastline=coastline, rivers=rivers, lakes=lakes, ocean=ocean, land=land, extent=extent, figsize=None, scatter_kwargs=scatter_kwargs, lgd_kwargs=lgd_kwargs, gridspec_kwargs=map_gridspec_kwargs, colorbar=False, legend=legend, cmap=ax_sm.cmap, fig=fig, gs_slot=gs[-1, :]) #label rf'$EOF_{index + 1}$' if title is None: title = self.label + ' mode ' + str(index + 1) + ', ' + '{:3.2f}'.format(pctvar_array.mean()) + u" \u00B1 " + '{:3.2f}'.format(pctvar_array.std()) + '% variance explained' # weight='bold', y=0.92) title_kwargs = {} if title_kwargs is None else title_kwargs.copy() t_args = {'y': .92, 'weight': 'bold'} t_args.update(title_kwargs) fig.suptitle(title, **t_args) fig.tight_layout() if 'path' in savefig_settings: plotting.savefig(fig, settings=savefig_settings) return fig, ax
[docs] def screeplot(self,clr_eig='C0', linewidth=.3, title='Screeplot', violin_kwargs = None, figsize=[8, 8],savefig_settings=None,ax=None): '''Function to plot the scree plot of the PCA Parameters ---------- quantiles : list,array Quantiles to use for plotting range of pctvar for each mode. clr_eig : str Color to use for the eigenvalues. Default is 'C0'. linewidth : float Linewidth to use for the violin plot. Default is 0.3. violin_kwargs : dict Dictionary of key word arguments for violin plot. If exposed plot arguments {'color','linewidth','title'} are included in kwargs, kwargs will overwrite the exposed argument values. figsize : list, optional The figure size. The default is [8, 8]. savefig_settings : dict the dictionary of arguments for plt.savefig(); some notes below: - "path" must be specified; it can be any existed or non-existed path, with or without a suffix; if the suffix is not given in "path", it will follow "format" - "format" can be one of {"pdf", "eps", "png", "ps"} ax : matplotlib.axis, optional the axis object from matplotlib See [matplotlib.axes](https://matplotlib.org/api/axes_api.html) for details. Returns ------- fig : matplotlib.figure The figure ax : dict dictionary of matplotlib ax Examples -------- .. jupyter-execute:: n = 100 # number of series soi = pyleo.utils.load_dataset('SOI') soi_time_axes = [pyleo.utils.random_time_axis(n=len(soi.time)) for _ in range(n)] soi_ens = pyleo.EnsembleGeoSeries([pyleo.GeoSeries(time=time, value=soi.value,lat=0,lon=0,auto_time_params=True,verbose=False) for time in soi_time_axes]) nino3 = pyleo.utils.load_dataset('NINO3') nino3_time_axes = [pyleo.utils.random_time_axis(n=len(nino3.time)) for _ in range(n)] nino3_ens = pyleo.EnsembleGeoSeries([pyleo.GeoSeries(time=time, value=nino3.value,lat=0,lon=0,auto_time_params=True,verbose=False) for time in nino3_time_axes]) mul_ens = pyleo.MulEnsGeoSeries([nino3_ens,soi_ens]) mcpca = mul_ens.mcpca(nsim=10,seed=42) mcpca.screeplot()''' violin_kwargs = {} if violin_kwargs is None else violin_kwargs.copy() savefig_settings = {} if savefig_settings is None else savefig_settings.copy() if ax is None: fig,ax = plt.subplots(figsize=figsize) modes = np.arange(len(self.pca_list[0].pctvar)) #pctvar_array = np.empty(shape=(len(modes),len(self.pca_list))) data_dict = {} for mode in modes: data_dict[mode] = np.array([pca.pctvar[mode] for pca in self.pca_list]) #pctvar_array[mode,:] = np.array([pca.pctvar[mode] for pca in self.pca_list]) df = pd.DataFrame(columns=['Mode','Pctvar'],dtype='float') df['Mode'] = np.arange(1,len(data_dict)+1) df['Pctvar'] = data_dict.values() df = df.explode('Pctvar').astype({'Pctvar': 'float'}) # pctvar_quantiles = np.quantile(a=pctvar_array,q=quantiles,axis=1) if 'color' in violin_kwargs.keys(): clr_eig = violin_kwargs.pop('color') if 'linewidth' in violin_kwargs.keys(): linewidth = violin_kwargs.pop('linewidth') sns.violinplot(data=df,x='Mode',y='Pctvar',linewidth=linewidth,color=clr_eig,ax=ax,**violin_kwargs) ax.set_xlabel(r'Mode index $i$') ax.set_ylabel(r'$\lambda_i$') if title: ax.set_title(title) if 'path' in savefig_settings: plotting.savefig(fig, settings=savefig_settings) return fig, ax