Source code for pyleoclim.core.multivardecomp

import numpy as np
#import pandas as pd
from matplotlib import pyplot as plt, gridspec
#from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable
from matplotlib.ticker import MaxNLocator
#import cartopy.crs as ccrs
#import cartopy.feature as cfeature

from ..core import series
from ..utils import plotting, mapping, tsbase


class MultivariateDecomp:
    ''' Class to hold the results of multivariate decompositions

    Applies to: `pca()`, `mcpca()`, `mssa()`

    Parameters
    ----------

    name : str
        name of the dataset/analysis to use in plots

    eigvals : 1d or 2d array
        eigenvalues from the decomposition. A 1d array holds one eigenvalue
        per mode; a 2d array (modes x Monte-Carlo samples) additionally
        allows uq='MC' in `screeplot()`.

    eigvecs : 2d array
        array of eigenvectors from the decomposition (e.g. EOFs),
        one column per mode

    pctvar : 1d array
        pct variance accounted for by each mode

    pcs : 2d array
        array containing the temporal expansion coefficients
        (e.g. "principal components" in the climate lore), one column per mode

    neff : float
        scalar representing the effective sample size of the leading mode

    orig : MultipleSeries, or MultipleGeoSeries object
        original data, on a common time axis (the time axis used in plots
        is read from its first series)

    '''

    def __init__(self, name, eigvals, eigvecs, pctvar, pcs, neff, orig):
        self.name = name
        self.eigvals = eigvals
        self.eigvecs = eigvecs
        self.pctvar = pctvar
        self.pcs = pcs
        self.neff = neff
        self.orig = orig

    def _eigval_intervals(self, uq):
        ''' Compute the central eigenvalue estimates and their 95% error bars

        Parameters
        ----------

        uq : str
            'N82' (North et al. rule of thumb) or 'MC' (Monte-Carlo quantiles).
            If the eigenvalue array is one-dimensional, 'N82' is always used.

        Returns
        -------

        central : 1d array
            central estimate of each eigenvalue

        yerr : 2d array, shape (2, n_modes)
            non-negative distances from `central` to the lower (row 0) and
            upper (row 1) bound, i.e. the layout expected by
            matplotlib's `errorbar(yerr=...)`

        label : str
            legend label describing the method

        Raises
        ------

        NameError
            if `uq` is not a known method

        ValueError
            if the Monte-Carlo quantiles cannot be computed

        '''
        eigvals = np.asarray(self.eigvals)
        # The N82 formula takes sqrt(1 - sqrt(2/neff)), which needs neff >= 2.
        # Clamp a local copy so that plotting never alters the stored result.
        neff = max(self.neff, 2)

        if eigvals.ndim == 1 and uq != 'N82':
            print("The provided eigenvalue array has only one dimension. UQ defaults to N82")
            uq = 'N82'

        if uq == 'MC':
            try:
                lower, central, upper = np.quantile(eigvals, [0.025, 0.5, 0.975], axis=1)
            except ValueError as err:
                raise ValueError("MC method cannot be applied because eigvals "
                                 "has too few MC samples.") from err
            label = '95% CI (Monte Carlo)'
        elif uq == 'N82':
            central = eigvals
            spread = np.sqrt(2 / neff)
            lower = central * np.sqrt(1 - spread)
            upper = central * np.sqrt(1 + spread)
            label = r'95% CI ($n_\mathrm{eff} = $' + '{:.1f}'.format(neff) + ')'
        else:
            # NameError is kept (rather than ValueError) for backward compatibility
            raise NameError("unknown UQ method. No action taken")

        # errorbar() wants offsets from the central value, not absolute bounds;
        # clip guards against tiny negative offsets from round-off.
        yerr = np.clip(np.vstack([central - lower, upper - central]), 0, None)
        return central, yerr, label

    def screeplot(self, figsize=[6, 4], uq='N82', title=None, ax=None, savefig_settings=None,
                  title_kwargs=None, xlim=[0, 10], clr_eig='C0'):
        ''' Plot the eigenvalue spectrum with uncertainties

        Parameters
        ----------

        figsize : list, optional
            The figure size. The default is [6, 4].
            Only used when `ax` is not provided.

        uq : str, optional
            Method used for uncertainty quantification of the eigenvalues.
            'N82' uses the North et al "rule of thumb" [1] with effective sample size
            computed as in [2].
            'MC' uses Monte-Carlo simulations (e.g. MC-EOF). Raises an error if
            the ensemble is too small. If the eigenvalue array is
            one-dimensional, 'N82' is used regardless.

        title : str, optional
            Plot title. The default is the object's name followed by ' eigenvalues'.

        ax : matplotlib.axis, optional
            the axis object from matplotlib
            See [matplotlib.axes](https://matplotlib.org/api/axes_api.html) for details.
            If provided, the plot is drawn on it and its parent figure is returned.

        savefig_settings : dict
            the dictionary of arguments for plt.savefig(); some notes below:
            - "path" must be specified; it can be any existed or non-existed path,
              with or without a suffix; if the suffix is not given in "path", it will follow "format"
            - "format" can be one of {"pdf", "eps", "png", "ps"}

        title_kwargs : dict, optional
            the keyword arguments for ax.set_title()

        xlim : list, optional
            x-axis limits. The default is [0, 10] (first 10 eigenvalues).
            Only the largest value is used, capped at the number of modes.

        clr_eig : str, optional
            color to be used for plotting eigenvalues

        Returns
        -------

        fig : matplotlib.figure
            The figure

        ax : matplotlib.axis
            The axis

        See Also
        --------

        pyleoclim.core.MultipleSeries.pca : Principal Component Analysis

        References
        ----------

        [1]_ North, G. R., T. L. Bell, R. F. Cahalan, and F. J. Moeng (1982),
            Sampling errors in the estimation of empirical orthogonal functions,
            Mon. Weather Rev., 110, 699–706.

        [2]_ Hannachi, A., I. T. Jolliffe, and D. B. Stephenson (2007), Empirical
            orthogonal functions and related techniques in atmospheric science:
            A review, International Journal of Climatology, 27(9),
            1119–1152, doi:10.1002/joc.1499.

        '''
        savefig_settings = {} if savefig_settings is None else savefig_settings.copy()

        if ax is None:
            fig, ax = plt.subplots(figsize=figsize)
        else:
            # needed for savefig and for the return value
            fig = ax.get_figure()

        Lc, Lerr, eb_lbl = self._eigval_intervals(uq)
        idx = np.arange(len(Lc)) + 1

        ax.errorbar(x=idx, y=Lc, yerr=Lerr, color=clr_eig, marker='o', ls='',
                    alpha=1.0, label=eb_lbl)

        if title is None:
            title = self.name + ' eigenvalues'
        title_kwargs = {} if title_kwargs is None else title_kwargs.copy()
        t_args = {'y': 1.1, 'weight': 'bold'}
        t_args.update(title_kwargs)
        ax.set_title(title, **t_args)

        ax.legend()
        ax.set_xlabel(r'Mode index $i$')
        ax.set_ylabel(r'$\lambda_i$')
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))  # enforce integer values
        if xlim is not None:
            ax.set_xlim(0.5, min(max(xlim), len(Lc)))

        if 'path' in savefig_settings:
            plotting.savefig(fig, settings=savefig_settings)

        return fig, ax

    def modeplot(self, index=0, figsize=[8, 8], fig=None, savefig_settings=None, gs=None,
                 title=None, title_kwargs=None, spec_method='mtm', cmap=None,
                 hue='EOF', marker='archiveType', size=None, scatter_kwargs=None,
                 flip=False, map_kwargs=None, gridspec_kwargs=None):
        ''' Dashboard visualizing the properties of a given mode, including:
            1. The temporal coefficient (PC or similar)
            2. its spectrum
            3. The loadings (EOF or similar), possibly geolocated. If the object
               does not have geolocation information, a spaghetti plot of the
               standardized series is displayed.

        Parameters
        ----------

        index : int
            the (0-based) index of the mode to visualize.
            Default is 0, corresponding to the first mode.

        figsize : list, optional
            The figure size. The default is [8, 8].
            Only used when `fig` is not provided.

        fig : matplotlib.figure, optional
            Existing figure to draw on. If None, a new figure is created.

        savefig_settings : dict
            the dictionary of arguments for plt.savefig(); some notes below:
            - "path" must be specified; it can be any existed or non-existed path,
              with or without a suffix; if the suffix is not given in "path", it will follow "format"
            - "format" can be one of {"pdf", "eps", "png", "ps"}

        gs : matplotlib.gridspec object, optional
            Requires at least two rows and two columns.
            - top row, left: timeseries of principal component
            - top row, right: PSD
            - bottom row: spaghetti plot or map
            See [matplotlib.gridspec.GridSpec](https://matplotlib.org/stable/tutorials/intermediate/gridspec.html) for details.

        title : str, optional
            text for figure title

        title_kwargs : dict
            the keyword arguments for fig.suptitle()

        spec_method : str, optional
            The name of the spectral method to be applied on the PC. Default: MTM
            Note that the data are evenly-spaced, so any spectral method that
            assumes even spacing is applicable here: 'mtm', 'welch', 'periodogram'
            'wwz' is relevant if scaling exponents need to be estimated, but ill-advised otherwise, as it is very slow.

        cmap : str, optional
            if 'hue' is specified, will be used for map scatter plot values.
            colormap name for the loadings (https://matplotlib.org/stable/tutorials/colors/colormaps.html)

        hue : str, optional
            (only applicable if using scatter map) Variable associated with color coding for points plotted on map.
            May correspond to a continuous or categorical variable.
            The default is 'EOF'.

        marker : string, optional
            (only applicable if using scatter map) Grouping variable that will produce points with different markers.
            Can have a numeric dtype but will always be treated as categorical.
            The default is 'archiveType'.

        size : str, optional
            (only applicable if using scatter map) Variable associated with size.
            Must correspond to a continuous numeric variable.
            The default is None.

        scatter_kwargs : dict, optional
            Optional arguments configuring how data are plotted on a map.
            See description of scatter_kwargs in pyleoclim.utils.mapping.scatter_map.
            The dictionary passed in is not modified.

        flip : bool, optional
            If True, the sign of both the PC and the EOF is reversed.
            The default is False.

        map_kwargs : dict, optional
            Optional arguments for map configuration
            - projection: str; Optional value for map projection. Default 'auto'.
            - proj_default: bool
            - lakes, land, ocean, rivers, borders, coastline, background: bool or dict;
            - extent: Default 'global'.
            - edgecolor: if present, forwarded to scatter_kwargs
            - lgd_kwargs: dict; Optional values for how the map legend is configured
            - gridspec_kwargs: dict; Optional values for adjusting the arrangement of the colorbar, map and legend in the map subplot
            - legend: bool; Whether to draw a legend on the figure. Default is True
            - colorbar: bool; Whether to draw a colorbar on the figure if the data associated with hue are numeric. Default is True
            The default is None.

        gridspec_kwargs : dict, optional
            Dictionary with custom gridspec values (only used when `gs` is None).
            - wspace changes space between columns (default: wspace=0.05)
            - hspace changes space between rows (default: hspace=0.03)
            - width_ratios: relative width of each column (default: width_ratios=[5,1,3] where middle column serves as a spacer)
            - height_ratios: relative height of each row (default: height_ratios=[2,1,5] where middle row serves as a spacer)

        Returns
        -------

        fig : matplotlib.figure
            The figure

        ax : dict
            dictionary of matplotlib ax, with keys 'pc', 'psd' and 'map'

        See also
        --------

        pyleoclim.core.MultipleSeries.pca : Principal Component Analysis

        pyleoclim.core.MultipleGeoSeries.pca : Principal Component Analysis

        pyleoclim.utils.tsutils.eff_sample_size : Effective sample size

        pyleoclim.utils.mapping.scatter_map : mapping

        '''
        # imported here rather than at module level (presumably to avoid a circular import)
        from ..core.multiplegeoseries import MultipleGeoSeries

        savefig_settings = {} if savefig_settings is None else savefig_settings.copy()

        if flip:
            PC = -self.pcs[:, index]
            EOF = -self.eigvecs[:, index]
        else:
            PC = self.pcs[:, index]
            EOF = self.eigvecs[:, index]

        if fig is None:
            fig = plt.figure(figsize=figsize)

        if gs is None:
            gridspec_kwargs = gridspec_kwargs if isinstance(gridspec_kwargs, dict) else {}
            gridspec_defaults = dict(wspace=0.05, hspace=0.03, width_ratios=[5, 1, 3],
                                     height_ratios=[2, 1, 5])
            gridspec_defaults.update(gridspec_kwargs)
            gs = gridspec.GridSpec(len(gridspec_defaults['height_ratios']),
                                   len(gridspec_defaults['width_ratios']),
                                   **gridspec_defaults)
            gs.update(left=0, right=1.1)

        ax = {}

        # plot the PC
        ax['pc'] = fig.add_subplot(gs[0, 0])
        # triple braces: the whole mode number is the subscript (matters from mode 10 on)
        label = rf'$PC_{{{index + 1}}}$'
        t = self.orig.series_list[0].time

        # get time unit
        if self.orig.time_unit is not None:
            time_unit = self.orig.time_unit
        else:
            time_unit = self.orig.series_list[0].time_unit
        time_name, _ = tsbase.disambiguate_time_metadata(time_unit)

        # define timeseries object for the PC
        ts = series.Series(time=t, value=PC, verbose=False,
                           time_name=time_name, time_unit=time_unit)
        ts.plot(ax=ax['pc'])
        ax['pc'].set_ylabel(label)

        # plot its PSD
        ax['psd'] = fig.add_subplot(gs[0, -1])
        psd = ts.interp().spectral(method=spec_method)
        _ = psd.plot(ax=ax['psd'], label=label)

        # plot spatial pattern or spaghetti
        map_kwargs = {} if map_kwargs is None else map_kwargs.copy()

        projection = map_kwargs.pop('projection', 'auto')
        proj_default = map_kwargs.pop('proj_default', True)
        lakes = map_kwargs.pop('lakes', False)
        land = map_kwargs.pop('land', False)
        ocean = map_kwargs.pop('ocean', False)
        rivers = map_kwargs.pop('rivers', False)
        borders = map_kwargs.pop('borders', True)
        coastline = map_kwargs.pop('coastline', True)
        background = map_kwargs.pop('background', True)
        extent = map_kwargs.pop('extent', 'global')

        # map_kwargs is only a shallow copy: copy the nested dict before editing it
        map_gridspec_kwargs = dict(map_kwargs.pop('gridspec_kwargs', {}))
        lgd_kwargs = map_kwargs.pop('lgd_kwargs', {})

        if 'edgecolor' in map_kwargs:
            # work on a copy so the caller's dict is untouched and None is accepted
            scatter_kwargs = {} if scatter_kwargs is None else dict(scatter_kwargs)
            scatter_kwargs['edgecolor'] = map_kwargs['edgecolor']

        legend = map_kwargs.pop('legend', True)
        colorbar = map_kwargs.pop('colorbar', True)

        if isinstance(self.orig, MultipleGeoSeries):
            # This makes a bare bones dataframe from a MultipleGeoSeries object
            df = mapping.make_df(self.orig, hue=hue, marker=marker, size=size)
            # additional columns are added manually
            df['EOF'] = EOF

            if legend:
                map_gridspec_kwargs.setdefault('width_ratios', [.7, .1, 12, 4])

            _, ax['map'] = mapping.scatter_map(df, hue=hue, size=size, marker=marker,
                                               projection=projection, proj_default=proj_default,
                                               background=background, borders=borders,
                                               coastline=coastline, rivers=rivers, lakes=lakes,
                                               ocean=ocean, land=land, extent=extent,
                                               figsize=None, scatter_kwargs=scatter_kwargs,
                                               lgd_kwargs=lgd_kwargs,
                                               gridspec_kwargs=map_gridspec_kwargs,
                                               colorbar=colorbar, legend=legend, cmap=cmap,
                                               fig=fig, gs_slot=gs[-1, :])
        else:
            # it must be a plain old MultipleSeries. No map for you!
            # Just a spaghetti plot with the standardized series
            ax['map'] = fig.add_subplot(gs[1:, :])
            self.orig.standardize().plot(ax=ax['map'], title='',
                                         ylabel='Original Data (standardized)')

        if title is None:
            title = self.name + ' mode ' + str(index + 1) + ', ' + '{:3.2f}'.format(
                self.pctvar[index]) + '% variance explained'

        title_kwargs = {} if title_kwargs is None else title_kwargs.copy()
        t_args = {'y': .92, 'weight': 'bold'}
        t_args.update(title_kwargs)
        fig.suptitle(title, **t_args)
        fig.tight_layout()

        if 'path' in savefig_settings:
            plotting.savefig(fig, settings=savefig_settings)

        return fig, ax