Source code for pyleoclim.core.multiplegeoseries

"""
A MultipleGeoSeries object is a collection (more precisely, a 
list) of GeoSeries objects. This is handy in case you want to apply the same method 
to such a collection at once (e.g. process a bunch of series in a consistent fashion).
Compared to its parent class MultipleSeries, MultipleGeoSeries opens new possibilites regarding mapping.
"""
from ..core.multipleseries import MultipleSeries
from ..utils import mapping as mp
from ..utils import plotting
import warnings
import copy

import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import cm
from itertools import cycle
import matplotlib.lines as mlines
import numpy as np
#import warnings

#import matplotlib.pyplot as plt
#import matplotlib as mpl
#from matplotlib import cm
#from itertools import cycle
#import matplotlib.lines as mlines
#import copy


[docs]class MultipleGeoSeries(MultipleSeries): '''MultipleGeoSeries object. This object handles a collection of the type GeoSeries and can be created from a list of such objects. MultipleGeoSeries should be used when the need to run analysis on multiple records arises, such as running principal component analysis. Some of the methods automatically transform the time axis prior to analysis to ensure consistency. Parameters ---------- series_list : list a list of pyleoclim.Series objects time_unit : str The target time unit for every series in the list. If None, then no conversion will be applied; Otherwise, the time unit of every series in the list will be converted to the target. label : str label of the collection of timeseries (e.g. 'Euro 2k') Examples -------- .. jupyter-execute:: from pylipd.utils.dataset import load_dir lipd = load_dir(name='Pages2k') df = lipd.get_timeseries_essentials() dfs = df.query("archiveType in ('tree','documents','coral','lake sediment')") # place in a MultipleGeoSeries object ts_list = [] for _, row in dfs.iterrows(): ts_list.append(pyleo.GeoSeries(time=row['time_values'],value=row['paleoData_values'], time_name=row['time_variableName'],value_name=row['paleoData_variableName'], time_unit=row['time_units'], value_unit=row['paleoData_units'], lat = row['geo_meanLat'], lon = row['geo_meanLon'], archiveType = row['archiveType'], verbose = False, label=row['dataSetName']+'_'+row['paleoData_variableName'])) Euro2k = pyleo.MultipleGeoSeries(ts_list, label='Euro2k',time_unit='years AD') Euro2k.map() ''' def __init__(self, series_list, time_unit=None, label=None): self.series_list = series_list from ..core.geoseries import GeoSeries # check that all components are GeoSeries if not all([isinstance(ts, GeoSeries) for ts in series_list]): raise ValueError('All components must be GeoSeries objects') super().__init__(series_list, time_unit, label) # ============ MAP goes here ================
[docs] def map(self, marker='archiveType', hue='archiveType', size=None, cmap=None, edgecolor='k', projection='auto', proj_default=True, crit_dist=5000, colorbar=True, background=True, borders=False, coastline=True,rivers=False, lakes=False, land=True,ocean=True, figsize=None, fig=None, scatter_kwargs=None, gridspec_kwargs=None, legend=True, gridspec_slot=None, lgd_kwargs=None, savefig_settings=None, **kwargs): ''' Parameters ---------- hue : string, optional Grouping variable that will produce points with different colors. Can be either categorical or numeric, although color mapping will behave differently in latter case. The default is 'archiveType'. size : string, optional Grouping variable that will produce points with different sizes. Expects to be numeric. Any data without a value for the size variable will be filtered out. The default is None. marker : string, optional Grouping variable that will produce points with different markers. Can have a numeric dtype but will always be treated as categorical. The default is 'archiveType'. edgecolor : color (string) or list of rgba tuples, optional Color of marker edge. The default is 'w'. projection : string the map projection. Available projections: 'Robinson' (default), 'PlateCarree', 'AlbertsEqualArea', 'AzimuthalEquidistant','EquidistantConic','LambertConformal', 'LambertCylindrical','Mercator','Miller','Mollweide','Orthographic', 'Sinusoidal','Stereographic','TransverseMercator','UTM', 'InterruptedGoodeHomolosine','RotatedPole','OSGB','EuroPP', 'Geostationary','NearsidePerspective','EckertI','EckertII', 'EckertIII','EckertIV','EckertV','EckertVI','EqualEarth','Gnomonic', 'LambertAzimuthalEqualArea','NorthPolarStereo','OSNI','SouthPolarStereo' By default, projection == 'auto', so the projection will be picked based on the degree of clustering of the sites. proj_default : bool, optional If True, uses the standard projection attributes. Enter new attributes in a dictionary to change them. Lists of attributes can be found in the `Cartopy documentation <https://scitools.org.uk/cartopy/docs/latest/crs/projections.html#eckertiv>`_. The default is True. crit_dist : float, optional critical radius for projection choice. Default: 5000 km Only active if projection == 'auto' background : bool, optional If True, uses a shaded relief background (only one available in Cartopy) Default is on (True). borders : bool or dict, optional Draws the countries border. If a dictionary of formatting arguments is supplied (e.g. linewidth, alpha), will draw according to specifications. Defaults is off (False). coastline : bool or dict, optional Draws the coastline. If a dictionary of formatting arguments is supplied (e.g. linewidth, alpha), will draw according to specifications. Defaults is on (True). land : bool or dict, optional Colors land masses. If a dictionary of formatting arguments is supplied (e.g. color, alpha), will draw according to specifications. Default is off (True). Overriden if background=True. ocean : bool or dict, optional Colors oceans. If a dictionary of formatting arguments is supplied (e.g. color, alpha), will draw according to specifications. Default is on (True). Overriden if background=True. rivers : bool or dict, optional Draws major rivers. If a dictionary of formatting arguments is supplied (e.g. linewidth, alpha), will draw according to specifications. Default is off (False). lakes : bool or dict, optional Draws major lakes. If a dictionary of formatting arguments is supplied (e.g. color, alpha), will draw according to specifications. Default is off (False). figsize : list or tuple, optional Size for the figure scatter_kwargs : dict, optional Dict of arguments available in `seaborn.scatterplot <https://seaborn.pydata.org/generated/seaborn.scatterplot.html>`_. Dictionary of arguments available in `matplotlib.pyplot.scatter <https://matplotlib.org/3.2.1/api/_as_gen/matplotlib.pyplot.scatter.html>`_. legend : bool, optional Whether to draw a legend on the figure. Default is True. colorbar : bool, optional Whether to draw a colorbar on the figure if the data associated with hue are numeric. Default is True. lgd_kwargs : dict, optional Dictionary of arguments for `matplotlib.pyplot.legend <https://matplotlib.org/3.2.1/api/_as_gen/matplotlib.pyplot.legend.html>`_. savefig_settings : dict, optional Dictionary of arguments for matplotlib.pyplot.saveFig. - "path" must be specified; it can be any existed or non-existed path, with or without a suffix; if the suffix is not given in "path", it will follow "format" - "format" can be one of {"pdf", "eps", "png", "ps"} extent : TYPE, optional DESCRIPTION. The default is 'global'. cmap : string or list, optional Matplotlib supported colormap id or list of colors for creating a colormap. See `choosing a matplotlib colormap <https://matplotlib.org/3.5.0/tutorials/colors/colormaps.html>`_. The default is None. fig : matplotlib.pyplot.figure, optional See matplotlib.pyplot.figure <https://matplotlib.org/3.5.0/api/_as_gen/matplotlib.pyplot.figure.html#matplotlib-pyplot-figure>_. The default is None. gs_slot : Gridspec slot, optional If generating a map for a multi-plot, pass a gridspec slot. The default is None. gridspec_kwargs : dict, optional Function assumes the possibility of a colorbar, map, and legend. A list of floats associated with the keyword `width_ratios` will assume the first (index=0) is the relative width of the colorbar, the second to last (index = -2) is the relative width of the map, and the last (index = -1) is the relative width of the area for the legend. For information about Gridspec configuration, refer to `Matplotlib documentation <https://matplotlib.org/3.5.0/api/_as_gen/matplotlib.gridspec.GridSpec.html#matplotlib.gridspec.GridSpec>_. The default is None. kwargs: dict, optional - 'missing_val_hue', 'missing_val_marker', 'missing_val_label' can all be used to change the way missing values are represented ('k', '?', are default hue and marker values will be associated with the label: 'missing'). - 'hue_mapping' and 'marker_mapping' can be used to submit dictionaries mapping hue values to colors and marker values to markers. Does not replace passing a string value for hue or marker. Returns ------- fig, ax_d Matplotlib figure, dictionary of ax objects which includes the as many as three items: 'cb' (colorbar ax), 'map' (scatter map), and 'leg' (legend ax) See also -------- pyleoclim.utils.mapping.scatter_map: information-rich scatterplot on Cartopy map Examples -------- .. jupyter-execute:: from pylipd.utils.dataset import load_dir lipd = load_dir(name='Pages2k') df = lipd.get_timeseries_essentials() dfs = df.query("archiveType in ('tree','documents','coral','lake sediment','borehole')") # place in a MultipleGeoSeries object ts_list = [] for _, row in dfs.iterrows(): ts_list.append(pyleo.GeoSeries(time=row['time_values'],value=row['paleoData_values'], time_name=row['time_variableName'],value_name=row['paleoData_variableName'], time_unit=row['time_units'], value_unit=row['paleoData_units'], lat = row['geo_meanLat'], lon = row['geo_meanLon'], elevation = row['geo_meanElev'], observationType = row['paleoData_proxy'], archiveType = row['archiveType'], verbose = False, label=row['dataSetName']+'_'+row['paleoData_variableName'])) Euro2k = pyleo.MultipleGeoSeries(ts_list, label='Euro2k',time_unit='years AD') Euro2k.map() By default, a projection is picked based on the degree of geographic clustering of the sites. To focus on Europe and use a more local projection, do: .. jupyter-execute:: eur_coord = {'central_latitude':45, 'central_longitude':20} Euro2k.map(projection='Orthographic',proj_default=eur_coord) By default, the shape and colors of symbols denote proxy archives; however, one can use either graphical device to convey other information. For instance, if elevation is available, it may be displayed by size, like so: .. jupyter-execute:: Euro2k.map(projection='Orthographic', size='elevation', proj_default=eur_coord) Same with observationType: .. jupyter-execute:: Euro2k.map(projection='Orthographic', hue = 'observationType', proj_default=eur_coord) All three sources of information may be combined, but the figure height will need to be enlarged manually to fit the legend: .. jupyter-execute:: Euro2k.map(projection='Orthographic',hue='observationType', size='elevation', proj_default=eur_coord, figsize=[18, 8]) ''' fig, ax_d = mp.scatter_map(self, hue=hue, size=size, marker=marker, edgecolor=edgecolor, projection=projection, proj_default=proj_default, crit_dist=crit_dist, background=background, borders=borders, rivers=rivers, lakes=lakes, ocean=ocean, coastline=coastline, land=land, gridspec_kwargs=gridspec_kwargs, figsize=figsize, scatter_kwargs=scatter_kwargs, lgd_kwargs=lgd_kwargs, legend=legend, colorbar=colorbar, cmap=cmap, fig=fig, gs_slot=gridspec_slot, **kwargs) return fig, ax_d
[docs] def pca(self, weights=None,missing='fill-em',tol_em=5e-03, max_em_iter=100,**pca_kwargs): '''Principal Component Analysis (Empirical Orthogonal Functions) Decomposition of MultipleGeoSeries object in terms of orthogonal basis functions. Tolerant to missing values, infilled by an EM algorithm. Do make sure the time axes are aligned, however! (e.g. use `common_time()`) Algorithm from statsmodels: https://www.statsmodels.org/stable/generated/statsmodels.multivariate.pca.PCA.html Parameters ---------- weights : ndarray, optional Series weights to use after transforming data according to standardize or demean when computing the principal components. missing : {str, None} Method for missing data. Choices are: * 'drop-row' - drop rows with missing values. * 'drop-col' - drop columns with missing values. * 'drop-min' - drop either rows or columns, choosing by data retention. * 'fill-em' - use EM algorithm to fill missing value [ default]. ncomp should be set to the number of factors required. * `None` raises if data contains NaN values. tol_em : float Tolerance to use when checking for convergence of the EM algorithm. max_em_iter : int Maximum iterations for the EM algorithm. Returns ------- res: MultivariateDecomp Resulting pyleoclim.MultivariateDecomp object See also -------- pyleoclim.utils.tsutils.eff_sample_size : Effective Sample Size of timeseries y pyleoclim.core.multivardecomp.MultivariateDecomp : The multivariate decomposition object pyleoclim.core.mulitpleseries.MulitpleSeries.common_time : align time axes Examples -------- .. jupyter-execute:: from pylipd.utils.dataset import load_dir lipd = load_dir(name='Pages2k') # this loads a small subset of the PAGES 2k database lipd_euro = lipd.filter_by_geo_bbox(-20,20,40,80) df = lipd_euro.get_timeseries_essentials() dfs = df.query("archiveType in ('tree') & paleoData_variableName not in ('year')") # place in a MultipleGeoSeries object ts_list = [] for _, row in dfs.iterrows(): ts_list.append(pyleo.GeoSeries(time=row['time_values'],value=row['paleoData_values'], time_name=row['time_variableName'],value_name=row['paleoData_variableName'], time_unit=row['time_units'], value_unit=row['paleoData_units'], lat = row['geo_meanLat'], lon = row['geo_meanLon'], elevation = row['geo_meanElev'], observationType = row['paleoData_proxy'], archiveType = row['archiveType'], verbose = False, label=row['dataSetName']+'_'+row['paleoData_variableName'])) Euro2k = pyleo.MultipleGeoSeries(ts_list, label='Euro2k',time_unit='years AD') res = Euro2k.common_time().pca() # carry out PCA type(res) # the result is a MultivariateDecomp object To plot the eigenvalue spectrum: .. jupyter-execute:: res.screeplot() To plot the first mode, equivalent to `res.modeplot(index=0)`: .. jupyter-execute:: res.modeplot() To plot the second (note the zero-based indexing): .. jupyter-execute:: res.modeplot(index=1) One can use map semantics to display the observation type as well: .. jupyter-execute:: res.modeplot(index=1, marker='observationType', size='elevation') There are many ways to configure the map component. As a simple example, specifying the projection: .. jupyter-execute:: res.modeplot(index=1, marker='observationType', size='elevation', map_kwargs={'projection':'Robinson'}) Or dive into the nuances of gridspec and legend configurations: .. jupyter-execute:: res.modeplot(index=1, marker='observationType', size='elevation', map_kwargs={'projection':'Robinson', 'gridspec_kwargs': {'width_ratios': [.5, 1,14, 4], 'wspace':-.065}, 'lgd_kwargs':{'bbox_to_anchor':[-.015,1]}}) ''' # apply PCA fom parent class pca_res = super().pca(weights=weights,missing=missing,tol_em=tol_em, max_em_iter=max_em_iter,**pca_kwargs) pca_res.orig = self # add original object for plotting purposes return pca_res
[docs] def time_geo_plot(self, figsize=[10, 3], marker=None, markersize=None, alpha = .8, y_criteria = 'lat', linestyle=None, linewidth=10, colors=None, cmap='turbo', norm=None, xlabel=None, ylabel=None, title=None, time_unit = None, legend=True, inline_legend=False, plot_kwargs=None, lgd_kwargs=None, label_x_offset=200,label_y_offset=0,savefig_settings=None, ax=None, invert_xaxis=False, invert_yaxis=False): '''A plot of the temporal coverage of the records in a MultipleGeoSeries object organized by latitude or longitude. Similar in behaviour to MultipleSeries.time_coverage_plot Inspired by Dr. Mara Y. McPartland. Parameters ---------- figsize : list, optional Size of the figure. The default is [10, 4]. marker : str, optional Marker type. The default is None. markersize : float, optional Marker size. The default is None. alpha : float, optional Alpha of the lines y_criteria : str, optional Criteria for the creation of the y_axis. Can be {'lat','lon',} linestyle : str, optional Line style. The default is None. linewidth : float, optional The width of the line. The default is 10. colors : a list of, or one, Python supported color code (a string of hex code or a tuple of rgba values) Colors for plotting. If None, the plotting will cycle the 'viridis' colormap; if only one color is specified, then all curves will be plotted with that single color; if a list of colors are specified, then the plotting will cycle that color list. cmap : str The colormap to use when "colors" is None. Default is 'turbo'. norm : matplotlib.colors.Normalize The normalization for the colormap. If None, a linear normalization will be used. xlabel : str, optional x-axis label. The default is None. ylabel : str, optional y-axis label. The default is None. title : str, optional Title. The default is None. time_unit : str the target time unit, possible input: { 'year', 'years', 'yr', 'yrs', 'y BP', 'yr BP', 'yrs BP', 'year BP', 'years BP', 'ky BP', 'kyr BP', 'kyrs BP', 'ka BP', 'ka', 'my BP', 'myr BP', 'myrs BP', 'ma BP', 'ma', } default is None, in which case the code picks the most common time unit in the collection. If no unambiguous winner can be found, the unit of the first series in the collection is used. legend : bool, optional Whether the show the legend. The default is True. inline_legend : bool, optional Whether to use inline labels or the default pyleoclim legend. This option overrides lgd_kwargs plot_kwargs : dict, optional Plot parameters. The default is None. lgd_kwargs : dict, optional Legend parameters. The default is None. If inline_legend is True, lgd_kwargs will be passed to ax.text() (see matplotlib.axes.Axes.text documentation) If inline_legend is False, lgd_kwargs will be passed to ax.legend() (see matplotlib.axes.Axes.legend documentation) label_x_offset: float or list, optional Amount to offset label by in the x direction. Only used if inline_legend is True. Default is 200. If list, should have the same number of elements as the MultipleSeries object. label_y_offset : float or list, optional Amount to offset label by in the y direction. Only used if inline_legend is True. Default is 0. If list, should have the same number of elements as the MultipleSeries object. savefig_settings : dictionary, optional the dictionary of arguments for plt.savefig(); some notes below: - "path" must be specified; it can be any existing or non-existing path, with or without a suffix; if the suffix is not given in "path", it will follow "format" - "format" can be one of {"pdf", "eps", "png", "ps"} The default is None. ax : matplotlib.ax, optional The matplotlib axis onto which to return the figure. The default is None. invert_xaxis : bool, optional if True, the x-axis of the plot will be inverted invert_yaxis : bool, optional if True, the y-axis of the plot will be inverted Returns ------- fig : matplotlib.figure the figure object from matplotlib See [matplotlib.pyplot.figure](https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.figure.html) for details. ax : matplotlib.axis the axis object from matplotlib See [matplotlib.axes](https://matplotlib.org/api/axes_api.html) for details. See also -------- pyleoclim.multipleseries.MultipleSeries.time_coverage_plot pyleoclim.utils.plotting.savefig : Saving figure in Pyleoclim Examples -------- .. jupyter-execute:: from pylipd.utils.dataset import load_dir lipd = load_dir(name='Pages2k') df = lipd.get_timeseries_essentials() dfs = df.query("archiveType in ('tree','documents','coral','lake sediment')") # place in a MultipleGeoSeries object ts_list = [] for _, row in dfs.iloc[:5].iterrows(): ts_list.append(pyleo.GeoSeries(time=row['time_values'],value=row['paleoData_values'], time_name=row['time_variableName'],value_name=row['paleoData_variableName'], time_unit=row['time_units'], value_unit=row['paleoData_units'], lat = row['geo_meanLat'], lon = row['geo_meanLon'], archiveType = row['archiveType'], verbose = False, label=row['dataSetName']+'_'+row['paleoData_variableName'])) ms = pyleo.MultipleGeoSeries(ts_list, time_unit='years AD') ms.time_geo_plot() ''' savefig_settings = {} if savefig_settings is None else savefig_settings.copy() plot_kwargs = {} if plot_kwargs is None else plot_kwargs.copy() lgd_kwargs = {} if lgd_kwargs is None else lgd_kwargs.copy() # deal with time units self = self.convert_time_unit(time_unit=time_unit) if ax is None: fig, ax = plt.subplots(figsize=figsize) if title is None and self.label is not None: ax.set_title(self.label, fontweight='bold') for idx, s in enumerate(self.series_list): if colors is None: cmap_obj = plt.get_cmap(cmap) nc = len(self.series_list) if norm is None: norm = mpl.colors.Normalize(vmin=0, vmax=nc-1) clr = cmap_obj(norm(idx%nc)) elif type(colors) is str: clr = colors elif type(colors) is list: nc = len(colors) clr = colors[idx%nc] else: raise TypeError("'colors' should be a list of, or one of, Python's supported color codes (a string of hex code or a tuple of rgba values)") if y_criteria == 'lat': y_mod = s.lat if ylabel is None: ylabel = 'Latitude' elif y_criteria == 'lon': y_mod = s.lon if ylabel is None: ylabel = 'Longitude' else: raise ValueError('Passed y_criteria is not recognized. Please choose from "lat" or "lon"') s.value = np.ones(len(s.value))*y_mod if legend and inline_legend: ax = s.plot( figsize=figsize, marker=marker, markersize=markersize, alpha=alpha, color=clr, linestyle=linestyle, linewidth=linewidth, label=s.label, xlabel=xlabel, ylabel=ylabel, title=title, legend=False, lgd_kwargs=None, plot_kwargs=plot_kwargs, ax=ax, ) if isinstance(label_x_offset,list): x_offset = label_x_offset[idx] else: x_offset=label_x_offset if isinstance(label_y_offset,list): y_offset = label_y_offset[idx] else: y_offset=label_y_offset ax.text(s.time[-1]+x_offset, s.value[-1]+y_offset, s.label, color=clr, **lgd_kwargs) else: ax = s.plot( figsize=figsize, marker=marker, markersize=markersize, alpha=alpha, color=clr, linestyle=linestyle, linewidth=linewidth, label=s.label, xlabel=xlabel, ylabel=ylabel, title=title, legend=legend, lgd_kwargs=lgd_kwargs, plot_kwargs=plot_kwargs, ax=ax, ) if invert_xaxis: ax.invert_xaxis() if invert_yaxis: ax.invert_yaxis() if 'fig' in locals(): if 'path' in savefig_settings: plotting.savefig(fig, settings=savefig_settings) return fig, ax else: return ax