"""
The GeoSeries class is a child of Series, with additional metadata latitude (lat) and longitude (lon)
This unlocks plotting capabilities like map() and dashboard().
"""
from ..utils import plotting, mapping, lipdutils, jsonutils, tsbase, tsutils
from ..core.series import Series
import matplotlib.pyplot as plt
import re
import pandas as pd
#from copy import deepcopy
from matplotlib import gridspec
#import cartopy.crs as ccrs
#import cartopy.feature as cfeature
import warnings
[docs]class GeoSeries(Series):
'''The GeoSeries class is a child of the Series class, and requires geolocation
information (latitude, longitude). Elevation is optional, but can be used in mapping, if present.
The class also allows for ancillary data and metadata, detailed below.
Parameters
----------
time : list or numpy.array
independent variable (t)
value : list of numpy.array
values of the dependent variable (y)
lat : float
latitude N in decimal degrees. Must be in the range [-90;+90]
lon : float
longitude East in decimal degrees. Must be in the range [-180;+360]
No conversion is applied as mapping utilities convert to [-180,+180] internally
elevation : float
elevation of the sample, in meters above sea level. Negative numbers indicate depth below global mean sea level, therefore.
time_unit : string
Units for the time vector (e.g., 'years').
Default is None
time_name : string
Name of the time vector (e.g., 'Time','Age').
Default is None. This is used to label the time axis on plots
value_name : string
Name of the value vector (e.g., 'temperature')
Default is None
value_unit : string
Units for the value vector (e.g., 'deg C')
Default is None
label : string
Name of the time series (e.g., 'Nino 3.4')
Default is None
log : dict
Dictionary of tuples documentating the various transformations applied to the object
keep_log : bool
Whether to keep a log of applied transformations. False by default
importedFrom : string
source of the dataset. If it came from a LiPD file, this could be the datasetID property
archiveType : string
climate archive, one of 'Borehole', 'Coral', 'FluvialSediment', 'GlacierIce', 'GroundIce', 'LakeSediment', 'MarineSediment', 'Midden', 'MolluskShell', 'Peat', 'Sclerosponge', 'Shoreline', 'Speleothem', 'TerrestrialSediment', 'Wood'
Reference: https://lipdverse.org/vocabulary/archivetype/
control_archiveType : [True, False]
Whether to standardize the name of the archiveType agains the vocabulary from: https://lipdverse.org/vocabulary/paleodata_proxy/.
If set to True, will only allow for these terms and automatically convert known synonyms to the standardized name. Only standardized variable names will be automatically assigned a color scheme.
Default is False.
sensorType : string
sensor, e.g. a paleoclimate proxy sensor. This property can be used to differentiate between species of foraminifera
observationType : string
observation type, e.g. a proxy observation. See https://lipdverse.org/vocabulary/paleodata_proxy/. Note: this is preferred terminology but not enforced
depth : array
depth at which the values were collected
depth_name : string
name of the field, e.g. 'mid-depth', 'top-depth', etc
depth_unit : string
units of the depth axis, e.g. 'cm'
dropna : bool
Whether to drop NaNs from the series to prevent downstream functions from choking on them
defaults to True
sort_ts : str
Direction of sorting over the time coordinate; 'ascending' or 'descending'
Defaults to 'ascending'
verbose : bool
If True, will print warning messages if there is any
clean_ts : boolean flag
set to True to remove the NaNs and make time axis strictly prograde with duplicated timestamps reduced by averaging the values
Default is None (marked for deprecation)
auto_time_params : bool,
If True, uses tsbase.disambiguate_time_metadata to ensure that time_name and time_unit are usable by Pyleoclim. This may override the provided metadata.
If False, the provided time_name and time_unit are used. This may break some functionalities (e.g. common_time and convert_time_unit), so use at your own risk.
If not provided, code will set to True for internal consistency.
Examples
--------
Import the EPICA Dome C deuterium record and display a quick synopsis:
.. jupyter-execute::
import pyleoclim as pyleo
ts = pyleo.utils.datasets.load_dataset('EDC-dD')
ts_interp = ts.convert_time_unit('kyr BP').interp(step=.5) # interpolate for a faster result
fig, ax = ts_interp.dashboard()
'''
def __init__(self, time, value, lat, lon, elevation = None, time_unit=None, time_name=None,
value_name=None, value_unit=None, label=None, importedFrom=None,
archiveType = None, control_archiveType = False,
sensorType = None, observationType = None,
log=None, keep_log=False, verbose=True,
depth = None, depth_name = None, depth_unit= None,
sort_ts = 'ascending', dropna = True, clean_ts=False, auto_time_params = None):
if auto_time_params is None:
auto_time_params = True
if verbose:
warnings.warn('auto_time_params is not specified. Currently default behavior sets this to True, which might modify your supplied time metadata. Please set to False if you want a different behavior.', UserWarning, stacklevel=2)
if auto_time_params:
# assign time metadata if they are not provided or provided incorrectly
offending = [tsbase.MATCH_CE, tsbase.MATCH_BP]
if time_unit is None:
time_unit='years CE'
if verbose:
warnings.warn(f'No time_unit parameter provided. Assuming {time_unit}.', UserWarning, stacklevel=2)
elif time_unit.lower().replace(".","") in frozenset().union(*offending):
# fix up time name and units for offending cases
time_name, time_unit = tsbase.disambiguate_time_metadata(time_unit)
else:
# give a proper time name to those series that confuse that notion with time units
time_name, _ = tsbase.disambiguate_time_metadata(time_unit)
if time_name is None:
if verbose:
warnings.warn('No time_name parameter provided. Assuming "Time".', UserWarning, stacklevel=2)
time_name='Time'
elif time_name in tsbase.MATCH_A:
if verbose:
warnings.warn(f'{time_name} refers to the units, not the name of the axis. Picking "Time" instead', UserWarning, stacklevel=2)
time_name='Time'
else:
pass
# assign latitude
if lat is not None:
lat = float(lat)
if -90 <= lat <= 90:
self.lat = lat
else:
ValueError('Latitude must be a number in [-90; 90]')
else:
self.lat = None
# assign longitude
if lon is not None:
lon = float(lon)
if -180 < lon <= 360:
self.lon = lon
# elif 180 < lon <= 360:
# self.lon = mapping.lon_360_to_180(lon)
# if verbose:
# print('Longitude has been converted to the [-180,+180] range')
else:
ValueError('Longitude must be a number in [-180,360]')
else:
self.lon = None
# elevation
self.elevation = elevation
# PSM
self.sensorType = sensorType
self.observationType = observationType
# depth infornation
self.depth = depth
self.depth_name = depth_name
self.depth_unit = depth_unit
#assign all the rest
super().__init__(time, value, time_unit, time_name, value_name,
value_unit, label, importedFrom, archiveType, control_archiveType,
log, keep_log, sort_ts, dropna, verbose, clean_ts)
@property
def metadata(self):
return dict(
lat = self.lat,
lon = self.lon,
elevation = self.elevation,
time_unit = self.time_unit,
time_name = self.time_name,
value_unit = self.value_unit,
value_name = self.value_name,
label = self.label,
archiveType = self.archiveType,
sensorType = self.sensorType,
observationType = self.observationType,
importedFrom = self.importedFrom,
control_archiveType = self.control_archiveType,
log = self.log,
)
[docs] @classmethod
def from_json(cls, path):
''' Creates a pyleoclim.Series from a JSON file
The keys in the JSON file must correspond to the parameter associated with a GeoSeries object
Parameters
----------
path : str
Path to the JSON file
Returns
-------
ts : pyleoclim.core.series.Series
A Pyleoclim Series object.
'''
a = jsonutils.open_json(path)
b = jsonutils.iterate_through_dict(a, 'GeoSeries')
return cls(**b)
@classmethod
def from_Series(lat, lon, elevation=None,sensorType=None,observationType=None,
depth=None, depth_name=None, depth_unit=None):
print('a')
# time, value, lat, lon, elevation = None, time_unit=None, time_name=None,
# value_name=None, value_unit=None, label=None, importedFrom=None,
# archiveType = None, control_archiveType = False,
# sensorType = None, observationType = None,
# log=None, keep_log=False, verbose=True,
# depth = None, depth_name = None, depth_unit= None,
# sort_ts = 'ascending', dropna = True, clean_ts=False, auto_time_params = None
[docs] def map(self, projection='Orthographic', proj_default=True,
background=True, borders=False, coastline=True, rivers=False, lakes=False, ocean=True,
land=True, fig=None, gridspec_slot=None,
figsize=None, marker='archiveType', hue='archiveType', size=None, edgecolor='w',
markersize=None, scatter_kwargs=None, cmap=None, colorbar=False, gridspec_kwargs=None,
legend=True, lgd_kwargs=None, savefig_settings=None):
'''Map the location of the record
Parameters
----------
projection : str, optional
The projection to use. The default is 'Orthographic'.
proj_default : bool; {True, False}, optional
Whether to use the Pyleoclim defaults for each projection type. The default is True.
background : bool, optional
If True, uses a shaded relief background (only one available in Cartopy)
Default is on (True).
borders : bool or dict, optional
Draws the countries border.
If a dictionary of formatting arguments is supplied (e.g. linewidth, alpha), will draw according to specifications.
Defaults is off (False).
coastline : bool or dict, optional
Draws the coastline.
If a dictionary of formatting arguments is supplied (e.g. linewidth, alpha), will draw according to specifications.
Defaults is on (True).
land : bool or dict, optional
Colors land masses.
If a dictionary of formatting arguments is supplied (e.g. color, alpha), will draw according to specifications.
Default is off (True). Overriden if background=True.
ocean : bool or dict, optional
Colors oceans.
If a dictionary of formatting arguments is supplied (e.g. color, alpha), will draw according to specifications.
Default is on (True). Overriden if background=True.
rivers : bool or dict, optional
Draws major rivers.
If a dictionary of formatting arguments is supplied (e.g. linewidth, alpha), will draw according to specifications.
Default is off (False).
lakes : bool or dict, optional
Draws major lakes.
If a dictionary of formatting arguments is supplied (e.g. color, alpha), will draw according to specifications.
Default is off (False).
figsize : list or tuple, optional
The size of the figure. The default is None.
marker : str, optional
The marker type for each archive.
The default is None. Uses plot_default
hue : str, optional
Variable associated with color coding.
The default is None. Uses plot_default.
markersize : float, optional
Size of the marker. The default is None.
scatter_kwargs : dict, optional
Parameters for the scatter plot. The default is None.
legend : bool; {True, False}, optional
Whether to plot the legend. The default is True.
lgd_kwargs : dict, optional
Arguments for the legend. The default is None.
savefig_settings : dict, optional
the dictionary of arguments for plt.savefig(); some notes below:
- "path" must be specified; it can be any existed or non-existed path,
with or without a suffix; if the suffix is not given in "path", it will follow "format"
- "format" can be one of {"pdf", "eps", "png", "ps"}. The default is None.
Returns
-------
res : fig,ax_d
See also
--------
pyleoclim.utils.mapping.scatter_map : Underlying mapping function for Pyleoclim
Examples
--------
.. jupyter-execute::
import pyleoclim as pyleo
ts = pyleo.utils.datasets.load_dataset('EDC-dD')
fig, ax = ts.map()
'''
if markersize != None:
scatter_kwargs['markersize'] = markersize
fig, ax_d = mapping.scatter_map(self, hue=hue, size=size, marker=marker, projection=projection,
proj_default=proj_default,
background=background, borders=borders, rivers=rivers, lakes=lakes,
ocean=ocean, land=land, coastline=coastline,
figsize=figsize, scatter_kwargs=scatter_kwargs, gridspec_kwargs=gridspec_kwargs,
lgd_kwargs=lgd_kwargs, legend=legend, colorbar=colorbar,
cmap=cmap, edgecolor=edgecolor,
fig=fig, gs_slot=gridspec_slot)
return fig, ax_d
[docs] def map_neighbors(self, mgs, radius=3000, projection='Orthographic', proj_default=True,
background=True, borders=False, rivers=False, lakes=False, ocean=True,
land=True, fig=None, gridspec_slot=None,
figsize=None, marker='archiveType', hue='archiveType', size=None, edgecolor='w',
markersize=None, scatter_kwargs=None, cmap=None, colorbar=False, gridspec_kwargs=None,
legend=True, lgd_kwargs=None, savefig_settings=None):
'''Map all records within a given radius of the object
Parameters
----------
mgs : MultipleGeoSeries
object containing the series to be considered as neighbors
radius : float
search radius for the record, in km. Default is 3000.
projection : str, optional
The projection to use. The default is 'Orthographic'.
proj_default : bool; {True, False}, optional
Whether to use the Pyleoclim defaults for each projection type. The default is True.
background : bool; {True, False}, optional
Whether to use a background. The default is True.
borders : bool; {True, False}, optional
Draw borders. The default is False.
rivers : bool; {True, False}, optional
Draw rivers. The default is False.
lakes : bool; {True, False}, optional
Draw lakes. The default is False.
figsize : list or tuple, optional
The size of the figure. The default is None.
marker : str, optional
The marker type for each archive.
The default is None. Uses plot_default
hue : str, optional
Variable associated with color coding.
The default is None. Uses plot_default.
markersize : float, optional
Size of the marker. The default is None.
scatter_kwargs : dict, optional
Parameters for the scatter plot. The default is None.
legend : bool; {True, False}, optional
Whether to plot the legend. The default is True.
lgd_kwargs : dict, optional
Arguments for the legend. The default is None.
savefig_settings : dict, optional
the dictionary of arguments for plt.savefig(); some notes below:
- "path" must be specified; it can be any existed or non-existed path,
with or without a suffix; if the suffix is not given in "path", it will follow "format"
- "format" can be one of {"pdf", "eps", "png", "ps"}. The default is None.
Returns
-------
res : fig,ax_d
See also
--------
pyleoclim.utils.mapping.map : Underlying mapping function for Pyleoclim
Examples
--------
.. jupyter-execute::
from pylipd.utils.dataset import load_dir
lipd = load_dir(name='Pages2k')
df = lipd.get_timeseries_essentials()
dfs = df.query("archiveType in ('tree','documents','coral','lake sediment') and paleoData_variableName not in ('year')")
# place in a MultipleGeoSeries object
ts_list = []
for _, row in dfs.iterrows():
ts_list.append(pyleo.GeoSeries(time=row['time_values'],value=row['paleoData_values'],
time_name=row['time_variableName'],value_name=row['paleoData_variableName'],
time_unit=row['time_units'], value_unit=row['paleoData_units'],
lat = row['geo_meanLat'], lon = row['geo_meanLon'],
archiveType = row['archiveType'], verbose = False,
label=row['dataSetName']+'_'+row['paleoData_variableName']))
mgs = pyleo.MultipleGeoSeries(ts_list,time_unit='years AD')
gs = mgs.series_list[6] # extract one record as the target one
gs.map_neighbors(mgs, radius=4000)
'''
from ..core.multiplegeoseries import MultipleGeoSeries
if markersize != None:
scatter_kwargs['markersize'] = markersize
# find neighbors
lats = [ts.lat for ts in mgs.series_list]
lons = [ts.lon for ts in mgs.series_list]
dist = mapping.compute_dist(self.lat, self.lon, lats, lons)
neigh_idx = mapping.within_distance(dist, radius)
neighbors =[mgs.series_list[i] for i in neigh_idx if i !=0]
neighbors = MultipleGeoSeries(neighbors)
df = mapping.make_df(neighbors, hue=hue, marker=marker, size=size)
df_self = mapping.make_df(self, hue=hue, marker=marker, size=size)
neighborhood = pd.concat([df, df_self], axis=0)
# additional columns are added manually
neighbor_coloring = ['w' for ik in range(len(neighborhood))]
neighbor_coloring[-1] = 'k'
neighborhood['original'] =neighbor_coloring
# plot neighbors
fig, ax_d = mapping.scatter_map(neighborhood, fig=fig, gs_slot=gridspec_slot, hue=hue, size=size, marker=marker, projection=projection,
proj_default=proj_default,
background=background, borders=borders, rivers=rivers, lakes=lakes,
ocean=ocean, land=land,
figsize=figsize, scatter_kwargs=scatter_kwargs, lgd_kwargs=lgd_kwargs,
gridspec_kwargs=gridspec_kwargs, colorbar=colorbar,
legend=legend, cmap=cmap, edgecolor=neighborhood['original'].values)
return fig, ax_d
[docs] def dashboard(self, figsize=[11, 8], gs=None, plt_kwargs=None, histplt_kwargs=None, spectral_kwargs=None,
spectralsignif_kwargs=None, spectralfig_kwargs=None, map_kwargs=None,
hue='archiveType', marker='archiveType', size=None, scatter_kwargs=None,
gridspec_kwargs=None,
savefig_settings=None):
'''
Parameters
----------
figsize : list or tuple, optional
Figure size. The default is [11,8].
gs : matplotlib.gridspec object, optional
Requires at least two rows and 4 columns.
- top row, left: timeseries
- top row, right: histogram
- bottom left: map
- bottom right: PSD
See [matplotlib.gridspec.GridSpec](https://matplotlib.org/stable/tutorials/intermediate/gridspec.html) for details.
plt_kwargs : dict, optional
Optional arguments for the timeseries plot. See Series.plot() or EnsembleSeries.plot_envelope(). The default is None.
histplt_kwargs : dict, optional
Optional arguments for the distribution plot. See Series.histplot() or EnsembleSeries.plot_distplot(). The default is None.
spectral_kwargs : dict, optional
Optional arguments for the spectral method. Default is to use Lomb-Scargle method. See Series.spectral() or EnsembleSeries.spectral(). The default is None.
spectralsignif_kwargs : dict, optional
Optional arguments to estimate the significance of the power spectrum. See PSD.signif_test. Note that we currently do not support significance testing for ensembles. The default is None.
spectralfig_kwargs : dict, optional
Optional arguments for the power spectrum figure. See PSD.plot() or MultiplePSD.plot_envelope(). The default is None.
map_kwargs : dict, optional
Optional arguments for map configuration
- projection: str; Optional value for map projection. Default 'auto'.
- proj_default: bool
- lakes, land, ocean, rivers, borders, coastline, background: bool or dict;
- lgd_kwargs: dict; Optional values for how the map legend is configured
- gridspec_kwargs: dict; Optional values for adjusting the arrangement of the colorbar, map and legend in the map subplot
- legend: bool; Whether to draw a legend on the figure. Default is True
- colorbar: bool; Whether to draw a colorbar on the figure if the data associated with hue are numeric. Default is True
The default is None.
hue : str, optional
Variable associated with color coding for points plotted on map. May correspond to a continuous or categorical variable.
The default is 'archiveType'.
size : str, optional
Variable associated with size. Must correspond to a continuous numeric variable.
The default is None.
marker : string, optional
Grouping variable that will produce points with different markers. Can have a numeric dtype but will always be treated as categorical.
The default is 'archiveType'.
scatter_kwargs : dict, optional
Optional arguments configuring how data are plotted on a map. See description of scatter_kwargs in pyleoclim.utils.mapping.scatter_map
gridspec_kwargs : dict, optional
Optional dictionary for configuring dashboard layout using gridspec
For information about Gridspec configuration, refer to `Matplotlib documentation <https://matplotlib.org/3.5.0/api/_as_gen/matplotlib.gridspec.GridSpec.html#matplotlib.gridspec.GridSpec>_. The default is None.
savefig_settings : dict, optional
the dictionary of arguments for plt.savefig(); some notes below:
- "path" must be specified; it can be any existed or non-existed path,
with or without a suffix; if the suffix is not given in "path", it will follow "format"
- "format" can be one of {"pdf", "eps", "png", "ps"}.
The default is None.
Returns
-------
fig : matplotlib.figure
The figure
ax : dict
dictionary of matplotlib ax
See also
--------
pyleoclim.core.series.Series.plot : plot a timeseries
pyleoclim.core.ensembleseries.EnsembleSeries.plot_envelope: Envelope plots for an ensemble
pyleoclim.core.series.Series.histplot : plot a distribution of the timeseries
pyleoclim.core.ensembleseries.EnsembleSeries.histplot : plot a distribution of the timeseries across ensembles
pyleoclim.core.series.Series.spectral : spectral analysis method.
pyleoclim.core.multipleseries.MultipleSeries.spectral : spectral analysis method for multiple series.
pyleoclim.core.psds.PSD.signif_test : significance test for timeseries analysis
pyleoclim.core.psds.PSD.plot : plot power spectrum
pyleoclim.core.psds.MulitplePSD.plot : plot envelope of power spectrum
pyleoclim.core.geoseries.GeoSeries.map : map location of dataset
pyleoclim.utils.mapping.scatter_map : Underlying mapping function for Pyleoclim
Examples
--------
.. jupyter-execute::
import pyleoclim as pyleo
ts = pyleo.utils.datasets.load_dataset('EDC-dD')
ts_interp = ts.convert_time_unit('kyr BP').interp(step=.5) # interpolate for a faster result
fig, ax = ts_interp.dashboard()
'''
savefig_settings = {} if savefig_settings is None else savefig_settings.copy()
# start plotting
fig = plt.figure(figsize=figsize)
if gs == None:
gridspec_kwargs = {} if type(gridspec_kwargs) != dict else gridspec_kwargs
gridspec_defaults = dict(wspace=0, width_ratios=[3, .25, 2, 1],
height_ratios=[1, .1, 1], left=0, right=1.1)
gridspec_defaults.update(gridspec_kwargs)
gs = gridspec.GridSpec(len(gridspec_defaults['height_ratios']), len(gridspec_defaults['width_ratios']), **gridspec_defaults)
ax = {}
# Plot the timeseries
plt_kwargs = {} if plt_kwargs is None else plt_kwargs.copy()
ax['ts'] = fig.add_subplot(gs[0, :-1])
plt_kwargs.update({'ax': ax['ts']})
# use the defaults if color/markers not specified
if self.archiveType is not None:
archiveType = lipdutils.LipdToOntology(self.archiveType)
if archiveType not in lipdutils.PLOT_DEFAULT.keys():
archiveType = 'Other'
else:
archiveType = 'Other'
if 'marker' not in plt_kwargs.keys():
plt_kwargs.update({'marker': lipdutils.PLOT_DEFAULT[archiveType][1]})
if 'color' not in plt_kwargs.keys():
plt_kwargs.update({'color': lipdutils.PLOT_DEFAULT[archiveType][0]})
ax['ts'] = self.plot(**plt_kwargs)
ymin, ymax = ax['ts'].get_ylim()
# plot the histplot
histplt_kwargs = {} if histplt_kwargs is None else histplt_kwargs.copy()
ax['dts'] = fig.add_subplot(gs[0, -1:])
histplt_kwargs.update({'ax': ax['dts']})
histplt_kwargs.update({'ylabel': 'Counts'})
histplt_kwargs.update({'vertical': True})
if 'color' not in histplt_kwargs.keys():
histplt_kwargs.update({'color': lipdutils.PLOT_DEFAULT[archiveType][0]})
ax['dts'] = self.histplot(**histplt_kwargs)
ax['dts'].set_ylim([ymin, ymax])
ax['dts'].set_yticklabels([])
ax['dts'].set_ylabel('')
ax['dts'].set_yticks([])
# plot map
map_kwargs = {} if map_kwargs is None else map_kwargs.copy()
projection = map_kwargs.pop('projection', 'Orthographic')
proj_default = map_kwargs.pop('proj_default', True)
lakes = map_kwargs.pop('lakes', False)
land = map_kwargs.pop('land', False)
ocean = map_kwargs.pop('ocean', False)
rivers = map_kwargs.pop('rivers', False)
borders = map_kwargs.pop('borders', True)
coastline = map_kwargs.pop('coastline', True)
background = map_kwargs.pop('background', True)
map_gridspec_kwargs = map_kwargs.pop('gridspec_kwargs', {})
lgd_kwargs = map_kwargs.pop('lgd_kwargs', {})
if 'edgecolor' in map_kwargs.keys():
scatter_kwargs.update({'edgecolor': map_kwargs['edgecolor']})
cmap = map_kwargs.pop('cmap', None)
legend = map_kwargs.pop('legend', False)
colorbar = map_kwargs.pop('colorbar', False)
if legend == False:
map_gridspec_kwargs['width_ratios'] = [.5,16, 1]
_, ax['map'] =mapping.scatter_map(self, hue=hue, size=size, marker=marker, projection=projection, proj_default=proj_default,
background=background, borders=borders, coastline=coastline, rivers=rivers, lakes=lakes, ocean=ocean, land=land,
figsize=None, scatter_kwargs=scatter_kwargs,gridspec_kwargs = map_gridspec_kwargs,
lgd_kwargs=lgd_kwargs, legend=legend, cmap=cmap, colorbar=colorbar,
fig=fig, gs_slot=gs[-1, 0:1])
# spectral analysis
spectral_kwargs = {} if spectral_kwargs is None else spectral_kwargs.copy()
if 'method' in spectral_kwargs.keys():
pass
else:
spectral_kwargs.update({'method': 'lomb_scargle'})
if 'freq_method' in spectral_kwargs.keys():
pass
else:
spectral_kwargs.update({'freq_method': 'lomb_scargle'})
ax['spec'] = fig.add_subplot(gs[-1, -2:])
spectralfig_kwargs = {} if spectralfig_kwargs is None else spectralfig_kwargs.copy()
spectralfig_kwargs.update({'ax': ax['spec']})
ts_preprocess = self.detrend().standardize()
psd = ts_preprocess.spectral(**spectral_kwargs)
# Significance test
spectralsignif_kwargs = {} if spectralsignif_kwargs is None else spectralsignif_kwargs.copy()
psd_signif = psd.signif_test(**spectralsignif_kwargs)
# plot
if 'color' not in spectralfig_kwargs.keys():
spectralfig_kwargs.update({'color': lipdutils.PLOT_DEFAULT[archiveType][0]})
if 'signif_clr' not in spectralfig_kwargs.keys():
spectralfig_kwargs.update({'signif_clr': 'grey'})
ax['spec'] = psd_signif.plot(**spectralfig_kwargs)
if 'path' in savefig_settings:
plotting.savefig(fig, settings=savefig_settings)
return fig, ax
[docs] def segment(self, factor=10, verbose = False):
"""Gap detection
This function segments a timeseries into n parts following a gap- detection algorithm. The rule of gap detection is very simple:
we define the intervals between time points as dts, then if dts[i] is larger than factor * dts[i-1],
we think that the change of dts (or the gradient) is too large, and we regard it as a breaking point
and divide the time series into two segments here
Parameters
----------
factor : float
The factor that adjusts the threshold for gap detection
verbose : bool
If True, will print warning messages if there is any
Returns
-------
res : MultiplegGeoSeries or GeoSeries
If gaps were detected, returns the segments in a MultipleGeoSeries object,
else, returns the original timeseries.
Examples
--------
.. jupyter-execute::
import numpy as np
gs = pyleo.utils.datasets.load_dataset('EDC-dD')
gs.value[4000:5000] = np.nan # cut a large gap in the middle
mgs = gs.segment()
mgs.plot()
"""
from ..core.multiplegeoseries import MultipleGeoSeries
seg_y, seg_t, n_segs = tsutils.ts2segments(self.value,self.time,factor=factor)
if len(seg_y)>1:
s_list=[]
for idx,s in enumerate(seg_y):
if self.label is not None:
s_lbl = self.label + ' segment ' + str(idx+1)
else:
s_lbl = 'segment ' + str(idx+1)
s_tmp = self.copy() # copy metadata
s_tmp.time = seg_t[idx]
s_tmp.value = s
s_tmp.label = s_lbl
s_list.append(s_tmp)
res=MultipleGeoSeries(series_list=s_list)
elif len(seg_y)==1:
res=self.copy()
else:
raise ValueError('No timeseries detected')
return res
[docs] def resample(self, rule, keep_log = False, **kwargs):
"""
Run analogue to pandas.Series.resample.
This is a convenience method: doing
ser.resample('AS').mean()
will do the same thing as
ser.pandas_method(lambda x: x.resample('AS').mean())
but will also accept some extra resampling rules, such as `'Ga'` (see below).
Parameters
----------
rule : str
The offset string or object representing target conversion.
Can also accept pyleoclim units, such as 'ka' (1000 years),
'Ma' (1 million years), and 'Ga' (1 billion years).
Check the [pandas resample docs](https://pandas.pydata.org/docs/dev/reference/api/pandas.DataFrame.resample.html)
for more details.
kwargs : dict
Any other arguments which will be passed to pandas.Series.resample.
Returns
-------
SeriesResampler
Resampler object, not meant to be used to directly. Instead,
an aggregation should be called on it, see examples below.
Examples
--------
.. jupyter-execute::
ts = pyleo.utils.load_dataset('EDC-dD').convert_time_unit('ky BP')
ts5k = ts.resample('1ka').mean()
fig, ax = ts.plot()
ts5k.plot(ax=ax,color='C1')
"""
search = re.search(r'(\d*)([a-zA-Z]+)', rule)
if search is None:
raise ValueError(f"Invalid rule provided, got: {rule}")
md = self.metadata
if md['label'] is not None:
md['label'] = md['label'] + ' (' + rule + ' resampling)'
multiplier = search.group(1)
if multiplier == '':
multiplier = 1
else:
multiplier = int(multiplier)
unit = search.group(2)
if unit.lower() in tsbase.MATCH_A:
rule = f'{multiplier}AS'
elif unit.lower() in tsbase.MATCH_KA:
rule = f'{1_000*multiplier}AS'
elif unit.lower() in tsbase.MATCH_MA:
rule = f'{1_000_000*multiplier}AS'
elif unit.lower() in tsbase.MATCH_GA:
rule = f'{1_000_000_000*multiplier}AS'
ser = self.to_pandas()
return GeoSeriesResampler(rule, ser, md, keep_log, kwargs)
class GeoSeriesResampler:
"""
This is only meant to be used internally, and is not meant to
be public-facing or to be used directly by users.
If users call
ts.resample('1Y').mean()
then they will get back a pyleoclim.GeoSeries, and `GeoSeriesResampler`
will only be used in an intermediate step. Think of it as an
implementation detail.
"""
def __init__(self, rule, series, metadata, keep_log, kwargs):
self.rule = rule
self.series = series
self.metadata = metadata
self.keep_log = keep_log
self.kwargs = kwargs
def __getattr__(self, attr):
attr = getattr(self.series.resample(self.rule, **self.kwargs), attr)
def func(*args, **kwargs):
series = attr(*args, **kwargs)
series.index = series.index + (series.index[1] - series.index[0])/2 # sample midpoints
_, __, direction = tsbase.time_unit_to_datum_exp_dir(self.metadata['time_unit'], self.metadata['time_name'])
if direction == 'prograde':
from_pandas = GeoSeries.from_pandas(series, metadata=self.metadata)
else:
from_pandas = GeoSeries.from_pandas(series.sort_index(ascending=False), metadata=self.metadata)
if self.keep_log == True:
if from_pandas.log is None:
from_pandas.log=()
from_pandas.log += ({len(from_pandas.log): 'resample','rule': self.rule},)
return from_pandas
return func